{ "best_metric": 0.10454361107840444, "best_model_checkpoint": "/workspace/checkpoint-save/checkpoint-31000", "epoch": 14.411901441190144, "eval_steps": 1000, "global_step": 31000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009298000929800093, "grad_norm": 26.84164047241211, "learning_rate": 4.999987663004646e-05, "loss": 9.0751, "step": 2 }, { "epoch": 0.0018596001859600185, "grad_norm": 14.609275817871094, "learning_rate": 4.999950652140343e-05, "loss": 6.2613, "step": 4 }, { "epoch": 0.002789400278940028, "grad_norm": 12.255420684814453, "learning_rate": 4.999888967772375e-05, "loss": 5.1042, "step": 6 }, { "epoch": 0.003719200371920037, "grad_norm": 6.8023810386657715, "learning_rate": 4.999802610509541e-05, "loss": 4.38, "step": 8 }, { "epoch": 0.004649000464900047, "grad_norm": 6.738560199737549, "learning_rate": 4.9996915812041525e-05, "loss": 4.3223, "step": 10 }, { "epoch": 0.005578800557880056, "grad_norm": 5.593843460083008, "learning_rate": 4.9995558809520234e-05, "loss": 4.1671, "step": 12 }, { "epoch": 0.006508600650860065, "grad_norm": 4.6850481033325195, "learning_rate": 4.9993955110924615e-05, "loss": 3.9741, "step": 14 }, { "epoch": 0.007438400743840074, "grad_norm": 5.263367176055908, "learning_rate": 4.999210473208251e-05, "loss": 3.8006, "step": 16 }, { "epoch": 0.008368200836820083, "grad_norm": 4.7343525886535645, "learning_rate": 4.999000769125641e-05, "loss": 3.6328, "step": 18 }, { "epoch": 0.009298000929800094, "grad_norm": 4.420749187469482, "learning_rate": 4.998766400914329e-05, "loss": 3.6684, "step": 20 }, { "epoch": 0.010227801022780102, "grad_norm": 5.276994228363037, "learning_rate": 4.998507370887432e-05, "loss": 3.5877, "step": 22 }, { "epoch": 0.011157601115760111, "grad_norm": 4.462757110595703, "learning_rate": 4.998223681601473e-05, "loss": 3.4634, "step": 24 }, { "epoch": 0.01208740120874012, "grad_norm": 4.126166343688965, "learning_rate": 4.99791533585635e-05, "loss": 3.2746, "step": 26 }, { "epoch": 0.01301720130172013, "grad_norm": 4.645055770874023, "learning_rate": 4.997582336695312e-05, "loss": 3.3373, "step": 28 }, { "epoch": 0.01394700139470014, "grad_norm": 3.7785751819610596, "learning_rate": 4.9972246874049254e-05, "loss": 3.3315, "step": 30 }, { "epoch": 0.014876801487680148, "grad_norm": 5.59454870223999, "learning_rate": 4.996842391515044e-05, "loss": 3.379, "step": 32 }, { "epoch": 0.01580660158066016, "grad_norm": 4.673776626586914, "learning_rate": 4.996435452798774e-05, "loss": 3.2525, "step": 34 }, { "epoch": 0.016736401673640166, "grad_norm": 4.380648136138916, "learning_rate": 4.9960038752724373e-05, "loss": 3.2296, "step": 36 }, { "epoch": 0.017666201766620176, "grad_norm": 4.780018329620361, "learning_rate": 4.99554766319553e-05, "loss": 3.203, "step": 38 }, { "epoch": 0.018596001859600187, "grad_norm": 4.648500442504883, "learning_rate": 4.9950668210706786e-05, "loss": 3.2905, "step": 40 }, { "epoch": 0.019525801952580194, "grad_norm": 4.603128433227539, "learning_rate": 4.994561353643604e-05, "loss": 3.2366, "step": 42 }, { "epoch": 0.020455602045560205, "grad_norm": 4.625993251800537, "learning_rate": 4.994031265903063e-05, "loss": 2.986, "step": 44 }, { "epoch": 0.021385402138540215, "grad_norm": 4.307627201080322, "learning_rate": 4.9934765630808095e-05, "loss": 3.1374, "step": 46 }, { "epoch": 0.022315202231520222, "grad_norm": 3.945112943649292, "learning_rate": 4.9928972506515354e-05, "loss": 3.1015, "step": 48 }, { "epoch": 0.023245002324500233, "grad_norm": 3.6010849475860596, "learning_rate": 4.992293334332821e-05, "loss": 3.1995, "step": 50 }, { "epoch": 0.02417480241748024, "grad_norm": 4.1106791496276855, "learning_rate": 4.991664820085075e-05, "loss": 3.0527, "step": 52 }, { "epoch": 0.02510460251046025, "grad_norm": 5.628199577331543, "learning_rate": 4.991011714111481e-05, "loss": 3.1071, "step": 54 }, { "epoch": 0.02603440260344026, "grad_norm": 4.327247142791748, "learning_rate": 4.990334022857932e-05, "loss": 3.0896, "step": 56 }, { "epoch": 0.02696420269642027, "grad_norm": 4.380341053009033, "learning_rate": 4.989631753012965e-05, "loss": 3.1778, "step": 58 }, { "epoch": 0.02789400278940028, "grad_norm": 4.265223026275635, "learning_rate": 4.9889049115077005e-05, "loss": 2.843, "step": 60 }, { "epoch": 0.02882380288238029, "grad_norm": 3.8485188484191895, "learning_rate": 4.988153505515772e-05, "loss": 2.9284, "step": 62 }, { "epoch": 0.029753602975360297, "grad_norm": 4.034395694732666, "learning_rate": 4.987377542453251e-05, "loss": 2.9839, "step": 64 }, { "epoch": 0.030683403068340307, "grad_norm": 3.5771090984344482, "learning_rate": 4.98657702997858e-05, "loss": 3.0091, "step": 66 }, { "epoch": 0.03161320316132032, "grad_norm": 4.391136646270752, "learning_rate": 4.985751975992497e-05, "loss": 3.219, "step": 68 }, { "epoch": 0.032543003254300325, "grad_norm": 3.924013614654541, "learning_rate": 4.9849023886379484e-05, "loss": 3.124, "step": 70 }, { "epoch": 0.03347280334728033, "grad_norm": 3.7509515285491943, "learning_rate": 4.984028276300019e-05, "loss": 3.026, "step": 72 }, { "epoch": 0.034402603440260346, "grad_norm": 4.312924385070801, "learning_rate": 4.983129647605847e-05, "loss": 3.0476, "step": 74 }, { "epoch": 0.03533240353324035, "grad_norm": 4.421923637390137, "learning_rate": 4.982206511424533e-05, "loss": 2.9608, "step": 76 }, { "epoch": 0.03626220362622036, "grad_norm": 5.102890968322754, "learning_rate": 4.981258876867058e-05, "loss": 2.9216, "step": 78 }, { "epoch": 0.037192003719200374, "grad_norm": 5.118149280548096, "learning_rate": 4.980286753286193e-05, "loss": 2.9678, "step": 80 }, { "epoch": 0.03812180381218038, "grad_norm": 5.108033657073975, "learning_rate": 4.9792901502764055e-05, "loss": 3.0479, "step": 82 }, { "epoch": 0.03905160390516039, "grad_norm": 4.716604232788086, "learning_rate": 4.978269077673764e-05, "loss": 3.0753, "step": 84 }, { "epoch": 0.0399814039981404, "grad_norm": 5.327079772949219, "learning_rate": 4.977223545555844e-05, "loss": 2.8873, "step": 86 }, { "epoch": 0.04091120409112041, "grad_norm": 3.7703065872192383, "learning_rate": 4.9761535642416245e-05, "loss": 2.8954, "step": 88 }, { "epoch": 0.04184100418410042, "grad_norm": 4.02114725112915, "learning_rate": 4.975059144291391e-05, "loss": 2.7848, "step": 90 }, { "epoch": 0.04277080427708043, "grad_norm": 4.449793815612793, "learning_rate": 4.9739402965066235e-05, "loss": 2.9486, "step": 92 }, { "epoch": 0.04370060437006044, "grad_norm": 4.573589324951172, "learning_rate": 4.9727970319299004e-05, "loss": 2.9151, "step": 94 }, { "epoch": 0.044630404463040445, "grad_norm": 4.297460556030273, "learning_rate": 4.971629361844781e-05, "loss": 2.8539, "step": 96 }, { "epoch": 0.04556020455602046, "grad_norm": 5.690791130065918, "learning_rate": 4.970437297775697e-05, "loss": 3.0798, "step": 98 }, { "epoch": 0.046490004649000466, "grad_norm": 4.590264320373535, "learning_rate": 4.96922085148784e-05, "loss": 3.1575, "step": 100 }, { "epoch": 0.04741980474198047, "grad_norm": 4.4879679679870605, "learning_rate": 4.967980034987045e-05, "loss": 2.8834, "step": 102 }, { "epoch": 0.04834960483496048, "grad_norm": 4.817439079284668, "learning_rate": 4.966714860519667e-05, "loss": 2.8452, "step": 104 }, { "epoch": 0.049279404927940494, "grad_norm": 3.688944101333618, "learning_rate": 4.965425340572469e-05, "loss": 2.7784, "step": 106 }, { "epoch": 0.0502092050209205, "grad_norm": 4.610483169555664, "learning_rate": 4.9641114878724915e-05, "loss": 2.8627, "step": 108 }, { "epoch": 0.05113900511390051, "grad_norm": 4.146760940551758, "learning_rate": 4.9627733153869304e-05, "loss": 2.7825, "step": 110 }, { "epoch": 0.05206880520688052, "grad_norm": 4.200835704803467, "learning_rate": 4.9614108363230095e-05, "loss": 2.8618, "step": 112 }, { "epoch": 0.05299860529986053, "grad_norm": 3.8388872146606445, "learning_rate": 4.960024064127845e-05, "loss": 2.8708, "step": 114 }, { "epoch": 0.05392840539284054, "grad_norm": 3.918163537979126, "learning_rate": 4.95861301248832e-05, "loss": 2.8933, "step": 116 }, { "epoch": 0.05485820548582055, "grad_norm": 3.752009391784668, "learning_rate": 4.957177695330943e-05, "loss": 2.8593, "step": 118 }, { "epoch": 0.05578800557880056, "grad_norm": 4.043272495269775, "learning_rate": 4.955718126821718e-05, "loss": 2.816, "step": 120 }, { "epoch": 0.056717805671780565, "grad_norm": 3.978485584259033, "learning_rate": 4.9542343213659933e-05, "loss": 2.8078, "step": 122 }, { "epoch": 0.05764760576476058, "grad_norm": 4.077065467834473, "learning_rate": 4.9527262936083315e-05, "loss": 2.8227, "step": 124 }, { "epoch": 0.058577405857740586, "grad_norm": 3.348025321960449, "learning_rate": 4.951194058432358e-05, "loss": 2.8647, "step": 126 }, { "epoch": 0.05950720595072059, "grad_norm": 4.182656288146973, "learning_rate": 4.949637630960613e-05, "loss": 2.9102, "step": 128 }, { "epoch": 0.06043700604370061, "grad_norm": 3.805508613586426, "learning_rate": 4.9480570265544104e-05, "loss": 2.7437, "step": 130 }, { "epoch": 0.061366806136680614, "grad_norm": 4.192744255065918, "learning_rate": 4.946452260813676e-05, "loss": 2.6799, "step": 132 }, { "epoch": 0.06229660622966062, "grad_norm": 4.164530277252197, "learning_rate": 4.944823349576801e-05, "loss": 2.687, "step": 134 }, { "epoch": 0.06322640632264064, "grad_norm": 4.058234691619873, "learning_rate": 4.94317030892048e-05, "loss": 2.7054, "step": 136 }, { "epoch": 0.06415620641562064, "grad_norm": 4.543248176574707, "learning_rate": 4.941493155159558e-05, "loss": 2.8457, "step": 138 }, { "epoch": 0.06508600650860065, "grad_norm": 3.8340401649475098, "learning_rate": 4.9397919048468644e-05, "loss": 2.6479, "step": 140 }, { "epoch": 0.06601580660158066, "grad_norm": 3.7976367473602295, "learning_rate": 4.938066574773054e-05, "loss": 2.9002, "step": 142 }, { "epoch": 0.06694560669456066, "grad_norm": 4.197248458862305, "learning_rate": 4.9363171819664394e-05, "loss": 2.6947, "step": 144 }, { "epoch": 0.06787540678754068, "grad_norm": 3.8797290325164795, "learning_rate": 4.9345437436928175e-05, "loss": 2.7928, "step": 146 }, { "epoch": 0.06880520688052069, "grad_norm": 3.9591214656829834, "learning_rate": 4.9327462774553125e-05, "loss": 2.8033, "step": 148 }, { "epoch": 0.0697350069735007, "grad_norm": 4.016685962677002, "learning_rate": 4.930924800994187e-05, "loss": 2.6011, "step": 150 }, { "epoch": 0.0706648070664807, "grad_norm": 4.322315692901611, "learning_rate": 4.9290793322866815e-05, "loss": 2.8389, "step": 152 }, { "epoch": 0.07159460715946071, "grad_norm": 3.8187756538391113, "learning_rate": 4.927209889546824e-05, "loss": 2.6916, "step": 154 }, { "epoch": 0.07252440725244072, "grad_norm": 4.5557475090026855, "learning_rate": 4.9253164912252613e-05, "loss": 2.8439, "step": 156 }, { "epoch": 0.07345420734542074, "grad_norm": 4.476308822631836, "learning_rate": 4.9233991560090696e-05, "loss": 2.7726, "step": 158 }, { "epoch": 0.07438400743840075, "grad_norm": 4.913254737854004, "learning_rate": 4.921457902821575e-05, "loss": 2.5342, "step": 160 }, { "epoch": 0.07531380753138076, "grad_norm": 5.02558708190918, "learning_rate": 4.9194927508221604e-05, "loss": 2.6299, "step": 162 }, { "epoch": 0.07624360762436076, "grad_norm": 4.535084247589111, "learning_rate": 4.917503719406084e-05, "loss": 2.7971, "step": 164 }, { "epoch": 0.07717340771734077, "grad_norm": 4.182591915130615, "learning_rate": 4.915490828204283e-05, "loss": 2.7582, "step": 166 }, { "epoch": 0.07810320781032078, "grad_norm": 4.1888861656188965, "learning_rate": 4.913454097083182e-05, "loss": 2.7291, "step": 168 }, { "epoch": 0.07903300790330078, "grad_norm": 7.0589399337768555, "learning_rate": 4.911393546144493e-05, "loss": 2.6613, "step": 170 }, { "epoch": 0.0799628079962808, "grad_norm": 4.748135566711426, "learning_rate": 4.909309195725022e-05, "loss": 2.7612, "step": 172 }, { "epoch": 0.08089260808926081, "grad_norm": 5.596706390380859, "learning_rate": 4.9072010663964666e-05, "loss": 2.6595, "step": 174 }, { "epoch": 0.08182240818224082, "grad_norm": 4.975738525390625, "learning_rate": 4.905069178965212e-05, "loss": 2.6832, "step": 176 }, { "epoch": 0.08275220827522083, "grad_norm": 5.111588001251221, "learning_rate": 4.9029135544721275e-05, "loss": 2.7406, "step": 178 }, { "epoch": 0.08368200836820083, "grad_norm": 4.659754753112793, "learning_rate": 4.900734214192356e-05, "loss": 2.5461, "step": 180 }, { "epoch": 0.08461180846118084, "grad_norm": 4.999963760375977, "learning_rate": 4.898531179635105e-05, "loss": 2.5839, "step": 182 }, { "epoch": 0.08554160855416086, "grad_norm": 4.484140396118164, "learning_rate": 4.8963044725434374e-05, "loss": 2.7225, "step": 184 }, { "epoch": 0.08647140864714087, "grad_norm": 4.14189338684082, "learning_rate": 4.894054114894053e-05, "loss": 2.6972, "step": 186 }, { "epoch": 0.08740120874012088, "grad_norm": 5.563836574554443, "learning_rate": 4.891780128897074e-05, "loss": 2.7435, "step": 188 }, { "epoch": 0.08833100883310088, "grad_norm": 4.364253997802734, "learning_rate": 4.889482536995823e-05, "loss": 2.6541, "step": 190 }, { "epoch": 0.08926080892608089, "grad_norm": 4.758110523223877, "learning_rate": 4.887161361866606e-05, "loss": 2.5148, "step": 192 }, { "epoch": 0.0901906090190609, "grad_norm": 3.6075069904327393, "learning_rate": 4.8848166264184824e-05, "loss": 2.3732, "step": 194 }, { "epoch": 0.09112040911204092, "grad_norm": 4.10892391204834, "learning_rate": 4.882448353793046e-05, "loss": 2.6504, "step": 196 }, { "epoch": 0.09205020920502092, "grad_norm": 3.9181854724884033, "learning_rate": 4.88005656736419e-05, "loss": 2.5456, "step": 198 }, { "epoch": 0.09298000929800093, "grad_norm": 4.361126899719238, "learning_rate": 4.877641290737882e-05, "loss": 2.6398, "step": 200 }, { "epoch": 0.09390980939098094, "grad_norm": 4.867696762084961, "learning_rate": 4.875202547751927e-05, "loss": 2.5948, "step": 202 }, { "epoch": 0.09483960948396095, "grad_norm": 4.049160957336426, "learning_rate": 4.8727403624757345e-05, "loss": 2.5698, "step": 204 }, { "epoch": 0.09576940957694095, "grad_norm": 4.446025371551514, "learning_rate": 4.870254759210078e-05, "loss": 2.6652, "step": 206 }, { "epoch": 0.09669920966992096, "grad_norm": 4.179646015167236, "learning_rate": 4.867745762486859e-05, "loss": 2.6617, "step": 208 }, { "epoch": 0.09762900976290098, "grad_norm": 4.07493782043457, "learning_rate": 4.865213397068862e-05, "loss": 2.7031, "step": 210 }, { "epoch": 0.09855880985588099, "grad_norm": 4.781419277191162, "learning_rate": 4.8626576879495106e-05, "loss": 2.7076, "step": 212 }, { "epoch": 0.099488609948861, "grad_norm": 3.8315060138702393, "learning_rate": 4.8600786603526234e-05, "loss": 2.6641, "step": 214 }, { "epoch": 0.100418410041841, "grad_norm": 4.456698417663574, "learning_rate": 4.857476339732161e-05, "loss": 2.518, "step": 216 }, { "epoch": 0.10134821013482101, "grad_norm": 3.7563629150390625, "learning_rate": 4.854850751771976e-05, "loss": 2.5228, "step": 218 }, { "epoch": 0.10227801022780102, "grad_norm": 4.346120357513428, "learning_rate": 4.8522019223855635e-05, "loss": 2.4566, "step": 220 }, { "epoch": 0.10320781032078104, "grad_norm": 4.484522342681885, "learning_rate": 4.8495298777157985e-05, "loss": 2.6214, "step": 222 }, { "epoch": 0.10413761041376104, "grad_norm": 4.117067337036133, "learning_rate": 4.8468346441346846e-05, "loss": 2.5532, "step": 224 }, { "epoch": 0.10506741050674105, "grad_norm": 4.339029788970947, "learning_rate": 4.844116248243088e-05, "loss": 2.6489, "step": 226 }, { "epoch": 0.10599721059972106, "grad_norm": 4.260483741760254, "learning_rate": 4.84137471687048e-05, "loss": 2.6547, "step": 228 }, { "epoch": 0.10692701069270107, "grad_norm": 4.363969802856445, "learning_rate": 4.8386100770746674e-05, "loss": 2.5651, "step": 230 }, { "epoch": 0.10785681078568107, "grad_norm": 4.2329535484313965, "learning_rate": 4.835822356141529e-05, "loss": 2.656, "step": 232 }, { "epoch": 0.1087866108786611, "grad_norm": 4.558073043823242, "learning_rate": 4.833011581584745e-05, "loss": 2.4932, "step": 234 }, { "epoch": 0.1097164109716411, "grad_norm": 4.170455455780029, "learning_rate": 4.830177781145526e-05, "loss": 2.622, "step": 236 }, { "epoch": 0.11064621106462111, "grad_norm": 4.184195518493652, "learning_rate": 4.827320982792338e-05, "loss": 2.579, "step": 238 }, { "epoch": 0.11157601115760112, "grad_norm": 5.278513431549072, "learning_rate": 4.824441214720627e-05, "loss": 2.6352, "step": 240 }, { "epoch": 0.11250581125058112, "grad_norm": 4.864531517028809, "learning_rate": 4.8215385053525426e-05, "loss": 2.54, "step": 242 }, { "epoch": 0.11343561134356113, "grad_norm": 4.603059768676758, "learning_rate": 4.8186128833366524e-05, "loss": 2.643, "step": 244 }, { "epoch": 0.11436541143654114, "grad_norm": 3.965649127960205, "learning_rate": 4.8156643775476664e-05, "loss": 2.4326, "step": 246 }, { "epoch": 0.11529521152952116, "grad_norm": 4.606848239898682, "learning_rate": 4.812693017086145e-05, "loss": 2.4578, "step": 248 }, { "epoch": 0.11622501162250116, "grad_norm": 4.161537170410156, "learning_rate": 4.8096988312782174e-05, "loss": 2.4659, "step": 250 }, { "epoch": 0.11715481171548117, "grad_norm": 4.140401363372803, "learning_rate": 4.8066818496752875e-05, "loss": 2.601, "step": 252 }, { "epoch": 0.11808461180846118, "grad_norm": 4.079921722412109, "learning_rate": 4.803642102053746e-05, "loss": 2.5432, "step": 254 }, { "epoch": 0.11901441190144119, "grad_norm": 3.512240171432495, "learning_rate": 4.800579618414676e-05, "loss": 2.3225, "step": 256 }, { "epoch": 0.1199442119944212, "grad_norm": 5.103122711181641, "learning_rate": 4.7974944289835526e-05, "loss": 2.5911, "step": 258 }, { "epoch": 0.12087401208740121, "grad_norm": 5.166843891143799, "learning_rate": 4.794386564209952e-05, "loss": 2.3322, "step": 260 }, { "epoch": 0.12180381218038122, "grad_norm": 4.457730770111084, "learning_rate": 4.791256054767243e-05, "loss": 2.6771, "step": 262 }, { "epoch": 0.12273361227336123, "grad_norm": 4.705641269683838, "learning_rate": 4.788102931552293e-05, "loss": 2.4992, "step": 264 }, { "epoch": 0.12366341236634124, "grad_norm": 4.598447322845459, "learning_rate": 4.784927225685152e-05, "loss": 2.568, "step": 266 }, { "epoch": 0.12459321245932124, "grad_norm": 5.053762435913086, "learning_rate": 4.781728968508756e-05, "loss": 2.5607, "step": 268 }, { "epoch": 0.12552301255230125, "grad_norm": 5.074231147766113, "learning_rate": 4.778508191588612e-05, "loss": 2.4087, "step": 270 }, { "epoch": 0.12645281264528127, "grad_norm": 4.156777381896973, "learning_rate": 4.775264926712488e-05, "loss": 2.5638, "step": 272 }, { "epoch": 0.12738261273826126, "grad_norm": 5.305769443511963, "learning_rate": 4.771999205890099e-05, "loss": 2.4857, "step": 274 }, { "epoch": 0.12831241283124128, "grad_norm": 4.491017818450928, "learning_rate": 4.7687110613527905e-05, "loss": 2.6353, "step": 276 }, { "epoch": 0.1292422129242213, "grad_norm": 4.875871181488037, "learning_rate": 4.765400525553223e-05, "loss": 2.5564, "step": 278 }, { "epoch": 0.1301720130172013, "grad_norm": 4.711216449737549, "learning_rate": 4.762067631165047e-05, "loss": 2.5199, "step": 280 }, { "epoch": 0.13110181311018132, "grad_norm": 4.018083095550537, "learning_rate": 4.7587124110825855e-05, "loss": 2.2491, "step": 282 }, { "epoch": 0.1320316132031613, "grad_norm": 4.232217788696289, "learning_rate": 4.7553348984205056e-05, "loss": 2.5599, "step": 284 }, { "epoch": 0.13296141329614133, "grad_norm": 3.929190158843994, "learning_rate": 4.751935126513494e-05, "loss": 2.4031, "step": 286 }, { "epoch": 0.13389121338912133, "grad_norm": 4.586203575134277, "learning_rate": 4.748513128915926e-05, "loss": 2.5091, "step": 288 }, { "epoch": 0.13482101348210135, "grad_norm": 4.274206161499023, "learning_rate": 4.745068939401537e-05, "loss": 2.4527, "step": 290 }, { "epoch": 0.13575081357508137, "grad_norm": 5.016088485717773, "learning_rate": 4.741602591963088e-05, "loss": 2.4478, "step": 292 }, { "epoch": 0.13668061366806136, "grad_norm": 4.313106536865234, "learning_rate": 4.738114120812027e-05, "loss": 2.3779, "step": 294 }, { "epoch": 0.13761041376104138, "grad_norm": 3.867921829223633, "learning_rate": 4.7346035603781564e-05, "loss": 2.3785, "step": 296 }, { "epoch": 0.13854021385402138, "grad_norm": 4.427067756652832, "learning_rate": 4.731070945309292e-05, "loss": 2.3083, "step": 298 }, { "epoch": 0.1394700139470014, "grad_norm": 3.5589725971221924, "learning_rate": 4.7275163104709166e-05, "loss": 2.3, "step": 300 }, { "epoch": 0.1403998140399814, "grad_norm": 5.225103378295898, "learning_rate": 4.7239396909458424e-05, "loss": 2.6607, "step": 302 }, { "epoch": 0.1413296141329614, "grad_norm": 3.9209322929382324, "learning_rate": 4.7203411220338585e-05, "loss": 2.374, "step": 304 }, { "epoch": 0.14225941422594143, "grad_norm": 4.408271312713623, "learning_rate": 4.716720639251388e-05, "loss": 2.5803, "step": 306 }, { "epoch": 0.14318921431892143, "grad_norm": 5.7045159339904785, "learning_rate": 4.7130782783311353e-05, "loss": 2.4565, "step": 308 }, { "epoch": 0.14411901441190145, "grad_norm": 3.9071848392486572, "learning_rate": 4.709414075221731e-05, "loss": 2.5845, "step": 310 }, { "epoch": 0.14504881450488144, "grad_norm": 5.1077070236206055, "learning_rate": 4.70572806608738e-05, "loss": 2.6088, "step": 312 }, { "epoch": 0.14597861459786146, "grad_norm": 4.379369258880615, "learning_rate": 4.7020202873075056e-05, "loss": 2.3582, "step": 314 }, { "epoch": 0.14690841469084148, "grad_norm": 4.9492621421813965, "learning_rate": 4.698290775476387e-05, "loss": 2.358, "step": 316 }, { "epoch": 0.14783821478382148, "grad_norm": 4.258172512054443, "learning_rate": 4.694539567402801e-05, "loss": 2.4049, "step": 318 }, { "epoch": 0.1487680148768015, "grad_norm": 4.419554710388184, "learning_rate": 4.6907667001096557e-05, "loss": 2.3472, "step": 320 }, { "epoch": 0.1496978149697815, "grad_norm": 4.3051533699035645, "learning_rate": 4.686972210833629e-05, "loss": 2.4617, "step": 322 }, { "epoch": 0.1506276150627615, "grad_norm": 4.763060092926025, "learning_rate": 4.6831561370247975e-05, "loss": 2.4598, "step": 324 }, { "epoch": 0.1515574151557415, "grad_norm": 4.459979057312012, "learning_rate": 4.67931851634627e-05, "loss": 2.4066, "step": 326 }, { "epoch": 0.15248721524872152, "grad_norm": 6.6437907218933105, "learning_rate": 4.6754593866738114e-05, "loss": 2.3919, "step": 328 }, { "epoch": 0.15341701534170155, "grad_norm": 4.752775192260742, "learning_rate": 4.671578786095475e-05, "loss": 2.4608, "step": 330 }, { "epoch": 0.15434681543468154, "grad_norm": 5.056458473205566, "learning_rate": 4.667676752911222e-05, "loss": 2.4676, "step": 332 }, { "epoch": 0.15527661552766156, "grad_norm": 4.574734210968018, "learning_rate": 4.6637533256325443e-05, "loss": 2.33, "step": 334 }, { "epoch": 0.15620641562064155, "grad_norm": 5.173153877258301, "learning_rate": 4.659808542982085e-05, "loss": 2.2878, "step": 336 }, { "epoch": 0.15713621571362157, "grad_norm": 4.311580657958984, "learning_rate": 4.655842443893257e-05, "loss": 2.3785, "step": 338 }, { "epoch": 0.15806601580660157, "grad_norm": 4.768575191497803, "learning_rate": 4.651855067509856e-05, "loss": 2.3305, "step": 340 }, { "epoch": 0.1589958158995816, "grad_norm": 4.231751918792725, "learning_rate": 4.647846453185678e-05, "loss": 2.466, "step": 342 }, { "epoch": 0.1599256159925616, "grad_norm": 4.150324821472168, "learning_rate": 4.643816640484127e-05, "loss": 2.2707, "step": 344 }, { "epoch": 0.1608554160855416, "grad_norm": 4.049415111541748, "learning_rate": 4.63976566917783e-05, "loss": 2.3902, "step": 346 }, { "epoch": 0.16178521617852162, "grad_norm": 3.9730939865112305, "learning_rate": 4.635693579248235e-05, "loss": 2.3257, "step": 348 }, { "epoch": 0.16271501627150162, "grad_norm": 4.670469284057617, "learning_rate": 4.631600410885227e-05, "loss": 2.425, "step": 350 }, { "epoch": 0.16364481636448164, "grad_norm": 5.079502582550049, "learning_rate": 4.627486204486726e-05, "loss": 2.5145, "step": 352 }, { "epoch": 0.16457461645746166, "grad_norm": 4.396912574768066, "learning_rate": 4.623351000658288e-05, "loss": 2.181, "step": 354 }, { "epoch": 0.16550441655044165, "grad_norm": 4.0926833152771, "learning_rate": 4.619194840212705e-05, "loss": 2.1548, "step": 356 }, { "epoch": 0.16643421664342167, "grad_norm": 4.533802032470703, "learning_rate": 4.615017764169602e-05, "loss": 2.3554, "step": 358 }, { "epoch": 0.16736401673640167, "grad_norm": 4.112464427947998, "learning_rate": 4.610819813755034e-05, "loss": 2.3852, "step": 360 }, { "epoch": 0.1682938168293817, "grad_norm": 4.536698341369629, "learning_rate": 4.606601030401077e-05, "loss": 2.3968, "step": 362 }, { "epoch": 0.16922361692236168, "grad_norm": 4.523467540740967, "learning_rate": 4.6023614557454184e-05, "loss": 2.4168, "step": 364 }, { "epoch": 0.1701534170153417, "grad_norm": 4.114748001098633, "learning_rate": 4.5981011316309495e-05, "loss": 2.2756, "step": 366 }, { "epoch": 0.17108321710832172, "grad_norm": 4.5569682121276855, "learning_rate": 4.59382010010535e-05, "loss": 2.2887, "step": 368 }, { "epoch": 0.17201301720130172, "grad_norm": 3.948456287384033, "learning_rate": 4.589518403420672e-05, "loss": 2.3473, "step": 370 }, { "epoch": 0.17294281729428174, "grad_norm": 4.607423782348633, "learning_rate": 4.585196084032924e-05, "loss": 2.3652, "step": 372 }, { "epoch": 0.17387261738726173, "grad_norm": 4.404171943664551, "learning_rate": 4.580853184601655e-05, "loss": 2.3809, "step": 374 }, { "epoch": 0.17480241748024175, "grad_norm": 4.412327289581299, "learning_rate": 4.5764897479895276e-05, "loss": 2.2951, "step": 376 }, { "epoch": 0.17573221757322174, "grad_norm": 4.964651584625244, "learning_rate": 4.5721058172619e-05, "loss": 2.3096, "step": 378 }, { "epoch": 0.17666201766620176, "grad_norm": 4.200504779815674, "learning_rate": 4.5677014356864e-05, "loss": 2.3076, "step": 380 }, { "epoch": 0.17759181775918179, "grad_norm": 4.83593225479126, "learning_rate": 4.563276646732495e-05, "loss": 2.2263, "step": 382 }, { "epoch": 0.17852161785216178, "grad_norm": 4.583965301513672, "learning_rate": 4.558831494071064e-05, "loss": 2.3969, "step": 384 }, { "epoch": 0.1794514179451418, "grad_norm": 4.963813304901123, "learning_rate": 4.554366021573971e-05, "loss": 2.201, "step": 386 }, { "epoch": 0.1803812180381218, "grad_norm": 4.432450771331787, "learning_rate": 4.5498802733136267e-05, "loss": 2.4745, "step": 388 }, { "epoch": 0.18131101813110181, "grad_norm": 4.115283489227295, "learning_rate": 4.545374293562554e-05, "loss": 2.2799, "step": 390 }, { "epoch": 0.18224081822408184, "grad_norm": 4.761922836303711, "learning_rate": 4.540848126792956e-05, "loss": 2.446, "step": 392 }, { "epoch": 0.18317061831706183, "grad_norm": 3.864854097366333, "learning_rate": 4.536301817676269e-05, "loss": 2.276, "step": 394 }, { "epoch": 0.18410041841004185, "grad_norm": 4.151713848114014, "learning_rate": 4.5317354110827304e-05, "loss": 2.2672, "step": 396 }, { "epoch": 0.18503021850302184, "grad_norm": 4.346314907073975, "learning_rate": 4.52714895208093e-05, "loss": 2.4714, "step": 398 }, { "epoch": 0.18596001859600186, "grad_norm": 4.151995658874512, "learning_rate": 4.522542485937365e-05, "loss": 2.4428, "step": 400 }, { "epoch": 0.18688981868898186, "grad_norm": 4.524176120758057, "learning_rate": 4.517916058115997e-05, "loss": 2.3897, "step": 402 }, { "epoch": 0.18781961878196188, "grad_norm": 3.7254161834716797, "learning_rate": 4.513269714277801e-05, "loss": 2.1562, "step": 404 }, { "epoch": 0.1887494188749419, "grad_norm": 4.314216613769531, "learning_rate": 4.508603500280316e-05, "loss": 2.2344, "step": 406 }, { "epoch": 0.1896792189679219, "grad_norm": 4.059554100036621, "learning_rate": 4.503917462177189e-05, "loss": 2.2541, "step": 408 }, { "epoch": 0.1906090190609019, "grad_norm": 4.6279497146606445, "learning_rate": 4.499211646217724e-05, "loss": 2.4191, "step": 410 }, { "epoch": 0.1915388191538819, "grad_norm": 5.201217174530029, "learning_rate": 4.494486098846425e-05, "loss": 2.2596, "step": 412 }, { "epoch": 0.19246861924686193, "grad_norm": 4.684686183929443, "learning_rate": 4.489740866702537e-05, "loss": 2.3386, "step": 414 }, { "epoch": 0.19339841933984192, "grad_norm": 4.790379047393799, "learning_rate": 4.484975996619585e-05, "loss": 2.3831, "step": 416 }, { "epoch": 0.19432821943282194, "grad_norm": 4.145999431610107, "learning_rate": 4.480191535624915e-05, "loss": 2.2221, "step": 418 }, { "epoch": 0.19525801952580196, "grad_norm": 4.353785037994385, "learning_rate": 4.475387530939223e-05, "loss": 2.2895, "step": 420 }, { "epoch": 0.19618781961878196, "grad_norm": 3.875204086303711, "learning_rate": 4.470564029976097e-05, "loss": 2.1557, "step": 422 }, { "epoch": 0.19711761971176198, "grad_norm": 4.1211957931518555, "learning_rate": 4.4657210803415444e-05, "loss": 2.1684, "step": 424 }, { "epoch": 0.19804741980474197, "grad_norm": 4.364296913146973, "learning_rate": 4.4608587298335226e-05, "loss": 2.3719, "step": 426 }, { "epoch": 0.198977219897722, "grad_norm": 4.453756332397461, "learning_rate": 4.455977026441468e-05, "loss": 2.2629, "step": 428 }, { "epoch": 0.199907019990702, "grad_norm": 4.076657295227051, "learning_rate": 4.451076018345822e-05, "loss": 2.3528, "step": 430 }, { "epoch": 0.200836820083682, "grad_norm": 4.152651786804199, "learning_rate": 4.446155753917557e-05, "loss": 2.1629, "step": 432 }, { "epoch": 0.20176662017666203, "grad_norm": 4.728516578674316, "learning_rate": 4.441216281717694e-05, "loss": 2.3917, "step": 434 }, { "epoch": 0.20269642026964202, "grad_norm": 4.4827351570129395, "learning_rate": 4.436257650496832e-05, "loss": 2.1727, "step": 436 }, { "epoch": 0.20362622036262204, "grad_norm": 4.671657085418701, "learning_rate": 4.431279909194658e-05, "loss": 2.3108, "step": 438 }, { "epoch": 0.20455602045560203, "grad_norm": 4.897982120513916, "learning_rate": 4.4262831069394696e-05, "loss": 2.3453, "step": 440 }, { "epoch": 0.20548582054858205, "grad_norm": 4.223210334777832, "learning_rate": 4.421267293047688e-05, "loss": 2.2064, "step": 442 }, { "epoch": 0.20641562064156208, "grad_norm": 4.6727166175842285, "learning_rate": 4.416232517023371e-05, "loss": 2.2857, "step": 444 }, { "epoch": 0.20734542073454207, "grad_norm": 4.188732147216797, "learning_rate": 4.4111788285577254e-05, "loss": 2.0424, "step": 446 }, { "epoch": 0.2082752208275221, "grad_norm": 4.568185329437256, "learning_rate": 4.406106277528616e-05, "loss": 2.2188, "step": 448 }, { "epoch": 0.20920502092050208, "grad_norm": 4.507286548614502, "learning_rate": 4.401014914000074e-05, "loss": 2.4434, "step": 450 }, { "epoch": 0.2101348210134821, "grad_norm": 5.976804733276367, "learning_rate": 4.395904788221801e-05, "loss": 2.3782, "step": 452 }, { "epoch": 0.2110646211064621, "grad_norm": 4.830550193786621, "learning_rate": 4.3907759506286766e-05, "loss": 2.1667, "step": 454 }, { "epoch": 0.21199442119944212, "grad_norm": 4.47223424911499, "learning_rate": 4.385628451840256e-05, "loss": 2.4437, "step": 456 }, { "epoch": 0.21292422129242214, "grad_norm": 4.695533752441406, "learning_rate": 4.380462342660275e-05, "loss": 2.254, "step": 458 }, { "epoch": 0.21385402138540213, "grad_norm": 4.260176658630371, "learning_rate": 4.3752776740761456e-05, "loss": 2.1618, "step": 460 }, { "epoch": 0.21478382147838215, "grad_norm": 4.031797409057617, "learning_rate": 4.370074497258452e-05, "loss": 2.259, "step": 462 }, { "epoch": 0.21571362157136215, "grad_norm": 4.22615909576416, "learning_rate": 4.364852863560452e-05, "loss": 2.1626, "step": 464 }, { "epoch": 0.21664342166434217, "grad_norm": 4.113691806793213, "learning_rate": 4.359612824517559e-05, "loss": 2.1371, "step": 466 }, { "epoch": 0.2175732217573222, "grad_norm": 4.68644905090332, "learning_rate": 4.354354431846844e-05, "loss": 2.155, "step": 468 }, { "epoch": 0.21850302185030218, "grad_norm": 5.12609338760376, "learning_rate": 4.34907773744652e-05, "loss": 2.2832, "step": 470 }, { "epoch": 0.2194328219432822, "grad_norm": 4.6008172035217285, "learning_rate": 4.343782793395431e-05, "loss": 2.4014, "step": 472 }, { "epoch": 0.2203626220362622, "grad_norm": 4.7459330558776855, "learning_rate": 4.338469651952536e-05, "loss": 2.1821, "step": 474 }, { "epoch": 0.22129242212924222, "grad_norm": 5.448611736297607, "learning_rate": 4.333138365556396e-05, "loss": 2.2819, "step": 476 }, { "epoch": 0.2222222222222222, "grad_norm": 4.039874076843262, "learning_rate": 4.327788986824656e-05, "loss": 2.0847, "step": 478 }, { "epoch": 0.22315202231520223, "grad_norm": 4.008144855499268, "learning_rate": 4.3224215685535246e-05, "loss": 2.3179, "step": 480 }, { "epoch": 0.22408182240818225, "grad_norm": 4.057142734527588, "learning_rate": 4.317036163717253e-05, "loss": 2.1906, "step": 482 }, { "epoch": 0.22501162250116225, "grad_norm": 4.775846004486084, "learning_rate": 4.3116328254676125e-05, "loss": 2.2181, "step": 484 }, { "epoch": 0.22594142259414227, "grad_norm": 4.998971462249756, "learning_rate": 4.30621160713337e-05, "loss": 2.3403, "step": 486 }, { "epoch": 0.22687122268712226, "grad_norm": 4.324237823486328, "learning_rate": 4.300772562219762e-05, "loss": 2.1862, "step": 488 }, { "epoch": 0.22780102278010228, "grad_norm": 4.7408270835876465, "learning_rate": 4.2953157444079665e-05, "loss": 2.2282, "step": 490 }, { "epoch": 0.22873082287308227, "grad_norm": 4.64622688293457, "learning_rate": 4.289841207554572e-05, "loss": 2.3684, "step": 492 }, { "epoch": 0.2296606229660623, "grad_norm": 4.636557102203369, "learning_rate": 4.284349005691047e-05, "loss": 2.1148, "step": 494 }, { "epoch": 0.23059042305904232, "grad_norm": 4.849292278289795, "learning_rate": 4.278839193023209e-05, "loss": 2.188, "step": 496 }, { "epoch": 0.2315202231520223, "grad_norm": 5.605935573577881, "learning_rate": 4.273311823930679e-05, "loss": 2.3875, "step": 498 }, { "epoch": 0.23245002324500233, "grad_norm": 4.441534042358398, "learning_rate": 4.2677669529663635e-05, "loss": 2.1226, "step": 500 }, { "epoch": 0.23337982333798232, "grad_norm": 4.407844543457031, "learning_rate": 4.262204634855898e-05, "loss": 2.0893, "step": 502 }, { "epoch": 0.23430962343096234, "grad_norm": 4.561530590057373, "learning_rate": 4.2566249244971185e-05, "loss": 2.0934, "step": 504 }, { "epoch": 0.23523942352394236, "grad_norm": 3.8607325553894043, "learning_rate": 4.251027876959511e-05, "loss": 2.0802, "step": 506 }, { "epoch": 0.23616922361692236, "grad_norm": 4.207155704498291, "learning_rate": 4.2454135474836777e-05, "loss": 2.1073, "step": 508 }, { "epoch": 0.23709902370990238, "grad_norm": 4.435304164886475, "learning_rate": 4.2397819914807815e-05, "loss": 2.1878, "step": 510 }, { "epoch": 0.23802882380288237, "grad_norm": 4.477855682373047, "learning_rate": 4.234133264532008e-05, "loss": 2.1557, "step": 512 }, { "epoch": 0.2389586238958624, "grad_norm": 4.910909652709961, "learning_rate": 4.2284674223880126e-05, "loss": 2.231, "step": 514 }, { "epoch": 0.2398884239888424, "grad_norm": 4.534247398376465, "learning_rate": 4.222784520968368e-05, "loss": 2.1865, "step": 516 }, { "epoch": 0.2408182240818224, "grad_norm": 4.536930084228516, "learning_rate": 4.217084616361018e-05, "loss": 2.0369, "step": 518 }, { "epoch": 0.24174802417480243, "grad_norm": 6.204085350036621, "learning_rate": 4.2113677648217184e-05, "loss": 2.2676, "step": 520 }, { "epoch": 0.24267782426778242, "grad_norm": 5.043148994445801, "learning_rate": 4.205634022773488e-05, "loss": 2.1728, "step": 522 }, { "epoch": 0.24360762436076244, "grad_norm": 4.441319942474365, "learning_rate": 4.199883446806044e-05, "loss": 2.2791, "step": 524 }, { "epoch": 0.24453742445374244, "grad_norm": 4.622180938720703, "learning_rate": 4.194116093675252e-05, "loss": 2.2377, "step": 526 }, { "epoch": 0.24546722454672246, "grad_norm": 4.700043201446533, "learning_rate": 4.188332020302558e-05, "loss": 2.1303, "step": 528 }, { "epoch": 0.24639702463970248, "grad_norm": 4.551712989807129, "learning_rate": 4.18253128377443e-05, "loss": 2.0938, "step": 530 }, { "epoch": 0.24732682473268247, "grad_norm": 4.108737468719482, "learning_rate": 4.176713941341797e-05, "loss": 2.1132, "step": 532 }, { "epoch": 0.2482566248256625, "grad_norm": 4.604151725769043, "learning_rate": 4.17088005041948e-05, "loss": 2.1034, "step": 534 }, { "epoch": 0.24918642491864249, "grad_norm": 4.597217559814453, "learning_rate": 4.1650296685856264e-05, "loss": 2.2177, "step": 536 }, { "epoch": 0.2501162250116225, "grad_norm": 5.369827747344971, "learning_rate": 4.159162853581144e-05, "loss": 2.2053, "step": 538 }, { "epoch": 0.2510460251046025, "grad_norm": 4.734828472137451, "learning_rate": 4.1532796633091275e-05, "loss": 2.2638, "step": 540 }, { "epoch": 0.2519758251975825, "grad_norm": 4.740562915802002, "learning_rate": 4.147380155834291e-05, "loss": 2.1377, "step": 542 }, { "epoch": 0.25290562529056254, "grad_norm": 4.465574741363525, "learning_rate": 4.1414643893823893e-05, "loss": 2.2568, "step": 544 }, { "epoch": 0.25383542538354253, "grad_norm": 5.246885776519775, "learning_rate": 4.1355324223396516e-05, "loss": 2.2122, "step": 546 }, { "epoch": 0.2547652254765225, "grad_norm": 4.4123759269714355, "learning_rate": 4.129584313252195e-05, "loss": 2.088, "step": 548 }, { "epoch": 0.2556950255695026, "grad_norm": 4.591398239135742, "learning_rate": 4.1236201208254576e-05, "loss": 2.1764, "step": 550 }, { "epoch": 0.25662482566248257, "grad_norm": 5.1638407707214355, "learning_rate": 4.1176399039236096e-05, "loss": 2.1266, "step": 552 }, { "epoch": 0.25755462575546256, "grad_norm": 4.475945949554443, "learning_rate": 4.111643721568977e-05, "loss": 2.0847, "step": 554 }, { "epoch": 0.2584844258484426, "grad_norm": 4.5842766761779785, "learning_rate": 4.10563163294146e-05, "loss": 2.0488, "step": 556 }, { "epoch": 0.2594142259414226, "grad_norm": 4.270354747772217, "learning_rate": 4.099603697377945e-05, "loss": 2.1606, "step": 558 }, { "epoch": 0.2603440260344026, "grad_norm": 4.379874229431152, "learning_rate": 4.0935599743717234e-05, "loss": 2.1473, "step": 560 }, { "epoch": 0.2612738261273826, "grad_norm": 4.079301357269287, "learning_rate": 4.087500523571901e-05, "loss": 2.065, "step": 562 }, { "epoch": 0.26220362622036264, "grad_norm": 4.514833927154541, "learning_rate": 4.0814254047828096e-05, "loss": 2.0902, "step": 564 }, { "epoch": 0.26313342631334263, "grad_norm": 4.833306789398193, "learning_rate": 4.075334677963421e-05, "loss": 2.0204, "step": 566 }, { "epoch": 0.2640632264063226, "grad_norm": 4.539350986480713, "learning_rate": 4.06922840322675e-05, "loss": 2.1216, "step": 568 }, { "epoch": 0.2649930264993027, "grad_norm": 4.5180583000183105, "learning_rate": 4.0631066408392624e-05, "loss": 1.9453, "step": 570 }, { "epoch": 0.26592282659228267, "grad_norm": 5.044022560119629, "learning_rate": 4.056969451220281e-05, "loss": 1.9341, "step": 572 }, { "epoch": 0.26685262668526266, "grad_norm": 4.4487624168396, "learning_rate": 4.05081689494139e-05, "loss": 1.9342, "step": 574 }, { "epoch": 0.26778242677824265, "grad_norm": 4.208011627197266, "learning_rate": 4.044649032725835e-05, "loss": 2.0515, "step": 576 }, { "epoch": 0.2687122268712227, "grad_norm": 4.572739601135254, "learning_rate": 4.038465925447928e-05, "loss": 2.1114, "step": 578 }, { "epoch": 0.2696420269642027, "grad_norm": 4.769423484802246, "learning_rate": 4.03226763413244e-05, "loss": 2.0059, "step": 580 }, { "epoch": 0.2705718270571827, "grad_norm": 4.150985240936279, "learning_rate": 4.0260542199540044e-05, "loss": 2.0426, "step": 582 }, { "epoch": 0.27150162715016274, "grad_norm": 5.442625045776367, "learning_rate": 4.019825744236512e-05, "loss": 2.0914, "step": 584 }, { "epoch": 0.27243142724314273, "grad_norm": 4.594238758087158, "learning_rate": 4.013582268452502e-05, "loss": 2.1513, "step": 586 }, { "epoch": 0.2733612273361227, "grad_norm": 4.3913679122924805, "learning_rate": 4.0073238542225605e-05, "loss": 1.9892, "step": 588 }, { "epoch": 0.2742910274291027, "grad_norm": 4.504171371459961, "learning_rate": 4.0010505633147085e-05, "loss": 2.0143, "step": 590 }, { "epoch": 0.27522082752208277, "grad_norm": 4.59321403503418, "learning_rate": 3.994762457643796e-05, "loss": 1.9915, "step": 592 }, { "epoch": 0.27615062761506276, "grad_norm": 4.7557549476623535, "learning_rate": 3.988459599270886e-05, "loss": 2.1402, "step": 594 }, { "epoch": 0.27708042770804275, "grad_norm": 4.339559078216553, "learning_rate": 3.9821420504026473e-05, "loss": 2.0686, "step": 596 }, { "epoch": 0.2780102278010228, "grad_norm": 6.258368492126465, "learning_rate": 3.975809873390736e-05, "loss": 2.0857, "step": 598 }, { "epoch": 0.2789400278940028, "grad_norm": 4.531676292419434, "learning_rate": 3.969463130731182e-05, "loss": 1.9998, "step": 600 }, { "epoch": 0.2798698279869828, "grad_norm": 5.337503433227539, "learning_rate": 3.963101885063775e-05, "loss": 2.055, "step": 602 }, { "epoch": 0.2807996280799628, "grad_norm": 4.745869159698486, "learning_rate": 3.9567261991714404e-05, "loss": 1.9999, "step": 604 }, { "epoch": 0.28172942817294283, "grad_norm": 5.425375938415527, "learning_rate": 3.950336135979624e-05, "loss": 2.1593, "step": 606 }, { "epoch": 0.2826592282659228, "grad_norm": 4.363291263580322, "learning_rate": 3.943931758555669e-05, "loss": 2.006, "step": 608 }, { "epoch": 0.2835890283589028, "grad_norm": 4.700246810913086, "learning_rate": 3.937513130108197e-05, "loss": 2.0109, "step": 610 }, { "epoch": 0.28451882845188287, "grad_norm": 4.571547031402588, "learning_rate": 3.9310803139864775e-05, "loss": 2.0189, "step": 612 }, { "epoch": 0.28544862854486286, "grad_norm": 4.4561238288879395, "learning_rate": 3.92463337367981e-05, "loss": 2.0491, "step": 614 }, { "epoch": 0.28637842863784285, "grad_norm": 4.416543483734131, "learning_rate": 3.918172372816892e-05, "loss": 2.0906, "step": 616 }, { "epoch": 0.28730822873082285, "grad_norm": 4.50541877746582, "learning_rate": 3.911697375165194e-05, "loss": 2.0236, "step": 618 }, { "epoch": 0.2882380288238029, "grad_norm": 5.216824054718018, "learning_rate": 3.9052084446303274e-05, "loss": 1.9934, "step": 620 }, { "epoch": 0.2891678289167829, "grad_norm": 4.56364107131958, "learning_rate": 3.898705645255419e-05, "loss": 2.1322, "step": 622 }, { "epoch": 0.2900976290097629, "grad_norm": 4.668964385986328, "learning_rate": 3.892189041220471e-05, "loss": 2.029, "step": 624 }, { "epoch": 0.29102742910274293, "grad_norm": 4.406254768371582, "learning_rate": 3.885658696841736e-05, "loss": 2.1611, "step": 626 }, { "epoch": 0.2919572291957229, "grad_norm": 4.351002216339111, "learning_rate": 3.879114676571077e-05, "loss": 1.9737, "step": 628 }, { "epoch": 0.2928870292887029, "grad_norm": 4.899750232696533, "learning_rate": 3.8725570449953306e-05, "loss": 2.1032, "step": 630 }, { "epoch": 0.29381682938168296, "grad_norm": 4.495682716369629, "learning_rate": 3.865985866835674e-05, "loss": 2.0984, "step": 632 }, { "epoch": 0.29474662947466296, "grad_norm": 4.662699222564697, "learning_rate": 3.8594012069469824e-05, "loss": 2.1063, "step": 634 }, { "epoch": 0.29567642956764295, "grad_norm": 4.402945518493652, "learning_rate": 3.852803130317191e-05, "loss": 2.2111, "step": 636 }, { "epoch": 0.29660622966062294, "grad_norm": 4.312417507171631, "learning_rate": 3.846191702066651e-05, "loss": 2.1586, "step": 638 }, { "epoch": 0.297536029753603, "grad_norm": 4.685673713684082, "learning_rate": 3.839566987447492e-05, "loss": 2.0173, "step": 640 }, { "epoch": 0.298465829846583, "grad_norm": 4.916240692138672, "learning_rate": 3.832929051842972e-05, "loss": 2.2418, "step": 642 }, { "epoch": 0.299395629939563, "grad_norm": 4.831565856933594, "learning_rate": 3.826277960766835e-05, "loss": 1.8663, "step": 644 }, { "epoch": 0.30032543003254303, "grad_norm": 4.196077823638916, "learning_rate": 3.819613779862667e-05, "loss": 2.2087, "step": 646 }, { "epoch": 0.301255230125523, "grad_norm": 5.031626224517822, "learning_rate": 3.81293657490324e-05, "loss": 2.1777, "step": 648 }, { "epoch": 0.302185030218503, "grad_norm": 4.312190532684326, "learning_rate": 3.8062464117898724e-05, "loss": 2.0522, "step": 650 }, { "epoch": 0.303114830311483, "grad_norm": 4.879306316375732, "learning_rate": 3.799543356551774e-05, "loss": 2.0994, "step": 652 }, { "epoch": 0.30404463040446306, "grad_norm": 4.381790637969971, "learning_rate": 3.7928274753453936e-05, "loss": 2.0572, "step": 654 }, { "epoch": 0.30497443049744305, "grad_norm": 4.321512222290039, "learning_rate": 3.786098834453767e-05, "loss": 1.991, "step": 656 }, { "epoch": 0.30590423059042304, "grad_norm": 4.742088794708252, "learning_rate": 3.779357500285862e-05, "loss": 1.9416, "step": 658 }, { "epoch": 0.3068340306834031, "grad_norm": 4.434507846832275, "learning_rate": 3.772603539375928e-05, "loss": 1.9484, "step": 660 }, { "epoch": 0.3077638307763831, "grad_norm": 4.07865047454834, "learning_rate": 3.765837018382831e-05, "loss": 1.9057, "step": 662 }, { "epoch": 0.3086936308693631, "grad_norm": 4.440150260925293, "learning_rate": 3.759058004089401e-05, "loss": 1.898, "step": 664 }, { "epoch": 0.30962343096234307, "grad_norm": 5.3757219314575195, "learning_rate": 3.752266563401775e-05, "loss": 2.0269, "step": 666 }, { "epoch": 0.3105532310553231, "grad_norm": 4.685118198394775, "learning_rate": 3.7454627633487274e-05, "loss": 1.9623, "step": 668 }, { "epoch": 0.3114830311483031, "grad_norm": 4.824833869934082, "learning_rate": 3.7386466710810194e-05, "loss": 2.0433, "step": 670 }, { "epoch": 0.3124128312412831, "grad_norm": 3.863189935684204, "learning_rate": 3.7318183538707294e-05, "loss": 2.0234, "step": 672 }, { "epoch": 0.31334263133426316, "grad_norm": 4.477223873138428, "learning_rate": 3.724977879110593e-05, "loss": 1.8824, "step": 674 }, { "epoch": 0.31427243142724315, "grad_norm": 4.622427463531494, "learning_rate": 3.7181253143133315e-05, "loss": 2.0136, "step": 676 }, { "epoch": 0.31520223152022314, "grad_norm": 4.859879970550537, "learning_rate": 3.7112607271109964e-05, "loss": 1.996, "step": 678 }, { "epoch": 0.31613203161320313, "grad_norm": 4.073486804962158, "learning_rate": 3.704384185254289e-05, "loss": 1.9632, "step": 680 }, { "epoch": 0.3170618317061832, "grad_norm": 4.946434497833252, "learning_rate": 3.697495756611904e-05, "loss": 2.0951, "step": 682 }, { "epoch": 0.3179916317991632, "grad_norm": 4.783480167388916, "learning_rate": 3.690595509169849e-05, "loss": 2.0839, "step": 684 }, { "epoch": 0.31892143189214317, "grad_norm": 4.2394304275512695, "learning_rate": 3.6836835110307815e-05, "loss": 2.1756, "step": 686 }, { "epoch": 0.3198512319851232, "grad_norm": 4.0898356437683105, "learning_rate": 3.6767598304133324e-05, "loss": 1.8852, "step": 688 }, { "epoch": 0.3207810320781032, "grad_norm": 4.52060079574585, "learning_rate": 3.669824535651435e-05, "loss": 2.0037, "step": 690 }, { "epoch": 0.3217108321710832, "grad_norm": 4.5146989822387695, "learning_rate": 3.662877695193647e-05, "loss": 1.9028, "step": 692 }, { "epoch": 0.32264063226406325, "grad_norm": 4.943118572235107, "learning_rate": 3.6559193776024794e-05, "loss": 2.1288, "step": 694 }, { "epoch": 0.32357043235704325, "grad_norm": 4.932314872741699, "learning_rate": 3.648949651553721e-05, "loss": 1.9541, "step": 696 }, { "epoch": 0.32450023245002324, "grad_norm": 4.603333950042725, "learning_rate": 3.64196858583575e-05, "loss": 2.0298, "step": 698 }, { "epoch": 0.32543003254300323, "grad_norm": 4.115204334259033, "learning_rate": 3.634976249348868e-05, "loss": 1.9923, "step": 700 }, { "epoch": 0.3263598326359833, "grad_norm": 5.718911647796631, "learning_rate": 3.6279727111046146e-05, "loss": 2.0396, "step": 702 }, { "epoch": 0.3272896327289633, "grad_norm": 4.47902774810791, "learning_rate": 3.620958040225082e-05, "loss": 1.902, "step": 704 }, { "epoch": 0.32821943282194327, "grad_norm": 4.621737003326416, "learning_rate": 3.6139323059422415e-05, "loss": 1.8887, "step": 706 }, { "epoch": 0.3291492329149233, "grad_norm": 4.62278413772583, "learning_rate": 3.606895577597255e-05, "loss": 2.0092, "step": 708 }, { "epoch": 0.3300790330079033, "grad_norm": 4.751461029052734, "learning_rate": 3.5998479246397887e-05, "loss": 1.9072, "step": 710 }, { "epoch": 0.3310088331008833, "grad_norm": 4.394883155822754, "learning_rate": 3.592789416627333e-05, "loss": 1.787, "step": 712 }, { "epoch": 0.3319386331938633, "grad_norm": 4.409090518951416, "learning_rate": 3.585720123224513e-05, "loss": 1.9327, "step": 714 }, { "epoch": 0.33286843328684335, "grad_norm": 4.445274829864502, "learning_rate": 3.578640114202399e-05, "loss": 1.7947, "step": 716 }, { "epoch": 0.33379823337982334, "grad_norm": 4.6497979164123535, "learning_rate": 3.571549459437823e-05, "loss": 2.0325, "step": 718 }, { "epoch": 0.33472803347280333, "grad_norm": 4.181378364562988, "learning_rate": 3.5644482289126834e-05, "loss": 2.0249, "step": 720 }, { "epoch": 0.3356578335657834, "grad_norm": 4.457767009735107, "learning_rate": 3.5573364927132607e-05, "loss": 1.9805, "step": 722 }, { "epoch": 0.3365876336587634, "grad_norm": 4.531330585479736, "learning_rate": 3.550214321029517e-05, "loss": 1.8816, "step": 724 }, { "epoch": 0.33751743375174337, "grad_norm": 4.2451491355896, "learning_rate": 3.543081784154415e-05, "loss": 1.9834, "step": 726 }, { "epoch": 0.33844723384472336, "grad_norm": 4.621556282043457, "learning_rate": 3.5359389524832126e-05, "loss": 1.8957, "step": 728 }, { "epoch": 0.3393770339377034, "grad_norm": 5.699187278747559, "learning_rate": 3.5287858965127734e-05, "loss": 2.1215, "step": 730 }, { "epoch": 0.3403068340306834, "grad_norm": 4.326658248901367, "learning_rate": 3.5216226868408746e-05, "loss": 1.9295, "step": 732 }, { "epoch": 0.3412366341236634, "grad_norm": 4.576933860778809, "learning_rate": 3.514449394165502e-05, "loss": 2.1067, "step": 734 }, { "epoch": 0.34216643421664344, "grad_norm": 4.590579986572266, "learning_rate": 3.507266089284159e-05, "loss": 1.9855, "step": 736 }, { "epoch": 0.34309623430962344, "grad_norm": 5.2467498779296875, "learning_rate": 3.500072843093163e-05, "loss": 2.1409, "step": 738 }, { "epoch": 0.34402603440260343, "grad_norm": 4.429446697235107, "learning_rate": 3.492869726586953e-05, "loss": 1.7883, "step": 740 }, { "epoch": 0.3449558344955834, "grad_norm": 4.1945085525512695, "learning_rate": 3.485656810857378e-05, "loss": 1.8412, "step": 742 }, { "epoch": 0.3458856345885635, "grad_norm": 4.398087978363037, "learning_rate": 3.478434167093007e-05, "loss": 1.8054, "step": 744 }, { "epoch": 0.34681543468154347, "grad_norm": 4.793655872344971, "learning_rate": 3.4712018665784165e-05, "loss": 2.0902, "step": 746 }, { "epoch": 0.34774523477452346, "grad_norm": 4.483579158782959, "learning_rate": 3.463959980693492e-05, "loss": 1.9452, "step": 748 }, { "epoch": 0.3486750348675035, "grad_norm": 4.404364109039307, "learning_rate": 3.4567085809127256e-05, "loss": 1.8574, "step": 750 }, { "epoch": 0.3496048349604835, "grad_norm": 4.503608226776123, "learning_rate": 3.449447738804504e-05, "loss": 1.8378, "step": 752 }, { "epoch": 0.3505346350534635, "grad_norm": 5.007493019104004, "learning_rate": 3.442177526030408e-05, "loss": 1.8974, "step": 754 }, { "epoch": 0.3514644351464435, "grad_norm": 4.5584540367126465, "learning_rate": 3.434898014344502e-05, "loss": 1.9136, "step": 756 }, { "epoch": 0.35239423523942354, "grad_norm": 4.379871368408203, "learning_rate": 3.4276092755926294e-05, "loss": 2.0145, "step": 758 }, { "epoch": 0.35332403533240353, "grad_norm": 5.062580108642578, "learning_rate": 3.420311381711696e-05, "loss": 1.8809, "step": 760 }, { "epoch": 0.3542538354253835, "grad_norm": 5.451931476593018, "learning_rate": 3.413004404728971e-05, "loss": 1.8939, "step": 762 }, { "epoch": 0.35518363551836357, "grad_norm": 4.629837989807129, "learning_rate": 3.4056884167613657e-05, "loss": 1.8905, "step": 764 }, { "epoch": 0.35611343561134357, "grad_norm": 4.338338851928711, "learning_rate": 3.3983634900147284e-05, "loss": 1.7783, "step": 766 }, { "epoch": 0.35704323570432356, "grad_norm": 4.784144401550293, "learning_rate": 3.391029696783127e-05, "loss": 1.9943, "step": 768 }, { "epoch": 0.3579730357973036, "grad_norm": 5.219831943511963, "learning_rate": 3.383687109448144e-05, "loss": 2.0309, "step": 770 }, { "epoch": 0.3589028358902836, "grad_norm": 4.773104190826416, "learning_rate": 3.376335800478148e-05, "loss": 1.9956, "step": 772 }, { "epoch": 0.3598326359832636, "grad_norm": 4.837332248687744, "learning_rate": 3.3689758424275926e-05, "loss": 1.9521, "step": 774 }, { "epoch": 0.3607624360762436, "grad_norm": 5.036917686462402, "learning_rate": 3.361607307936293e-05, "loss": 1.8042, "step": 776 }, { "epoch": 0.36169223616922364, "grad_norm": 4.789027214050293, "learning_rate": 3.354230269728708e-05, "loss": 2.0133, "step": 778 }, { "epoch": 0.36262203626220363, "grad_norm": 4.4757866859436035, "learning_rate": 3.346844800613229e-05, "loss": 1.8751, "step": 780 }, { "epoch": 0.3635518363551836, "grad_norm": 4.071562767028809, "learning_rate": 3.339450973481452e-05, "loss": 1.7312, "step": 782 }, { "epoch": 0.36448163644816367, "grad_norm": 4.189321517944336, "learning_rate": 3.332048861307467e-05, "loss": 1.8119, "step": 784 }, { "epoch": 0.36541143654114366, "grad_norm": 4.909400463104248, "learning_rate": 3.324638537147132e-05, "loss": 1.8074, "step": 786 }, { "epoch": 0.36634123663412366, "grad_norm": 4.867971897125244, "learning_rate": 3.317220074137357e-05, "loss": 1.7643, "step": 788 }, { "epoch": 0.36727103672710365, "grad_norm": 4.358593940734863, "learning_rate": 3.309793545495373e-05, "loss": 1.888, "step": 790 }, { "epoch": 0.3682008368200837, "grad_norm": 4.378060340881348, "learning_rate": 3.3023590245180225e-05, "loss": 1.855, "step": 792 }, { "epoch": 0.3691306369130637, "grad_norm": 4.656728744506836, "learning_rate": 3.294916584581027e-05, "loss": 1.8114, "step": 794 }, { "epoch": 0.3700604370060437, "grad_norm": 4.308342933654785, "learning_rate": 3.287466299138261e-05, "loss": 1.7723, "step": 796 }, { "epoch": 0.37099023709902373, "grad_norm": 5.0333709716796875, "learning_rate": 3.280008241721036e-05, "loss": 1.8839, "step": 798 }, { "epoch": 0.3719200371920037, "grad_norm": 4.912903308868408, "learning_rate": 3.272542485937367e-05, "loss": 1.8782, "step": 800 }, { "epoch": 0.3728498372849837, "grad_norm": 4.4298624992370605, "learning_rate": 3.2650691054712505e-05, "loss": 1.7401, "step": 802 }, { "epoch": 0.3737796373779637, "grad_norm": 4.465354919433594, "learning_rate": 3.2575881740819334e-05, "loss": 1.6939, "step": 804 }, { "epoch": 0.37470943747094376, "grad_norm": 3.979252815246582, "learning_rate": 3.2500997656031894e-05, "loss": 1.7753, "step": 806 }, { "epoch": 0.37563923756392376, "grad_norm": 4.773442268371582, "learning_rate": 3.2426039539425856e-05, "loss": 2.0576, "step": 808 }, { "epoch": 0.37656903765690375, "grad_norm": 4.8242669105529785, "learning_rate": 3.235100813080758e-05, "loss": 1.885, "step": 810 }, { "epoch": 0.3774988377498838, "grad_norm": 4.1370062828063965, "learning_rate": 3.2275904170706776e-05, "loss": 1.7324, "step": 812 }, { "epoch": 0.3784286378428638, "grad_norm": 4.578678607940674, "learning_rate": 3.2200728400369206e-05, "loss": 1.8224, "step": 814 }, { "epoch": 0.3793584379358438, "grad_norm": 4.596166133880615, "learning_rate": 3.2125481561749375e-05, "loss": 1.8958, "step": 816 }, { "epoch": 0.3802882380288238, "grad_norm": 4.8619256019592285, "learning_rate": 3.2050164397503206e-05, "loss": 1.92, "step": 818 }, { "epoch": 0.3812180381218038, "grad_norm": 5.053321361541748, "learning_rate": 3.197477765098071e-05, "loss": 1.8986, "step": 820 }, { "epoch": 0.3821478382147838, "grad_norm": 4.790858745574951, "learning_rate": 3.189932206621862e-05, "loss": 1.9292, "step": 822 }, { "epoch": 0.3830776383077638, "grad_norm": 4.742454528808594, "learning_rate": 3.182379838793311e-05, "loss": 1.9227, "step": 824 }, { "epoch": 0.38400743840074386, "grad_norm": 4.168822288513184, "learning_rate": 3.174820736151238e-05, "loss": 1.858, "step": 826 }, { "epoch": 0.38493723849372385, "grad_norm": 4.876013278961182, "learning_rate": 3.167254973300937e-05, "loss": 1.7674, "step": 828 }, { "epoch": 0.38586703858670385, "grad_norm": 4.7320709228515625, "learning_rate": 3.15968262491343e-05, "loss": 1.9685, "step": 830 }, { "epoch": 0.38679683867968384, "grad_norm": 5.055155277252197, "learning_rate": 3.1521037657247395e-05, "loss": 1.9164, "step": 832 }, { "epoch": 0.3877266387726639, "grad_norm": 5.648740768432617, "learning_rate": 3.144518470535147e-05, "loss": 1.8817, "step": 834 }, { "epoch": 0.3886564388656439, "grad_norm": 4.968471050262451, "learning_rate": 3.136926814208453e-05, "loss": 1.9723, "step": 836 }, { "epoch": 0.3895862389586239, "grad_norm": 4.835752010345459, "learning_rate": 3.12932887167124e-05, "loss": 1.6993, "step": 838 }, { "epoch": 0.3905160390516039, "grad_norm": 5.687002658843994, "learning_rate": 3.1217247179121344e-05, "loss": 2.0707, "step": 840 }, { "epoch": 0.3914458391445839, "grad_norm": 5.079622745513916, "learning_rate": 3.114114427981064e-05, "loss": 1.8, "step": 842 }, { "epoch": 0.3923756392375639, "grad_norm": 4.911600589752197, "learning_rate": 3.1064980769885166e-05, "loss": 1.9814, "step": 844 }, { "epoch": 0.39330543933054396, "grad_norm": 4.715559959411621, "learning_rate": 3.098875740104803e-05, "loss": 1.7664, "step": 846 }, { "epoch": 0.39423523942352395, "grad_norm": 5.0014519691467285, "learning_rate": 3.09124749255931e-05, "loss": 1.8057, "step": 848 }, { "epoch": 0.39516503951650395, "grad_norm": 4.770448207855225, "learning_rate": 3.083613409639762e-05, "loss": 1.8733, "step": 850 }, { "epoch": 0.39609483960948394, "grad_norm": 5.363395690917969, "learning_rate": 3.075973566691475e-05, "loss": 1.8327, "step": 852 }, { "epoch": 0.397024639702464, "grad_norm": 4.663390636444092, "learning_rate": 3.068328039116615e-05, "loss": 1.8256, "step": 854 }, { "epoch": 0.397954439795444, "grad_norm": 5.28458309173584, "learning_rate": 3.0606769023734515e-05, "loss": 1.8527, "step": 856 }, { "epoch": 0.398884239888424, "grad_norm": 4.562227725982666, "learning_rate": 3.0530202319756164e-05, "loss": 1.7674, "step": 858 }, { "epoch": 0.399814039981404, "grad_norm": 4.6322808265686035, "learning_rate": 3.0453581034913554e-05, "loss": 1.8489, "step": 860 }, { "epoch": 0.400743840074384, "grad_norm": 5.348202705383301, "learning_rate": 3.037690592542782e-05, "loss": 1.877, "step": 862 }, { "epoch": 0.401673640167364, "grad_norm": 4.75622034072876, "learning_rate": 3.030017774805136e-05, "loss": 1.7738, "step": 864 }, { "epoch": 0.402603440260344, "grad_norm": 4.897692680358887, "learning_rate": 3.0223397260060278e-05, "loss": 1.7855, "step": 866 }, { "epoch": 0.40353324035332405, "grad_norm": 4.619227409362793, "learning_rate": 3.0146565219247022e-05, "loss": 1.9408, "step": 868 }, { "epoch": 0.40446304044630405, "grad_norm": 4.340457439422607, "learning_rate": 3.0069682383912793e-05, "loss": 1.7085, "step": 870 }, { "epoch": 0.40539284053928404, "grad_norm": 4.425292491912842, "learning_rate": 2.999274951286016e-05, "loss": 1.7481, "step": 872 }, { "epoch": 0.4063226406322641, "grad_norm": 4.578970909118652, "learning_rate": 2.991576736538548e-05, "loss": 1.7822, "step": 874 }, { "epoch": 0.4072524407252441, "grad_norm": 4.336738586425781, "learning_rate": 2.9838736701271484e-05, "loss": 1.8205, "step": 876 }, { "epoch": 0.4081822408182241, "grad_norm": 5.223724842071533, "learning_rate": 2.9761658280779728e-05, "loss": 1.9365, "step": 878 }, { "epoch": 0.40911204091120407, "grad_norm": 4.723943710327148, "learning_rate": 2.9684532864643095e-05, "loss": 1.8446, "step": 880 }, { "epoch": 0.4100418410041841, "grad_norm": 4.855127334594727, "learning_rate": 2.9607361214058315e-05, "loss": 1.8309, "step": 882 }, { "epoch": 0.4109716410971641, "grad_norm": 5.4069623947143555, "learning_rate": 2.953014409067841e-05, "loss": 1.8898, "step": 884 }, { "epoch": 0.4119014411901441, "grad_norm": 5.4189019203186035, "learning_rate": 2.9452882256605224e-05, "loss": 1.8162, "step": 886 }, { "epoch": 0.41283124128312415, "grad_norm": 4.2488484382629395, "learning_rate": 2.9375576474381874e-05, "loss": 1.8926, "step": 888 }, { "epoch": 0.41376104137610414, "grad_norm": 4.22416877746582, "learning_rate": 2.9298227506985215e-05, "loss": 1.7367, "step": 890 }, { "epoch": 0.41469084146908414, "grad_norm": 4.8620500564575195, "learning_rate": 2.9220836117818317e-05, "loss": 1.7753, "step": 892 }, { "epoch": 0.41562064156206413, "grad_norm": 4.778024196624756, "learning_rate": 2.914340307070297e-05, "loss": 1.9387, "step": 894 }, { "epoch": 0.4165504416550442, "grad_norm": 4.738189220428467, "learning_rate": 2.9065929129872067e-05, "loss": 1.9999, "step": 896 }, { "epoch": 0.4174802417480242, "grad_norm": 4.583161354064941, "learning_rate": 2.8988415059962126e-05, "loss": 1.8715, "step": 898 }, { "epoch": 0.41841004184100417, "grad_norm": 5.00653600692749, "learning_rate": 2.891086162600575e-05, "loss": 1.9439, "step": 900 }, { "epoch": 0.4193398419339842, "grad_norm": 4.409060001373291, "learning_rate": 2.883326959342399e-05, "loss": 1.703, "step": 902 }, { "epoch": 0.4202696420269642, "grad_norm": 4.636505126953125, "learning_rate": 2.8755639728018905e-05, "loss": 1.787, "step": 904 }, { "epoch": 0.4211994421199442, "grad_norm": 4.398307800292969, "learning_rate": 2.8677972795965913e-05, "loss": 1.8551, "step": 906 }, { "epoch": 0.4221292422129242, "grad_norm": 4.791444301605225, "learning_rate": 2.8600269563806285e-05, "loss": 1.8124, "step": 908 }, { "epoch": 0.42305904230590424, "grad_norm": 4.7582106590271, "learning_rate": 2.8522530798439547e-05, "loss": 1.8366, "step": 910 }, { "epoch": 0.42398884239888424, "grad_norm": 4.350075721740723, "learning_rate": 2.8444757267115938e-05, "loss": 1.8578, "step": 912 }, { "epoch": 0.42491864249186423, "grad_norm": 4.4349894523620605, "learning_rate": 2.8366949737428793e-05, "loss": 1.7337, "step": 914 }, { "epoch": 0.4258484425848443, "grad_norm": 4.460733413696289, "learning_rate": 2.8289108977307043e-05, "loss": 1.6925, "step": 916 }, { "epoch": 0.42677824267782427, "grad_norm": 4.682216644287109, "learning_rate": 2.8211235755007555e-05, "loss": 1.6441, "step": 918 }, { "epoch": 0.42770804277080426, "grad_norm": 4.60529088973999, "learning_rate": 2.8133330839107588e-05, "loss": 1.7507, "step": 920 }, { "epoch": 0.4286378428637843, "grad_norm": 4.347693920135498, "learning_rate": 2.8055394998497217e-05, "loss": 1.8878, "step": 922 }, { "epoch": 0.4295676429567643, "grad_norm": 4.74888277053833, "learning_rate": 2.7977429002371726e-05, "loss": 1.7137, "step": 924 }, { "epoch": 0.4304974430497443, "grad_norm": 4.4108171463012695, "learning_rate": 2.789943362022402e-05, "loss": 1.91, "step": 926 }, { "epoch": 0.4314272431427243, "grad_norm": 4.947292804718018, "learning_rate": 2.782140962183702e-05, "loss": 1.9591, "step": 928 }, { "epoch": 0.43235704323570434, "grad_norm": 4.388494491577148, "learning_rate": 2.7743357777276116e-05, "loss": 1.819, "step": 930 }, { "epoch": 0.43328684332868433, "grad_norm": 5.193339824676514, "learning_rate": 2.7665278856881478e-05, "loss": 1.7946, "step": 932 }, { "epoch": 0.43421664342166433, "grad_norm": 4.8065571784973145, "learning_rate": 2.758717363126055e-05, "loss": 1.8752, "step": 934 }, { "epoch": 0.4351464435146444, "grad_norm": 4.34586763381958, "learning_rate": 2.7509042871280355e-05, "loss": 1.6649, "step": 936 }, { "epoch": 0.43607624360762437, "grad_norm": 4.194509506225586, "learning_rate": 2.7430887348059973e-05, "loss": 1.662, "step": 938 }, { "epoch": 0.43700604370060436, "grad_norm": 4.379044532775879, "learning_rate": 2.735270783296284e-05, "loss": 1.6504, "step": 940 }, { "epoch": 0.43793584379358436, "grad_norm": 4.714005470275879, "learning_rate": 2.7274505097589232e-05, "loss": 1.759, "step": 942 }, { "epoch": 0.4388656438865644, "grad_norm": 4.276295185089111, "learning_rate": 2.7196279913768567e-05, "loss": 1.7008, "step": 944 }, { "epoch": 0.4397954439795444, "grad_norm": 4.682962417602539, "learning_rate": 2.7118033053551812e-05, "loss": 1.8516, "step": 946 }, { "epoch": 0.4407252440725244, "grad_norm": 4.838159561157227, "learning_rate": 2.7039765289203922e-05, "loss": 1.7097, "step": 948 }, { "epoch": 0.44165504416550444, "grad_norm": 5.148175239562988, "learning_rate": 2.6961477393196106e-05, "loss": 1.7111, "step": 950 }, { "epoch": 0.44258484425848443, "grad_norm": 4.4916486740112305, "learning_rate": 2.6883170138198303e-05, "loss": 1.6504, "step": 952 }, { "epoch": 0.4435146443514644, "grad_norm": 4.7085418701171875, "learning_rate": 2.6804844297071502e-05, "loss": 1.6591, "step": 954 }, { "epoch": 0.4444444444444444, "grad_norm": 4.861203670501709, "learning_rate": 2.672650064286014e-05, "loss": 1.7056, "step": 956 }, { "epoch": 0.44537424453742447, "grad_norm": 4.579597473144531, "learning_rate": 2.6648139948784433e-05, "loss": 1.7329, "step": 958 }, { "epoch": 0.44630404463040446, "grad_norm": 4.962484359741211, "learning_rate": 2.6569762988232828e-05, "loss": 1.8293, "step": 960 }, { "epoch": 0.44723384472338445, "grad_norm": 4.610439300537109, "learning_rate": 2.649137053475426e-05, "loss": 1.7779, "step": 962 }, { "epoch": 0.4481636448163645, "grad_norm": 4.844552040100098, "learning_rate": 2.6412963362050604e-05, "loss": 1.8191, "step": 964 }, { "epoch": 0.4490934449093445, "grad_norm": 5.490283012390137, "learning_rate": 2.6334542243969004e-05, "loss": 1.8491, "step": 966 }, { "epoch": 0.4500232450023245, "grad_norm": 4.761651992797852, "learning_rate": 2.6256107954494232e-05, "loss": 1.7685, "step": 968 }, { "epoch": 0.4509530450953045, "grad_norm": 4.972077369689941, "learning_rate": 2.6177661267741055e-05, "loss": 1.7949, "step": 970 }, { "epoch": 0.45188284518828453, "grad_norm": 4.446878910064697, "learning_rate": 2.609920295794661e-05, "loss": 1.6999, "step": 972 }, { "epoch": 0.4528126452812645, "grad_norm": 4.849398136138916, "learning_rate": 2.6020733799462743e-05, "loss": 1.823, "step": 974 }, { "epoch": 0.4537424453742445, "grad_norm": 4.538964748382568, "learning_rate": 2.594225456674835e-05, "loss": 1.5987, "step": 976 }, { "epoch": 0.45467224546722457, "grad_norm": 4.383857250213623, "learning_rate": 2.58637660343618e-05, "loss": 1.7619, "step": 978 }, { "epoch": 0.45560204556020456, "grad_norm": 4.809948444366455, "learning_rate": 2.5785268976953196e-05, "loss": 1.6337, "step": 980 }, { "epoch": 0.45653184565318455, "grad_norm": 4.870996952056885, "learning_rate": 2.5706764169256818e-05, "loss": 1.7756, "step": 982 }, { "epoch": 0.45746164574616455, "grad_norm": 4.594924449920654, "learning_rate": 2.5628252386083427e-05, "loss": 1.6677, "step": 984 }, { "epoch": 0.4583914458391446, "grad_norm": 4.230414867401123, "learning_rate": 2.5549734402312617e-05, "loss": 1.5704, "step": 986 }, { "epoch": 0.4593212459321246, "grad_norm": 4.4205241203308105, "learning_rate": 2.5471210992885195e-05, "loss": 1.6551, "step": 988 }, { "epoch": 0.4602510460251046, "grad_norm": 4.827736854553223, "learning_rate": 2.5392682932795503e-05, "loss": 1.7202, "step": 990 }, { "epoch": 0.46118084611808463, "grad_norm": 5.001062870025635, "learning_rate": 2.5314150997083803e-05, "loss": 1.707, "step": 992 }, { "epoch": 0.4621106462110646, "grad_norm": 4.376698017120361, "learning_rate": 2.5235615960828585e-05, "loss": 1.6666, "step": 994 }, { "epoch": 0.4630404463040446, "grad_norm": 4.942276954650879, "learning_rate": 2.515707859913896e-05, "loss": 1.8613, "step": 996 }, { "epoch": 0.46397024639702467, "grad_norm": 5.080979347229004, "learning_rate": 2.5078539687146968e-05, "loss": 1.8891, "step": 998 }, { "epoch": 0.46490004649000466, "grad_norm": 4.876296520233154, "learning_rate": 2.4999999999999977e-05, "loss": 1.7821, "step": 1000 }, { "epoch": 0.46490004649000466, "eval_cer": 0.7964058513542119, "eval_loss": 1.7276298999786377, "eval_runtime": 404.6689, "eval_samples_per_second": 31.369, "eval_steps_per_second": 0.981, "step": 1000 }, { "epoch": 0.46582984658298465, "grad_norm": 5.431021213531494, "learning_rate": 2.4921460312852987e-05, "loss": 1.8837, "step": 1002 }, { "epoch": 0.46675964667596465, "grad_norm": 4.848123073577881, "learning_rate": 2.4842921400860998e-05, "loss": 1.8234, "step": 1004 }, { "epoch": 0.4676894467689447, "grad_norm": 5.170319557189941, "learning_rate": 2.476438403917137e-05, "loss": 1.9926, "step": 1006 }, { "epoch": 0.4686192468619247, "grad_norm": 4.978299140930176, "learning_rate": 2.4685849002916152e-05, "loss": 1.6704, "step": 1008 }, { "epoch": 0.4695490469549047, "grad_norm": 4.6807684898376465, "learning_rate": 2.460731706720446e-05, "loss": 1.7091, "step": 1010 }, { "epoch": 0.47047884704788473, "grad_norm": 4.622561931610107, "learning_rate": 2.452878900711476e-05, "loss": 1.6941, "step": 1012 }, { "epoch": 0.4714086471408647, "grad_norm": 4.881537437438965, "learning_rate": 2.4450265597687342e-05, "loss": 1.5862, "step": 1014 }, { "epoch": 0.4723384472338447, "grad_norm": 4.371307373046875, "learning_rate": 2.437174761391653e-05, "loss": 1.5863, "step": 1016 }, { "epoch": 0.4732682473268247, "grad_norm": 4.333101272583008, "learning_rate": 2.4293235830743134e-05, "loss": 1.6898, "step": 1018 }, { "epoch": 0.47419804741980476, "grad_norm": 4.839664936065674, "learning_rate": 2.4214731023046755e-05, "loss": 1.8409, "step": 1020 }, { "epoch": 0.47512784751278475, "grad_norm": 4.741517066955566, "learning_rate": 2.413623396563816e-05, "loss": 1.5933, "step": 1022 }, { "epoch": 0.47605764760576474, "grad_norm": 4.271093368530273, "learning_rate": 2.4057745433251594e-05, "loss": 1.7081, "step": 1024 }, { "epoch": 0.4769874476987448, "grad_norm": 5.3658342361450195, "learning_rate": 2.3979266200537208e-05, "loss": 1.804, "step": 1026 }, { "epoch": 0.4779172477917248, "grad_norm": 4.814240455627441, "learning_rate": 2.390079704205334e-05, "loss": 1.8314, "step": 1028 }, { "epoch": 0.4788470478847048, "grad_norm": 5.009090900421143, "learning_rate": 2.382233873225889e-05, "loss": 1.7605, "step": 1030 }, { "epoch": 0.4797768479776848, "grad_norm": 4.0812273025512695, "learning_rate": 2.3743892045505723e-05, "loss": 1.8228, "step": 1032 }, { "epoch": 0.4807066480706648, "grad_norm": 4.1711907386779785, "learning_rate": 2.3665457756030948e-05, "loss": 1.5317, "step": 1034 }, { "epoch": 0.4816364481636448, "grad_norm": 4.612460136413574, "learning_rate": 2.3587036637949347e-05, "loss": 1.7478, "step": 1036 }, { "epoch": 0.4825662482566248, "grad_norm": 4.5331244468688965, "learning_rate": 2.350862946524569e-05, "loss": 1.5708, "step": 1038 }, { "epoch": 0.48349604834960486, "grad_norm": 4.5432047843933105, "learning_rate": 2.3430237011767127e-05, "loss": 1.7828, "step": 1040 }, { "epoch": 0.48442584844258485, "grad_norm": 4.624579429626465, "learning_rate": 2.3351860051215508e-05, "loss": 1.7418, "step": 1042 }, { "epoch": 0.48535564853556484, "grad_norm": 4.33305025100708, "learning_rate": 2.3273499357139815e-05, "loss": 1.7159, "step": 1044 }, { "epoch": 0.48628544862854484, "grad_norm": 4.461704254150391, "learning_rate": 2.3195155702928443e-05, "loss": 1.7225, "step": 1046 }, { "epoch": 0.4872152487215249, "grad_norm": 4.18277645111084, "learning_rate": 2.3116829861801635e-05, "loss": 1.7213, "step": 1048 }, { "epoch": 0.4881450488145049, "grad_norm": 4.539891719818115, "learning_rate": 2.3038522606803832e-05, "loss": 1.5978, "step": 1050 }, { "epoch": 0.48907484890748487, "grad_norm": 4.849283218383789, "learning_rate": 2.2960234710796016e-05, "loss": 1.7513, "step": 1052 }, { "epoch": 0.4900046490004649, "grad_norm": 4.313057899475098, "learning_rate": 2.288196694644812e-05, "loss": 1.5255, "step": 1054 }, { "epoch": 0.4909344490934449, "grad_norm": 4.695862293243408, "learning_rate": 2.2803720086231377e-05, "loss": 1.7949, "step": 1056 }, { "epoch": 0.4918642491864249, "grad_norm": 4.905305862426758, "learning_rate": 2.272549490241071e-05, "loss": 1.6749, "step": 1058 }, { "epoch": 0.49279404927940496, "grad_norm": 4.747077941894531, "learning_rate": 2.2647292167037093e-05, "loss": 1.7421, "step": 1060 }, { "epoch": 0.49372384937238495, "grad_norm": 5.540545463562012, "learning_rate": 2.2569112651939972e-05, "loss": 1.6711, "step": 1062 }, { "epoch": 0.49465364946536494, "grad_norm": 4.607746124267578, "learning_rate": 2.249095712871958e-05, "loss": 1.6105, "step": 1064 }, { "epoch": 0.49558344955834494, "grad_norm": 4.593832969665527, "learning_rate": 2.241282636873939e-05, "loss": 1.7166, "step": 1066 }, { "epoch": 0.496513249651325, "grad_norm": 5.044536113739014, "learning_rate": 2.233472114311845e-05, "loss": 1.5399, "step": 1068 }, { "epoch": 0.497443049744305, "grad_norm": 4.648595809936523, "learning_rate": 2.2256642222723818e-05, "loss": 1.6037, "step": 1070 }, { "epoch": 0.49837284983728497, "grad_norm": 4.43579626083374, "learning_rate": 2.217859037816291e-05, "loss": 1.6933, "step": 1072 }, { "epoch": 0.499302649930265, "grad_norm": 4.984807968139648, "learning_rate": 2.2100566379775916e-05, "loss": 1.708, "step": 1074 }, { "epoch": 0.500232450023245, "grad_norm": 4.383101940155029, "learning_rate": 2.2022570997628212e-05, "loss": 1.6405, "step": 1076 }, { "epoch": 0.5011622501162251, "grad_norm": 4.444571018218994, "learning_rate": 2.1944605001502715e-05, "loss": 1.6103, "step": 1078 }, { "epoch": 0.502092050209205, "grad_norm": 4.517734050750732, "learning_rate": 2.186666916089235e-05, "loss": 1.6598, "step": 1080 }, { "epoch": 0.503021850302185, "grad_norm": 4.9116058349609375, "learning_rate": 2.1788764244992386e-05, "loss": 1.7348, "step": 1082 }, { "epoch": 0.503951650395165, "grad_norm": 4.8286967277526855, "learning_rate": 2.1710891022692895e-05, "loss": 1.6311, "step": 1084 }, { "epoch": 0.504881450488145, "grad_norm": 4.621704578399658, "learning_rate": 2.1633050262571145e-05, "loss": 1.6364, "step": 1086 }, { "epoch": 0.5058112505811251, "grad_norm": 4.160428047180176, "learning_rate": 2.155524273288401e-05, "loss": 1.518, "step": 1088 }, { "epoch": 0.506741050674105, "grad_norm": 4.390166282653809, "learning_rate": 2.1477469201560395e-05, "loss": 1.6832, "step": 1090 }, { "epoch": 0.5076708507670851, "grad_norm": 4.913259983062744, "learning_rate": 2.1399730436193656e-05, "loss": 1.6995, "step": 1092 }, { "epoch": 0.5086006508600651, "grad_norm": 4.839380741119385, "learning_rate": 2.1322027204034032e-05, "loss": 1.7164, "step": 1094 }, { "epoch": 0.509530450953045, "grad_norm": 4.663187503814697, "learning_rate": 2.1244360271981036e-05, "loss": 1.6107, "step": 1096 }, { "epoch": 0.5104602510460251, "grad_norm": 4.481899261474609, "learning_rate": 2.116673040657595e-05, "loss": 1.7246, "step": 1098 }, { "epoch": 0.5113900511390052, "grad_norm": 4.791706562042236, "learning_rate": 2.1089138373994193e-05, "loss": 1.6248, "step": 1100 }, { "epoch": 0.5123198512319851, "grad_norm": 4.475832939147949, "learning_rate": 2.1011584940037805e-05, "loss": 1.6507, "step": 1102 }, { "epoch": 0.5132496513249651, "grad_norm": 4.251932621002197, "learning_rate": 2.0934070870127874e-05, "loss": 1.6632, "step": 1104 }, { "epoch": 0.5141794514179452, "grad_norm": 4.642002105712891, "learning_rate": 2.0856596929296975e-05, "loss": 1.577, "step": 1106 }, { "epoch": 0.5151092515109251, "grad_norm": 4.488365173339844, "learning_rate": 2.0779163882181618e-05, "loss": 1.5689, "step": 1108 }, { "epoch": 0.5160390516039052, "grad_norm": 4.80255651473999, "learning_rate": 2.0701772493014726e-05, "loss": 1.6581, "step": 1110 }, { "epoch": 0.5169688516968852, "grad_norm": 4.444528579711914, "learning_rate": 2.062442352561807e-05, "loss": 1.6258, "step": 1112 }, { "epoch": 0.5178986517898652, "grad_norm": 4.56230354309082, "learning_rate": 2.0547117743394714e-05, "loss": 1.6989, "step": 1114 }, { "epoch": 0.5188284518828452, "grad_norm": 5.3042378425598145, "learning_rate": 2.046985590932154e-05, "loss": 1.8424, "step": 1116 }, { "epoch": 0.5197582519758251, "grad_norm": 4.86590576171875, "learning_rate": 2.039263878594164e-05, "loss": 1.6026, "step": 1118 }, { "epoch": 0.5206880520688052, "grad_norm": 5.197612285614014, "learning_rate": 2.031546713535686e-05, "loss": 1.8108, "step": 1120 }, { "epoch": 0.5216178521617852, "grad_norm": 5.125945091247559, "learning_rate": 2.023834171922023e-05, "loss": 1.6369, "step": 1122 }, { "epoch": 0.5225476522547652, "grad_norm": 4.574217319488525, "learning_rate": 2.0161263298728475e-05, "loss": 1.6758, "step": 1124 }, { "epoch": 0.5234774523477452, "grad_norm": 4.838070869445801, "learning_rate": 2.008423263461447e-05, "loss": 1.6228, "step": 1126 }, { "epoch": 0.5244072524407253, "grad_norm": 5.235374927520752, "learning_rate": 2.0007250487139802e-05, "loss": 1.5977, "step": 1128 }, { "epoch": 0.5253370525337052, "grad_norm": 4.406445026397705, "learning_rate": 1.9930317616087166e-05, "loss": 1.6181, "step": 1130 }, { "epoch": 0.5262668526266853, "grad_norm": 4.446695804595947, "learning_rate": 1.9853434780752936e-05, "loss": 1.477, "step": 1132 }, { "epoch": 0.5271966527196653, "grad_norm": 4.7650909423828125, "learning_rate": 1.977660273993968e-05, "loss": 1.6301, "step": 1134 }, { "epoch": 0.5281264528126453, "grad_norm": 4.450459003448486, "learning_rate": 1.9699822251948606e-05, "loss": 1.5216, "step": 1136 }, { "epoch": 0.5290562529056253, "grad_norm": 4.6309099197387695, "learning_rate": 1.962309407457214e-05, "loss": 1.5502, "step": 1138 }, { "epoch": 0.5299860529986054, "grad_norm": 4.897838115692139, "learning_rate": 1.954641896508641e-05, "loss": 1.6047, "step": 1140 }, { "epoch": 0.5309158530915853, "grad_norm": 4.154879570007324, "learning_rate": 1.9469797680243804e-05, "loss": 1.5258, "step": 1142 }, { "epoch": 0.5318456531845653, "grad_norm": 5.933204650878906, "learning_rate": 1.9393230976265443e-05, "loss": 1.6495, "step": 1144 }, { "epoch": 0.5327754532775453, "grad_norm": 4.8325724601745605, "learning_rate": 1.9316719608833818e-05, "loss": 1.5936, "step": 1146 }, { "epoch": 0.5337052533705253, "grad_norm": 4.448587894439697, "learning_rate": 1.9240264333085214e-05, "loss": 1.6621, "step": 1148 }, { "epoch": 0.5346350534635054, "grad_norm": 4.968630313873291, "learning_rate": 1.916386590360234e-05, "loss": 1.6792, "step": 1150 }, { "epoch": 0.5355648535564853, "grad_norm": 4.631704807281494, "learning_rate": 1.9087525074406855e-05, "loss": 1.6375, "step": 1152 }, { "epoch": 0.5364946536494654, "grad_norm": 4.869335651397705, "learning_rate": 1.901124259895193e-05, "loss": 1.6428, "step": 1154 }, { "epoch": 0.5374244537424454, "grad_norm": 4.836317539215088, "learning_rate": 1.8935019230114785e-05, "loss": 1.5731, "step": 1156 }, { "epoch": 0.5383542538354253, "grad_norm": 4.446506977081299, "learning_rate": 1.8858855720189313e-05, "loss": 1.6723, "step": 1158 }, { "epoch": 0.5392840539284054, "grad_norm": 4.740911483764648, "learning_rate": 1.8782752820878604e-05, "loss": 1.7482, "step": 1160 }, { "epoch": 0.5402138540213854, "grad_norm": 4.922398567199707, "learning_rate": 1.870671128328754e-05, "loss": 1.7105, "step": 1162 }, { "epoch": 0.5411436541143654, "grad_norm": 4.573601245880127, "learning_rate": 1.8630731857915422e-05, "loss": 1.5749, "step": 1164 }, { "epoch": 0.5420734542073454, "grad_norm": 4.312158107757568, "learning_rate": 1.8554815294648476e-05, "loss": 1.6218, "step": 1166 }, { "epoch": 0.5430032543003255, "grad_norm": 4.940700054168701, "learning_rate": 1.847896234275255e-05, "loss": 1.6116, "step": 1168 }, { "epoch": 0.5439330543933054, "grad_norm": 4.468361854553223, "learning_rate": 1.840317375086565e-05, "loss": 1.6357, "step": 1170 }, { "epoch": 0.5448628544862855, "grad_norm": 4.6152496337890625, "learning_rate": 1.832745026699059e-05, "loss": 1.5907, "step": 1172 }, { "epoch": 0.5457926545792655, "grad_norm": 4.892419338226318, "learning_rate": 1.8251792638487562e-05, "loss": 1.5442, "step": 1174 }, { "epoch": 0.5467224546722455, "grad_norm": 4.7281646728515625, "learning_rate": 1.817620161206684e-05, "loss": 1.5757, "step": 1176 }, { "epoch": 0.5476522547652255, "grad_norm": 4.6037092208862305, "learning_rate": 1.8100677933781337e-05, "loss": 1.4748, "step": 1178 }, { "epoch": 0.5485820548582054, "grad_norm": 4.472536087036133, "learning_rate": 1.802522234901924e-05, "loss": 1.4645, "step": 1180 }, { "epoch": 0.5495118549511855, "grad_norm": 5.12103271484375, "learning_rate": 1.794983560249674e-05, "loss": 1.5564, "step": 1182 }, { "epoch": 0.5504416550441655, "grad_norm": 4.855698108673096, "learning_rate": 1.787451843825057e-05, "loss": 1.4618, "step": 1184 }, { "epoch": 0.5513714551371455, "grad_norm": 4.277547359466553, "learning_rate": 1.7799271599630746e-05, "loss": 1.5185, "step": 1186 }, { "epoch": 0.5523012552301255, "grad_norm": 5.31210994720459, "learning_rate": 1.7724095829293175e-05, "loss": 1.6862, "step": 1188 }, { "epoch": 0.5532310553231056, "grad_norm": 4.83448600769043, "learning_rate": 1.7648991869192375e-05, "loss": 1.6061, "step": 1190 }, { "epoch": 0.5541608554160855, "grad_norm": 4.538383483886719, "learning_rate": 1.7573960460574096e-05, "loss": 1.5818, "step": 1192 }, { "epoch": 0.5550906555090656, "grad_norm": 4.796000957489014, "learning_rate": 1.7499002343968067e-05, "loss": 1.6228, "step": 1194 }, { "epoch": 0.5560204556020456, "grad_norm": 4.555052757263184, "learning_rate": 1.7424118259180624e-05, "loss": 1.5277, "step": 1196 }, { "epoch": 0.5569502556950255, "grad_norm": 5.21728515625, "learning_rate": 1.734930894528745e-05, "loss": 1.6449, "step": 1198 }, { "epoch": 0.5578800557880056, "grad_norm": 4.598663806915283, "learning_rate": 1.7274575140626287e-05, "loss": 1.5523, "step": 1200 }, { "epoch": 0.5588098558809856, "grad_norm": 4.807900428771973, "learning_rate": 1.71999175827896e-05, "loss": 1.65, "step": 1202 }, { "epoch": 0.5597396559739656, "grad_norm": 4.662602424621582, "learning_rate": 1.712533700861735e-05, "loss": 1.6994, "step": 1204 }, { "epoch": 0.5606694560669456, "grad_norm": 5.361908912658691, "learning_rate": 1.7050834154189692e-05, "loss": 1.7072, "step": 1206 }, { "epoch": 0.5615992561599256, "grad_norm": 4.904088020324707, "learning_rate": 1.6976409754819733e-05, "loss": 1.6624, "step": 1208 }, { "epoch": 0.5625290562529056, "grad_norm": 4.768721580505371, "learning_rate": 1.690206454504623e-05, "loss": 1.5137, "step": 1210 }, { "epoch": 0.5634588563458857, "grad_norm": 4.7972917556762695, "learning_rate": 1.6827799258626405e-05, "loss": 1.6011, "step": 1212 }, { "epoch": 0.5643886564388656, "grad_norm": 4.4553022384643555, "learning_rate": 1.6753614628528645e-05, "loss": 1.5175, "step": 1214 }, { "epoch": 0.5653184565318456, "grad_norm": 4.406479835510254, "learning_rate": 1.6679511386925303e-05, "loss": 1.4759, "step": 1216 }, { "epoch": 0.5662482566248257, "grad_norm": 4.911942958831787, "learning_rate": 1.6605490265185455e-05, "loss": 1.6111, "step": 1218 }, { "epoch": 0.5671780567178056, "grad_norm": 4.206980228424072, "learning_rate": 1.6531551993867686e-05, "loss": 1.6006, "step": 1220 }, { "epoch": 0.5681078568107857, "grad_norm": 4.298802375793457, "learning_rate": 1.6457697302712887e-05, "loss": 1.6681, "step": 1222 }, { "epoch": 0.5690376569037657, "grad_norm": 4.497662544250488, "learning_rate": 1.6383926920637046e-05, "loss": 1.5712, "step": 1224 }, { "epoch": 0.5699674569967457, "grad_norm": 4.332131385803223, "learning_rate": 1.631024157572405e-05, "loss": 1.5246, "step": 1226 }, { "epoch": 0.5708972570897257, "grad_norm": 4.2439961433410645, "learning_rate": 1.6236641995218497e-05, "loss": 1.4933, "step": 1228 }, { "epoch": 0.5718270571827058, "grad_norm": 4.836977481842041, "learning_rate": 1.6163128905518547e-05, "loss": 1.4587, "step": 1230 }, { "epoch": 0.5727568572756857, "grad_norm": 4.548823356628418, "learning_rate": 1.6089703032168706e-05, "loss": 1.4639, "step": 1232 }, { "epoch": 0.5736866573686658, "grad_norm": 4.135804176330566, "learning_rate": 1.6016365099852708e-05, "loss": 1.5829, "step": 1234 }, { "epoch": 0.5746164574616457, "grad_norm": 5.185515880584717, "learning_rate": 1.594311583238633e-05, "loss": 1.6363, "step": 1236 }, { "epoch": 0.5755462575546257, "grad_norm": 4.851355075836182, "learning_rate": 1.5869955952710278e-05, "loss": 1.5762, "step": 1238 }, { "epoch": 0.5764760576476058, "grad_norm": 4.732079029083252, "learning_rate": 1.5796886182883022e-05, "loss": 1.4652, "step": 1240 }, { "epoch": 0.5774058577405857, "grad_norm": 4.511694431304932, "learning_rate": 1.5723907244073692e-05, "loss": 1.495, "step": 1242 }, { "epoch": 0.5783356578335658, "grad_norm": 5.433631896972656, "learning_rate": 1.565101985655496e-05, "loss": 1.5547, "step": 1244 }, { "epoch": 0.5792654579265458, "grad_norm": 5.427394390106201, "learning_rate": 1.55782247396959e-05, "loss": 1.7036, "step": 1246 }, { "epoch": 0.5801952580195258, "grad_norm": 4.668501377105713, "learning_rate": 1.5505522611954947e-05, "loss": 1.5027, "step": 1248 }, { "epoch": 0.5811250581125058, "grad_norm": 4.328983306884766, "learning_rate": 1.543291419087273e-05, "loss": 1.6366, "step": 1250 }, { "epoch": 0.5820548582054859, "grad_norm": 4.134218692779541, "learning_rate": 1.536040019306506e-05, "loss": 1.6055, "step": 1252 }, { "epoch": 0.5829846582984658, "grad_norm": 4.820336818695068, "learning_rate": 1.528798133421582e-05, "loss": 1.4852, "step": 1254 }, { "epoch": 0.5839144583914458, "grad_norm": 4.58552885055542, "learning_rate": 1.5215658329069914e-05, "loss": 1.4408, "step": 1256 }, { "epoch": 0.5848442584844259, "grad_norm": 5.24392032623291, "learning_rate": 1.5143431891426191e-05, "loss": 1.5976, "step": 1258 }, { "epoch": 0.5857740585774058, "grad_norm": 4.615086078643799, "learning_rate": 1.507130273413045e-05, "loss": 1.51, "step": 1260 }, { "epoch": 0.5867038586703859, "grad_norm": 4.7751665115356445, "learning_rate": 1.4999271569068353e-05, "loss": 1.5145, "step": 1262 }, { "epoch": 0.5876336587633659, "grad_norm": 4.780058860778809, "learning_rate": 1.4927339107158401e-05, "loss": 1.4814, "step": 1264 }, { "epoch": 0.5885634588563459, "grad_norm": 4.306751251220703, "learning_rate": 1.485550605834497e-05, "loss": 1.4863, "step": 1266 }, { "epoch": 0.5894932589493259, "grad_norm": 4.448248863220215, "learning_rate": 1.4783773131591244e-05, "loss": 1.392, "step": 1268 }, { "epoch": 0.5904230590423059, "grad_norm": 5.197272777557373, "learning_rate": 1.471214103487225e-05, "loss": 1.645, "step": 1270 }, { "epoch": 0.5913528591352859, "grad_norm": 5.075355052947998, "learning_rate": 1.4640610475167868e-05, "loss": 1.669, "step": 1272 }, { "epoch": 0.592282659228266, "grad_norm": 5.091068744659424, "learning_rate": 1.4569182158455844e-05, "loss": 1.5795, "step": 1274 }, { "epoch": 0.5932124593212459, "grad_norm": 4.677691459655762, "learning_rate": 1.4497856789704814e-05, "loss": 1.6244, "step": 1276 }, { "epoch": 0.5941422594142259, "grad_norm": 4.051063537597656, "learning_rate": 1.4426635072867392e-05, "loss": 1.4453, "step": 1278 }, { "epoch": 0.595072059507206, "grad_norm": 4.821958065032959, "learning_rate": 1.4355517710873155e-05, "loss": 1.5466, "step": 1280 }, { "epoch": 0.5960018596001859, "grad_norm": 4.9236955642700195, "learning_rate": 1.4284505405621766e-05, "loss": 1.5388, "step": 1282 }, { "epoch": 0.596931659693166, "grad_norm": 4.755990028381348, "learning_rate": 1.4213598857976007e-05, "loss": 1.4851, "step": 1284 }, { "epoch": 0.597861459786146, "grad_norm": 4.209540367126465, "learning_rate": 1.4142798767754859e-05, "loss": 1.5656, "step": 1286 }, { "epoch": 0.598791259879126, "grad_norm": 4.366684436798096, "learning_rate": 1.4072105833726658e-05, "loss": 1.5027, "step": 1288 }, { "epoch": 0.599721059972106, "grad_norm": 4.372682571411133, "learning_rate": 1.4001520753602095e-05, "loss": 1.509, "step": 1290 }, { "epoch": 0.6006508600650861, "grad_norm": 4.887447357177734, "learning_rate": 1.3931044224027443e-05, "loss": 1.6143, "step": 1292 }, { "epoch": 0.601580660158066, "grad_norm": 4.5104660987854, "learning_rate": 1.3860676940577569e-05, "loss": 1.4845, "step": 1294 }, { "epoch": 0.602510460251046, "grad_norm": 4.36861515045166, "learning_rate": 1.3790419597749174e-05, "loss": 1.6456, "step": 1296 }, { "epoch": 0.603440260344026, "grad_norm": 5.539945125579834, "learning_rate": 1.3720272888953848e-05, "loss": 1.5614, "step": 1298 }, { "epoch": 0.604370060437006, "grad_norm": 4.768337726593018, "learning_rate": 1.3650237506511308e-05, "loss": 1.4683, "step": 1300 }, { "epoch": 0.6052998605299861, "grad_norm": 4.661020755767822, "learning_rate": 1.3580314141642495e-05, "loss": 1.4403, "step": 1302 }, { "epoch": 0.606229660622966, "grad_norm": 5.028249263763428, "learning_rate": 1.3510503484462782e-05, "loss": 1.4917, "step": 1304 }, { "epoch": 0.6071594607159461, "grad_norm": 5.140357494354248, "learning_rate": 1.3440806223975176e-05, "loss": 1.4967, "step": 1306 }, { "epoch": 0.6080892608089261, "grad_norm": 4.626140594482422, "learning_rate": 1.3371223048063519e-05, "loss": 1.4675, "step": 1308 }, { "epoch": 0.609019060901906, "grad_norm": 4.616276741027832, "learning_rate": 1.3301754643485647e-05, "loss": 1.5805, "step": 1310 }, { "epoch": 0.6099488609948861, "grad_norm": 5.121102333068848, "learning_rate": 1.3232401695866674e-05, "loss": 1.5419, "step": 1312 }, { "epoch": 0.6108786610878661, "grad_norm": 4.684119701385498, "learning_rate": 1.3163164889692175e-05, "loss": 1.5715, "step": 1314 }, { "epoch": 0.6118084611808461, "grad_norm": 4.637204170227051, "learning_rate": 1.3094044908301497e-05, "loss": 1.4258, "step": 1316 }, { "epoch": 0.6127382612738261, "grad_norm": 4.866714954376221, "learning_rate": 1.3025042433880955e-05, "loss": 1.4792, "step": 1318 }, { "epoch": 0.6136680613668062, "grad_norm": 5.610158443450928, "learning_rate": 1.2956158147457104e-05, "loss": 1.4585, "step": 1320 }, { "epoch": 0.6145978614597861, "grad_norm": 5.01410436630249, "learning_rate": 1.2887392728890031e-05, "loss": 1.4802, "step": 1322 }, { "epoch": 0.6155276615527662, "grad_norm": 4.581000328063965, "learning_rate": 1.2818746856866677e-05, "loss": 1.4514, "step": 1324 }, { "epoch": 0.6164574616457462, "grad_norm": 4.580539226531982, "learning_rate": 1.2750221208894072e-05, "loss": 1.4619, "step": 1326 }, { "epoch": 0.6173872617387262, "grad_norm": 4.602652549743652, "learning_rate": 1.2681816461292684e-05, "loss": 1.4505, "step": 1328 }, { "epoch": 0.6183170618317062, "grad_norm": 4.463119029998779, "learning_rate": 1.261353328918979e-05, "loss": 1.4538, "step": 1330 }, { "epoch": 0.6192468619246861, "grad_norm": 4.713504791259766, "learning_rate": 1.2545372366512712e-05, "loss": 1.4124, "step": 1332 }, { "epoch": 0.6201766620176662, "grad_norm": 5.088986396789551, "learning_rate": 1.2477334365982241e-05, "loss": 1.5549, "step": 1334 }, { "epoch": 0.6211064621106462, "grad_norm": 4.683056831359863, "learning_rate": 1.2409419959105962e-05, "loss": 1.4386, "step": 1336 }, { "epoch": 0.6220362622036262, "grad_norm": 4.354305267333984, "learning_rate": 1.2341629816171674e-05, "loss": 1.5799, "step": 1338 }, { "epoch": 0.6229660622966062, "grad_norm": 5.031248092651367, "learning_rate": 1.2273964606240699e-05, "loss": 1.5375, "step": 1340 }, { "epoch": 0.6238958623895863, "grad_norm": 4.514839172363281, "learning_rate": 1.2206424997141361e-05, "loss": 1.4592, "step": 1342 }, { "epoch": 0.6248256624825662, "grad_norm": 4.614261627197266, "learning_rate": 1.2139011655462318e-05, "loss": 1.6582, "step": 1344 }, { "epoch": 0.6257554625755463, "grad_norm": 5.420262336730957, "learning_rate": 1.2071725246546054e-05, "loss": 1.6369, "step": 1346 }, { "epoch": 0.6266852626685263, "grad_norm": 4.559269428253174, "learning_rate": 1.2004566434482253e-05, "loss": 1.5157, "step": 1348 }, { "epoch": 0.6276150627615062, "grad_norm": 4.5897955894470215, "learning_rate": 1.1937535882101264e-05, "loss": 1.4461, "step": 1350 }, { "epoch": 0.6285448628544863, "grad_norm": 5.626828193664551, "learning_rate": 1.187063425096759e-05, "loss": 1.624, "step": 1352 }, { "epoch": 0.6294746629474663, "grad_norm": 4.819149971008301, "learning_rate": 1.1803862201373327e-05, "loss": 1.4993, "step": 1354 }, { "epoch": 0.6304044630404463, "grad_norm": 4.999179840087891, "learning_rate": 1.173722039233164e-05, "loss": 1.5006, "step": 1356 }, { "epoch": 0.6313342631334263, "grad_norm": 4.634060382843018, "learning_rate": 1.167070948157027e-05, "loss": 1.6868, "step": 1358 }, { "epoch": 0.6322640632264063, "grad_norm": 4.900086402893066, "learning_rate": 1.1604330125525076e-05, "loss": 1.5231, "step": 1360 }, { "epoch": 0.6331938633193863, "grad_norm": 4.538697719573975, "learning_rate": 1.1538082979333491e-05, "loss": 1.3683, "step": 1362 }, { "epoch": 0.6341236634123664, "grad_norm": 5.081866264343262, "learning_rate": 1.1471968696828094e-05, "loss": 1.4314, "step": 1364 }, { "epoch": 0.6350534635053463, "grad_norm": 4.619677543640137, "learning_rate": 1.1405987930530174e-05, "loss": 1.3661, "step": 1366 }, { "epoch": 0.6359832635983264, "grad_norm": 4.841922760009766, "learning_rate": 1.1340141331643264e-05, "loss": 1.4162, "step": 1368 }, { "epoch": 0.6369130636913064, "grad_norm": 4.799725532531738, "learning_rate": 1.12744295500467e-05, "loss": 1.5387, "step": 1370 }, { "epoch": 0.6378428637842863, "grad_norm": 5.870131492614746, "learning_rate": 1.1208853234289235e-05, "loss": 1.6535, "step": 1372 }, { "epoch": 0.6387726638772664, "grad_norm": 4.434726238250732, "learning_rate": 1.1143413031582643e-05, "loss": 1.4708, "step": 1374 }, { "epoch": 0.6397024639702464, "grad_norm": 4.301927089691162, "learning_rate": 1.107810958779529e-05, "loss": 1.4545, "step": 1376 }, { "epoch": 0.6406322640632264, "grad_norm": 4.830667972564697, "learning_rate": 1.1012943547445819e-05, "loss": 1.5125, "step": 1378 }, { "epoch": 0.6415620641562064, "grad_norm": 5.070117473602295, "learning_rate": 1.0947915553696723e-05, "loss": 1.486, "step": 1380 }, { "epoch": 0.6424918642491865, "grad_norm": 4.656167984008789, "learning_rate": 1.0883026248348068e-05, "loss": 1.4847, "step": 1382 }, { "epoch": 0.6434216643421664, "grad_norm": 4.717787265777588, "learning_rate": 1.0818276271831083e-05, "loss": 1.6142, "step": 1384 }, { "epoch": 0.6443514644351465, "grad_norm": 4.161818981170654, "learning_rate": 1.0753666263201907e-05, "loss": 1.5519, "step": 1386 }, { "epoch": 0.6452812645281265, "grad_norm": 4.8757004737854, "learning_rate": 1.0689196860135215e-05, "loss": 1.5478, "step": 1388 }, { "epoch": 0.6462110646211064, "grad_norm": 5.74949312210083, "learning_rate": 1.0624868698918025e-05, "loss": 1.4742, "step": 1390 }, { "epoch": 0.6471408647140865, "grad_norm": 4.586982727050781, "learning_rate": 1.0560682414443303e-05, "loss": 1.4265, "step": 1392 }, { "epoch": 0.6480706648070664, "grad_norm": 4.4625630378723145, "learning_rate": 1.0496638640203752e-05, "loss": 1.3965, "step": 1394 }, { "epoch": 0.6490004649000465, "grad_norm": 5.521698951721191, "learning_rate": 1.0432738008285588e-05, "loss": 1.6036, "step": 1396 }, { "epoch": 0.6499302649930265, "grad_norm": 4.666378021240234, "learning_rate": 1.036898114936224e-05, "loss": 1.4592, "step": 1398 }, { "epoch": 0.6508600650860065, "grad_norm": 4.393011569976807, "learning_rate": 1.030536869268816e-05, "loss": 1.5611, "step": 1400 }, { "epoch": 0.6517898651789865, "grad_norm": 4.400356769561768, "learning_rate": 1.0241901266092617e-05, "loss": 1.4127, "step": 1402 }, { "epoch": 0.6527196652719666, "grad_norm": 4.56599760055542, "learning_rate": 1.0178579495973504e-05, "loss": 1.5107, "step": 1404 }, { "epoch": 0.6536494653649465, "grad_norm": 5.171332836151123, "learning_rate": 1.0115404007291116e-05, "loss": 1.5374, "step": 1406 }, { "epoch": 0.6545792654579266, "grad_norm": 4.816254615783691, "learning_rate": 1.005237542356202e-05, "loss": 1.4549, "step": 1408 }, { "epoch": 0.6555090655509066, "grad_norm": 5.073785781860352, "learning_rate": 9.989494366852887e-06, "loss": 1.4637, "step": 1410 }, { "epoch": 0.6564388656438865, "grad_norm": 4.863129615783691, "learning_rate": 9.926761457774365e-06, "loss": 1.4323, "step": 1412 }, { "epoch": 0.6573686657368666, "grad_norm": 5.309310436248779, "learning_rate": 9.864177315474953e-06, "loss": 1.5346, "step": 1414 }, { "epoch": 0.6582984658298466, "grad_norm": 4.598511695861816, "learning_rate": 9.80174255763485e-06, "loss": 1.3215, "step": 1416 }, { "epoch": 0.6592282659228266, "grad_norm": 4.709428310394287, "learning_rate": 9.739457800459926e-06, "loss": 1.3169, "step": 1418 }, { "epoch": 0.6601580660158066, "grad_norm": 5.323406219482422, "learning_rate": 9.67732365867558e-06, "loss": 1.4297, "step": 1420 }, { "epoch": 0.6610878661087866, "grad_norm": 4.517735481262207, "learning_rate": 9.615340745520711e-06, "loss": 1.4606, "step": 1422 }, { "epoch": 0.6620176662017666, "grad_norm": 4.762702465057373, "learning_rate": 9.553509672741624e-06, "loss": 1.4317, "step": 1424 }, { "epoch": 0.6629474662947467, "grad_norm": 4.522773742675781, "learning_rate": 9.491831050586088e-06, "loss": 1.3675, "step": 1426 }, { "epoch": 0.6638772663877266, "grad_norm": 4.530869007110596, "learning_rate": 9.43030548779718e-06, "loss": 1.4651, "step": 1428 }, { "epoch": 0.6648070664807066, "grad_norm": 4.852272033691406, "learning_rate": 9.368933591607358e-06, "loss": 1.5527, "step": 1430 }, { "epoch": 0.6657368665736867, "grad_norm": 5.136005878448486, "learning_rate": 9.30771596773248e-06, "loss": 1.4598, "step": 1432 }, { "epoch": 0.6666666666666666, "grad_norm": 4.766890525817871, "learning_rate": 9.246653220365766e-06, "loss": 1.4398, "step": 1434 }, { "epoch": 0.6675964667596467, "grad_norm": 5.467390060424805, "learning_rate": 9.185745952171878e-06, "loss": 1.5098, "step": 1436 }, { "epoch": 0.6685262668526267, "grad_norm": 5.114396095275879, "learning_rate": 9.12499476428097e-06, "loss": 1.5963, "step": 1438 }, { "epoch": 0.6694560669456067, "grad_norm": 5.343481540679932, "learning_rate": 9.064400256282745e-06, "loss": 1.5652, "step": 1440 }, { "epoch": 0.6703858670385867, "grad_norm": 4.492910861968994, "learning_rate": 9.003963026220531e-06, "loss": 1.4518, "step": 1442 }, { "epoch": 0.6713156671315668, "grad_norm": 4.401782035827637, "learning_rate": 8.943683670585385e-06, "loss": 1.4039, "step": 1444 }, { "epoch": 0.6722454672245467, "grad_norm": 5.382906436920166, "learning_rate": 8.883562784310207e-06, "loss": 1.4851, "step": 1446 }, { "epoch": 0.6731752673175267, "grad_norm": 4.8786773681640625, "learning_rate": 8.823600960763876e-06, "loss": 1.3857, "step": 1448 }, { "epoch": 0.6741050674105067, "grad_norm": 5.27847146987915, "learning_rate": 8.763798791745398e-06, "loss": 1.3758, "step": 1450 }, { "epoch": 0.6750348675034867, "grad_norm": 5.5659050941467285, "learning_rate": 8.704156867478014e-06, "loss": 1.6226, "step": 1452 }, { "epoch": 0.6759646675964668, "grad_norm": 4.823101043701172, "learning_rate": 8.644675776603462e-06, "loss": 1.4909, "step": 1454 }, { "epoch": 0.6768944676894467, "grad_norm": 5.192868232727051, "learning_rate": 8.58535610617608e-06, "loss": 1.5019, "step": 1456 }, { "epoch": 0.6778242677824268, "grad_norm": 4.590816497802734, "learning_rate": 8.526198441657073e-06, "loss": 1.4197, "step": 1458 }, { "epoch": 0.6787540678754068, "grad_norm": 4.362811088562012, "learning_rate": 8.467203366908687e-06, "loss": 1.3262, "step": 1460 }, { "epoch": 0.6796838679683868, "grad_norm": 4.706368446350098, "learning_rate": 8.408371464188522e-06, "loss": 1.3786, "step": 1462 }, { "epoch": 0.6806136680613668, "grad_norm": 4.622016906738281, "learning_rate": 8.349703314143697e-06, "loss": 1.4895, "step": 1464 }, { "epoch": 0.6815434681543469, "grad_norm": 4.7902374267578125, "learning_rate": 8.291199495805166e-06, "loss": 1.4369, "step": 1466 }, { "epoch": 0.6824732682473268, "grad_norm": 4.792653560638428, "learning_rate": 8.232860586581986e-06, "loss": 1.4319, "step": 1468 }, { "epoch": 0.6834030683403068, "grad_norm": 4.963321685791016, "learning_rate": 8.174687162255659e-06, "loss": 1.346, "step": 1470 }, { "epoch": 0.6843328684332869, "grad_norm": 5.121315956115723, "learning_rate": 8.116679796974375e-06, "loss": 1.5358, "step": 1472 }, { "epoch": 0.6852626685262668, "grad_norm": 4.567327976226807, "learning_rate": 8.058839063247427e-06, "loss": 1.526, "step": 1474 }, { "epoch": 0.6861924686192469, "grad_norm": 4.648831367492676, "learning_rate": 8.001165531939506e-06, "loss": 1.3497, "step": 1476 }, { "epoch": 0.6871222687122269, "grad_norm": 4.862013339996338, "learning_rate": 7.943659772265077e-06, "loss": 1.4819, "step": 1478 }, { "epoch": 0.6880520688052069, "grad_norm": 4.534616470336914, "learning_rate": 7.886322351782776e-06, "loss": 1.3764, "step": 1480 }, { "epoch": 0.6889818688981869, "grad_norm": 4.6155242919921875, "learning_rate": 7.829153836389783e-06, "loss": 1.4483, "step": 1482 }, { "epoch": 0.6899116689911668, "grad_norm": 5.174537658691406, "learning_rate": 7.772154790316272e-06, "loss": 1.4551, "step": 1484 }, { "epoch": 0.6908414690841469, "grad_norm": 4.733208179473877, "learning_rate": 7.715325776119829e-06, "loss": 1.3343, "step": 1486 }, { "epoch": 0.691771269177127, "grad_norm": 4.483691215515137, "learning_rate": 7.658667354679866e-06, "loss": 1.3043, "step": 1488 }, { "epoch": 0.6927010692701069, "grad_norm": 5.074970245361328, "learning_rate": 7.6021800851921286e-06, "loss": 1.478, "step": 1490 }, { "epoch": 0.6936308693630869, "grad_norm": 4.445642948150635, "learning_rate": 7.545864525163173e-06, "loss": 1.4633, "step": 1492 }, { "epoch": 0.694560669456067, "grad_norm": 4.862878799438477, "learning_rate": 7.489721230404832e-06, "loss": 1.4351, "step": 1494 }, { "epoch": 0.6954904695490469, "grad_norm": 4.790167808532715, "learning_rate": 7.43375075502875e-06, "loss": 1.3882, "step": 1496 }, { "epoch": 0.696420269642027, "grad_norm": 4.855898380279541, "learning_rate": 7.377953651440949e-06, "loss": 1.5054, "step": 1498 }, { "epoch": 0.697350069735007, "grad_norm": 4.540627956390381, "learning_rate": 7.322330470336299e-06, "loss": 1.246, "step": 1500 }, { "epoch": 0.698279869827987, "grad_norm": 5.00856876373291, "learning_rate": 7.266881760693145e-06, "loss": 1.385, "step": 1502 }, { "epoch": 0.699209669920967, "grad_norm": 4.644662380218506, "learning_rate": 7.211608069767853e-06, "loss": 1.3807, "step": 1504 }, { "epoch": 0.700139470013947, "grad_norm": 5.097452163696289, "learning_rate": 7.156509943089458e-06, "loss": 1.4493, "step": 1506 }, { "epoch": 0.701069270106927, "grad_norm": 4.613853454589844, "learning_rate": 7.101587924454208e-06, "loss": 1.3535, "step": 1508 }, { "epoch": 0.701999070199907, "grad_norm": 5.250851154327393, "learning_rate": 7.04684255592027e-06, "loss": 1.3688, "step": 1510 }, { "epoch": 0.702928870292887, "grad_norm": 4.928889751434326, "learning_rate": 6.992274377802315e-06, "loss": 1.435, "step": 1512 }, { "epoch": 0.703858670385867, "grad_norm": 4.715306282043457, "learning_rate": 6.937883928666243e-06, "loss": 1.3482, "step": 1514 }, { "epoch": 0.7047884704788471, "grad_norm": 4.922756195068359, "learning_rate": 6.883671745323827e-06, "loss": 1.368, "step": 1516 }, { "epoch": 0.705718270571827, "grad_norm": 5.271986961364746, "learning_rate": 6.829638362827419e-06, "loss": 1.4472, "step": 1518 }, { "epoch": 0.7066480706648071, "grad_norm": 4.809653282165527, "learning_rate": 6.775784314464697e-06, "loss": 1.3973, "step": 1520 }, { "epoch": 0.7075778707577871, "grad_norm": 4.964958190917969, "learning_rate": 6.722110131753385e-06, "loss": 1.3821, "step": 1522 }, { "epoch": 0.708507670850767, "grad_norm": 4.668941974639893, "learning_rate": 6.6686163444359916e-06, "loss": 1.3214, "step": 1524 }, { "epoch": 0.7094374709437471, "grad_norm": 5.29959774017334, "learning_rate": 6.61530348047459e-06, "loss": 1.4072, "step": 1526 }, { "epoch": 0.7103672710367271, "grad_norm": 4.7238922119140625, "learning_rate": 6.562172066045642e-06, "loss": 1.2894, "step": 1528 }, { "epoch": 0.7112970711297071, "grad_norm": 4.780757904052734, "learning_rate": 6.509222625534751e-06, "loss": 1.4396, "step": 1530 }, { "epoch": 0.7122268712226871, "grad_norm": 4.612026691436768, "learning_rate": 6.45645568153151e-06, "loss": 1.3003, "step": 1532 }, { "epoch": 0.7131566713156672, "grad_norm": 4.611727237701416, "learning_rate": 6.40387175482436e-06, "loss": 1.3883, "step": 1534 }, { "epoch": 0.7140864714086471, "grad_norm": 5.190316677093506, "learning_rate": 6.351471364395435e-06, "loss": 1.3978, "step": 1536 }, { "epoch": 0.7150162715016272, "grad_norm": 4.321396827697754, "learning_rate": 6.299255027415431e-06, "loss": 1.2811, "step": 1538 }, { "epoch": 0.7159460715946072, "grad_norm": 4.881733417510986, "learning_rate": 6.247223259238498e-06, "loss": 1.5083, "step": 1540 }, { "epoch": 0.7168758716875872, "grad_norm": 4.490734577178955, "learning_rate": 6.195376573397206e-06, "loss": 1.3559, "step": 1542 }, { "epoch": 0.7178056717805672, "grad_norm": 4.647560119628906, "learning_rate": 6.143715481597392e-06, "loss": 1.3833, "step": 1544 }, { "epoch": 0.7187354718735471, "grad_norm": 4.564016342163086, "learning_rate": 6.092240493713193e-06, "loss": 1.4187, "step": 1546 }, { "epoch": 0.7196652719665272, "grad_norm": 5.203334808349609, "learning_rate": 6.040952117781941e-06, "loss": 1.4693, "step": 1548 }, { "epoch": 0.7205950720595072, "grad_norm": 5.648266315460205, "learning_rate": 5.989850859999215e-06, "loss": 1.547, "step": 1550 }, { "epoch": 0.7215248721524872, "grad_norm": 4.59027624130249, "learning_rate": 5.938937224713797e-06, "loss": 1.3978, "step": 1552 }, { "epoch": 0.7224546722454672, "grad_norm": 4.740734577178955, "learning_rate": 5.8882117144227064e-06, "loss": 1.4945, "step": 1554 }, { "epoch": 0.7233844723384473, "grad_norm": 4.8459882736206055, "learning_rate": 5.8376748297662405e-06, "loss": 1.446, "step": 1556 }, { "epoch": 0.7243142724314272, "grad_norm": 4.942417621612549, "learning_rate": 5.787327069523074e-06, "loss": 1.3858, "step": 1558 }, { "epoch": 0.7252440725244073, "grad_norm": 4.585515975952148, "learning_rate": 5.7371689306052606e-06, "loss": 1.4456, "step": 1560 }, { "epoch": 0.7261738726173873, "grad_norm": 4.623073101043701, "learning_rate": 5.687200908053377e-06, "loss": 1.2795, "step": 1562 }, { "epoch": 0.7271036727103672, "grad_norm": 4.330341339111328, "learning_rate": 5.637423495031646e-06, "loss": 1.2714, "step": 1564 }, { "epoch": 0.7280334728033473, "grad_norm": 4.800925254821777, "learning_rate": 5.587837182823029e-06, "loss": 1.3816, "step": 1566 }, { "epoch": 0.7289632728963273, "grad_norm": 5.375191688537598, "learning_rate": 5.538442460824405e-06, "loss": 1.3719, "step": 1568 }, { "epoch": 0.7298930729893073, "grad_norm": 4.414741516113281, "learning_rate": 5.489239816541743e-06, "loss": 1.4283, "step": 1570 }, { "epoch": 0.7308228730822873, "grad_norm": 4.871700286865234, "learning_rate": 5.440229735585285e-06, "loss": 1.3049, "step": 1572 }, { "epoch": 0.7317526731752673, "grad_norm": 5.470729827880859, "learning_rate": 5.3914127016647396e-06, "loss": 1.5045, "step": 1574 }, { "epoch": 0.7326824732682473, "grad_norm": 4.6990132331848145, "learning_rate": 5.342789196584523e-06, "loss": 1.4339, "step": 1576 }, { "epoch": 0.7336122733612274, "grad_norm": 4.320725440979004, "learning_rate": 5.29435970023899e-06, "loss": 1.3879, "step": 1578 }, { "epoch": 0.7345420734542073, "grad_norm": 4.5506486892700195, "learning_rate": 5.246124690607728e-06, "loss": 1.424, "step": 1580 }, { "epoch": 0.7354718735471873, "grad_norm": 4.720996379852295, "learning_rate": 5.198084643750814e-06, "loss": 1.2781, "step": 1582 }, { "epoch": 0.7364016736401674, "grad_norm": 4.952864646911621, "learning_rate": 5.1502400338041046e-06, "loss": 1.3866, "step": 1584 }, { "epoch": 0.7373314737331473, "grad_norm": 5.144688129425049, "learning_rate": 5.102591332974593e-06, "loss": 1.5678, "step": 1586 }, { "epoch": 0.7382612738261274, "grad_norm": 4.915472984313965, "learning_rate": 5.055139011535717e-06, "loss": 1.3396, "step": 1588 }, { "epoch": 0.7391910739191074, "grad_norm": 4.6434431076049805, "learning_rate": 5.007883537822733e-06, "loss": 1.3349, "step": 1590 }, { "epoch": 0.7401208740120874, "grad_norm": 4.523582458496094, "learning_rate": 4.960825378228071e-06, "loss": 1.2863, "step": 1592 }, { "epoch": 0.7410506741050674, "grad_norm": 4.149693965911865, "learning_rate": 4.9139649971967995e-06, "loss": 1.3528, "step": 1594 }, { "epoch": 0.7419804741980475, "grad_norm": 4.928431510925293, "learning_rate": 4.867302857221943e-06, "loss": 1.3616, "step": 1596 }, { "epoch": 0.7429102742910274, "grad_norm": 4.783944129943848, "learning_rate": 4.820839418839988e-06, "loss": 1.2603, "step": 1598 }, { "epoch": 0.7438400743840075, "grad_norm": 4.721505165100098, "learning_rate": 4.774575140626307e-06, "loss": 1.3321, "step": 1600 }, { "epoch": 0.7447698744769874, "grad_norm": 4.513464450836182, "learning_rate": 4.728510479190659e-06, "loss": 1.2893, "step": 1602 }, { "epoch": 0.7456996745699674, "grad_norm": 5.0540995597839355, "learning_rate": 4.682645889172643e-06, "loss": 1.4843, "step": 1604 }, { "epoch": 0.7466294746629475, "grad_norm": 5.0999908447265625, "learning_rate": 4.636981823237254e-06, "loss": 1.3634, "step": 1606 }, { "epoch": 0.7475592747559274, "grad_norm": 4.81461238861084, "learning_rate": 4.591518732070393e-06, "loss": 1.4027, "step": 1608 }, { "epoch": 0.7484890748489075, "grad_norm": 4.60006046295166, "learning_rate": 4.54625706437441e-06, "loss": 1.409, "step": 1610 }, { "epoch": 0.7494188749418875, "grad_norm": 4.906893253326416, "learning_rate": 4.501197266863688e-06, "loss": 1.3583, "step": 1612 }, { "epoch": 0.7503486750348675, "grad_norm": 4.647506237030029, "learning_rate": 4.4563397842602385e-06, "loss": 1.4051, "step": 1614 }, { "epoch": 0.7512784751278475, "grad_norm": 5.091315269470215, "learning_rate": 4.4116850592893055e-06, "loss": 1.4682, "step": 1616 }, { "epoch": 0.7522082752208276, "grad_norm": 4.811418533325195, "learning_rate": 4.367233532675003e-06, "loss": 1.3141, "step": 1618 }, { "epoch": 0.7531380753138075, "grad_norm": 4.459003448486328, "learning_rate": 4.3229856431359484e-06, "loss": 1.2632, "step": 1620 }, { "epoch": 0.7540678754067875, "grad_norm": 4.703991889953613, "learning_rate": 4.278941827380944e-06, "loss": 1.4434, "step": 1622 }, { "epoch": 0.7549976754997676, "grad_norm": 4.644518852233887, "learning_rate": 4.235102520104677e-06, "loss": 1.3543, "step": 1624 }, { "epoch": 0.7559274755927475, "grad_norm": 4.769321918487549, "learning_rate": 4.19146815398341e-06, "loss": 1.2012, "step": 1626 }, { "epoch": 0.7568572756857276, "grad_norm": 4.772981643676758, "learning_rate": 4.148039159670708e-06, "loss": 1.2931, "step": 1628 }, { "epoch": 0.7577870757787076, "grad_norm": 4.802246570587158, "learning_rate": 4.104815965793235e-06, "loss": 1.376, "step": 1630 }, { "epoch": 0.7587168758716876, "grad_norm": 4.832859516143799, "learning_rate": 4.06179899894645e-06, "loss": 1.4179, "step": 1632 }, { "epoch": 0.7596466759646676, "grad_norm": 5.499051094055176, "learning_rate": 4.018988683690452e-06, "loss": 1.5073, "step": 1634 }, { "epoch": 0.7605764760576476, "grad_norm": 4.793254852294922, "learning_rate": 3.9763854425457654e-06, "loss": 1.392, "step": 1636 }, { "epoch": 0.7615062761506276, "grad_norm": 4.470448017120361, "learning_rate": 3.93398969598919e-06, "loss": 1.3287, "step": 1638 }, { "epoch": 0.7624360762436077, "grad_norm": 5.334385395050049, "learning_rate": 3.891801862449614e-06, "loss": 1.4588, "step": 1640 }, { "epoch": 0.7633658763365876, "grad_norm": 4.8712873458862305, "learning_rate": 3.849822358303939e-06, "loss": 1.4209, "step": 1642 }, { "epoch": 0.7642956764295676, "grad_norm": 5.272886276245117, "learning_rate": 3.8080515978729107e-06, "loss": 1.459, "step": 1644 }, { "epoch": 0.7652254765225477, "grad_norm": 4.924405097961426, "learning_rate": 3.7664899934170797e-06, "loss": 1.2457, "step": 1646 }, { "epoch": 0.7661552766155276, "grad_norm": 4.533785343170166, "learning_rate": 3.725137955132699e-06, "loss": 1.4606, "step": 1648 }, { "epoch": 0.7670850767085077, "grad_norm": 4.505516052246094, "learning_rate": 3.6839958911476876e-06, "loss": 1.3464, "step": 1650 }, { "epoch": 0.7680148768014877, "grad_norm": 4.526447772979736, "learning_rate": 3.643064207517611e-06, "loss": 1.2343, "step": 1652 }, { "epoch": 0.7689446768944677, "grad_norm": 5.066768646240234, "learning_rate": 3.602343308221668e-06, "loss": 1.305, "step": 1654 }, { "epoch": 0.7698744769874477, "grad_norm": 4.423480987548828, "learning_rate": 3.5618335951586903e-06, "loss": 1.4707, "step": 1656 }, { "epoch": 0.7708042770804278, "grad_norm": 5.181069374084473, "learning_rate": 3.521535468143182e-06, "loss": 1.3157, "step": 1658 }, { "epoch": 0.7717340771734077, "grad_norm": 5.266273498535156, "learning_rate": 3.481449324901405e-06, "loss": 1.4673, "step": 1660 }, { "epoch": 0.7726638772663877, "grad_norm": 5.03460693359375, "learning_rate": 3.4415755610673992e-06, "loss": 1.3968, "step": 1662 }, { "epoch": 0.7735936773593677, "grad_norm": 4.917730808258057, "learning_rate": 3.4019145701791057e-06, "loss": 1.367, "step": 1664 }, { "epoch": 0.7745234774523477, "grad_norm": 4.970690727233887, "learning_rate": 3.362466743674518e-06, "loss": 1.3303, "step": 1666 }, { "epoch": 0.7754532775453278, "grad_norm": 5.337306022644043, "learning_rate": 3.3232324708877427e-06, "loss": 1.4554, "step": 1668 }, { "epoch": 0.7763830776383077, "grad_norm": 4.317877292633057, "learning_rate": 3.2842121390452162e-06, "loss": 1.3544, "step": 1670 }, { "epoch": 0.7773128777312878, "grad_norm": 4.939489364624023, "learning_rate": 3.2454061332618513e-06, "loss": 1.304, "step": 1672 }, { "epoch": 0.7782426778242678, "grad_norm": 5.024317264556885, "learning_rate": 3.2068148365372742e-06, "loss": 1.3361, "step": 1674 }, { "epoch": 0.7791724779172478, "grad_norm": 4.807384014129639, "learning_rate": 3.1684386297519894e-06, "loss": 1.3934, "step": 1676 }, { "epoch": 0.7801022780102278, "grad_norm": 4.339854717254639, "learning_rate": 3.130277891663676e-06, "loss": 1.3155, "step": 1678 }, { "epoch": 0.7810320781032078, "grad_norm": 4.545622825622559, "learning_rate": 3.092332998903404e-06, "loss": 1.2764, "step": 1680 }, { "epoch": 0.7819618781961878, "grad_norm": 5.285658359527588, "learning_rate": 3.0546043259719536e-06, "loss": 1.3491, "step": 1682 }, { "epoch": 0.7828916782891678, "grad_norm": 4.678071975708008, "learning_rate": 3.017092245236091e-06, "loss": 1.3321, "step": 1684 }, { "epoch": 0.7838214783821479, "grad_norm": 4.892689228057861, "learning_rate": 2.979797126924904e-06, "loss": 1.296, "step": 1686 }, { "epoch": 0.7847512784751278, "grad_norm": 4.846113204956055, "learning_rate": 2.9427193391261585e-06, "loss": 1.3693, "step": 1688 }, { "epoch": 0.7856810785681079, "grad_norm": 5.418665885925293, "learning_rate": 2.905859247782657e-06, "loss": 1.4288, "step": 1690 }, { "epoch": 0.7866108786610879, "grad_norm": 4.893185138702393, "learning_rate": 2.869217216688615e-06, "loss": 1.3629, "step": 1692 }, { "epoch": 0.7875406787540679, "grad_norm": 5.181827068328857, "learning_rate": 2.8327936074860804e-06, "loss": 1.4014, "step": 1694 }, { "epoch": 0.7884704788470479, "grad_norm": 5.174293041229248, "learning_rate": 2.796588779661382e-06, "loss": 1.354, "step": 1696 }, { "epoch": 0.7894002789400278, "grad_norm": 4.71166467666626, "learning_rate": 2.760603090541549e-06, "loss": 1.5135, "step": 1698 }, { "epoch": 0.7903300790330079, "grad_norm": 5.304102897644043, "learning_rate": 2.7248368952907994e-06, "loss": 1.4693, "step": 1700 }, { "epoch": 0.7912598791259879, "grad_norm": 4.581517219543457, "learning_rate": 2.689290546907044e-06, "loss": 1.4912, "step": 1702 }, { "epoch": 0.7921896792189679, "grad_norm": 4.6414384841918945, "learning_rate": 2.6539643962183998e-06, "loss": 1.4776, "step": 1704 }, { "epoch": 0.7931194793119479, "grad_norm": 4.39076042175293, "learning_rate": 2.618858791879705e-06, "loss": 1.2701, "step": 1706 }, { "epoch": 0.794049279404928, "grad_norm": 4.355413913726807, "learning_rate": 2.5839740803690973e-06, "loss": 1.4065, "step": 1708 }, { "epoch": 0.7949790794979079, "grad_norm": 4.14140510559082, "learning_rate": 2.549310605984606e-06, "loss": 1.3333, "step": 1710 }, { "epoch": 0.795908879590888, "grad_norm": 5.52841854095459, "learning_rate": 2.5148687108407175e-06, "loss": 1.3966, "step": 1712 }, { "epoch": 0.796838679683868, "grad_norm": 5.082501411437988, "learning_rate": 2.480648734865043e-06, "loss": 1.2828, "step": 1714 }, { "epoch": 0.797768479776848, "grad_norm": 4.7992963790893555, "learning_rate": 2.446651015794923e-06, "loss": 1.4221, "step": 1716 }, { "epoch": 0.798698279869828, "grad_norm": 4.5345306396484375, "learning_rate": 2.4128758891741236e-06, "loss": 1.2781, "step": 1718 }, { "epoch": 0.799628079962808, "grad_norm": 4.555692195892334, "learning_rate": 2.379323688349511e-06, "loss": 1.2787, "step": 1720 }, { "epoch": 0.800557880055788, "grad_norm": 4.980441093444824, "learning_rate": 2.3459947444677533e-06, "loss": 1.4098, "step": 1722 }, { "epoch": 0.801487680148768, "grad_norm": 4.548128128051758, "learning_rate": 2.3128893864720703e-06, "loss": 1.3454, "step": 1724 }, { "epoch": 0.802417480241748, "grad_norm": 4.423843860626221, "learning_rate": 2.280007941098992e-06, "loss": 1.2473, "step": 1726 }, { "epoch": 0.803347280334728, "grad_norm": 4.79723596572876, "learning_rate": 2.2473507328751043e-06, "loss": 1.3978, "step": 1728 }, { "epoch": 0.8042770804277081, "grad_norm": 4.061401844024658, "learning_rate": 2.214918084113864e-06, "loss": 1.3155, "step": 1730 }, { "epoch": 0.805206880520688, "grad_norm": 5.578299045562744, "learning_rate": 2.1827103149124275e-06, "loss": 1.4024, "step": 1732 }, { "epoch": 0.806136680613668, "grad_norm": 4.972945690155029, "learning_rate": 2.150727743148469e-06, "loss": 1.4176, "step": 1734 }, { "epoch": 0.8070664807066481, "grad_norm": 4.645524024963379, "learning_rate": 2.1189706844770578e-06, "loss": 1.445, "step": 1736 }, { "epoch": 0.807996280799628, "grad_norm": 4.740366458892822, "learning_rate": 2.087439452327546e-06, "loss": 1.2666, "step": 1738 }, { "epoch": 0.8089260808926081, "grad_norm": 4.68005895614624, "learning_rate": 2.0561343579004677e-06, "loss": 1.3503, "step": 1740 }, { "epoch": 0.8098558809855881, "grad_norm": 4.873269081115723, "learning_rate": 2.025055710164466e-06, "loss": 1.3806, "step": 1742 }, { "epoch": 0.8107856810785681, "grad_norm": 5.03128719329834, "learning_rate": 1.9942038158532424e-06, "loss": 1.3838, "step": 1744 }, { "epoch": 0.8117154811715481, "grad_norm": 4.752600193023682, "learning_rate": 1.963578979462537e-06, "loss": 1.4468, "step": 1746 }, { "epoch": 0.8126452812645282, "grad_norm": 5.032496929168701, "learning_rate": 1.933181503247124e-06, "loss": 1.4033, "step": 1748 }, { "epoch": 0.8135750813575081, "grad_norm": 4.906529426574707, "learning_rate": 1.9030116872178276e-06, "loss": 1.3372, "step": 1750 }, { "epoch": 0.8145048814504882, "grad_norm": 4.8509745597839355, "learning_rate": 1.8730698291385424e-06, "loss": 1.2776, "step": 1752 }, { "epoch": 0.8154346815434682, "grad_norm": 4.634332180023193, "learning_rate": 1.8433562245233313e-06, "loss": 1.4368, "step": 1754 }, { "epoch": 0.8163644816364481, "grad_norm": 5.257920742034912, "learning_rate": 1.8138711666334647e-06, "loss": 1.3143, "step": 1756 }, { "epoch": 0.8172942817294282, "grad_norm": 4.556576251983643, "learning_rate": 1.7846149464745685e-06, "loss": 1.4108, "step": 1758 }, { "epoch": 0.8182240818224081, "grad_norm": 4.735037803649902, "learning_rate": 1.75558785279371e-06, "loss": 1.3595, "step": 1760 }, { "epoch": 0.8191538819153882, "grad_norm": 4.261061191558838, "learning_rate": 1.7267901720766028e-06, "loss": 1.204, "step": 1762 }, { "epoch": 0.8200836820083682, "grad_norm": 4.399402618408203, "learning_rate": 1.698222188544723e-06, "loss": 1.3651, "step": 1764 }, { "epoch": 0.8210134821013482, "grad_norm": 4.244713306427002, "learning_rate": 1.6698841841525363e-06, "loss": 1.4084, "step": 1766 }, { "epoch": 0.8219432821943282, "grad_norm": 5.027519226074219, "learning_rate": 1.641776438584696e-06, "loss": 1.3003, "step": 1768 }, { "epoch": 0.8228730822873083, "grad_norm": 4.959417343139648, "learning_rate": 1.613899229253315e-06, "loss": 1.3353, "step": 1770 }, { "epoch": 0.8238028823802882, "grad_norm": 4.672348976135254, "learning_rate": 1.5862528312951896e-06, "loss": 1.2939, "step": 1772 }, { "epoch": 0.8247326824732683, "grad_norm": 4.95656681060791, "learning_rate": 1.5588375175691026e-06, "loss": 1.3473, "step": 1774 }, { "epoch": 0.8256624825662483, "grad_norm": 4.148518085479736, "learning_rate": 1.531653558653145e-06, "loss": 1.3317, "step": 1776 }, { "epoch": 0.8265922826592282, "grad_norm": 4.703611850738525, "learning_rate": 1.5047012228420055e-06, "loss": 1.2812, "step": 1778 }, { "epoch": 0.8275220827522083, "grad_norm": 4.388473033905029, "learning_rate": 1.4779807761443634e-06, "loss": 1.1935, "step": 1780 }, { "epoch": 0.8284518828451883, "grad_norm": 4.520838737487793, "learning_rate": 1.4514924822802337e-06, "loss": 1.2793, "step": 1782 }, { "epoch": 0.8293816829381683, "grad_norm": 4.776659965515137, "learning_rate": 1.4252366026783842e-06, "loss": 1.4448, "step": 1784 }, { "epoch": 0.8303114830311483, "grad_norm": 4.750571250915527, "learning_rate": 1.3992133964737559e-06, "loss": 1.4104, "step": 1786 }, { "epoch": 0.8312412831241283, "grad_norm": 4.6424102783203125, "learning_rate": 1.3734231205048798e-06, "loss": 1.2889, "step": 1788 }, { "epoch": 0.8321710832171083, "grad_norm": 4.688456058502197, "learning_rate": 1.3478660293113648e-06, "loss": 1.4233, "step": 1790 }, { "epoch": 0.8331008833100884, "grad_norm": 4.783851623535156, "learning_rate": 1.3225423751313915e-06, "loss": 1.4142, "step": 1792 }, { "epoch": 0.8340306834030683, "grad_norm": 4.887422561645508, "learning_rate": 1.2974524078991997e-06, "loss": 1.3644, "step": 1794 }, { "epoch": 0.8349604834960483, "grad_norm": 5.22616720199585, "learning_rate": 1.2725963752426296e-06, "loss": 1.4178, "step": 1796 }, { "epoch": 0.8358902835890284, "grad_norm": 4.753333568572998, "learning_rate": 1.2479745224807023e-06, "loss": 1.3733, "step": 1798 }, { "epoch": 0.8368200836820083, "grad_norm": 5.13411283493042, "learning_rate": 1.2235870926211591e-06, "loss": 1.4001, "step": 1800 }, { "epoch": 0.8377498837749884, "grad_norm": 4.564548492431641, "learning_rate": 1.1994343263580844e-06, "loss": 1.3668, "step": 1802 }, { "epoch": 0.8386796838679684, "grad_norm": 4.782639980316162, "learning_rate": 1.1755164620695287e-06, "loss": 1.4086, "step": 1804 }, { "epoch": 0.8396094839609484, "grad_norm": 4.684906482696533, "learning_rate": 1.151833735815161e-06, "loss": 1.3576, "step": 1806 }, { "epoch": 0.8405392840539284, "grad_norm": 4.805467128753662, "learning_rate": 1.1283863813339234e-06, "loss": 1.2721, "step": 1808 }, { "epoch": 0.8414690841469085, "grad_norm": 4.601739883422852, "learning_rate": 1.1051746300417442e-06, "loss": 1.3419, "step": 1810 }, { "epoch": 0.8423988842398884, "grad_norm": 4.56768274307251, "learning_rate": 1.0821987110292335e-06, "loss": 1.3414, "step": 1812 }, { "epoch": 0.8433286843328685, "grad_norm": 4.6248579025268555, "learning_rate": 1.0594588510594417e-06, "loss": 1.404, "step": 1814 }, { "epoch": 0.8442584844258484, "grad_norm": 4.81447172164917, "learning_rate": 1.0369552745656015e-06, "loss": 1.3659, "step": 1816 }, { "epoch": 0.8451882845188284, "grad_norm": 4.772368907928467, "learning_rate": 1.0146882036489283e-06, "loss": 1.3565, "step": 1818 }, { "epoch": 0.8461180846118085, "grad_norm": 4.528961181640625, "learning_rate": 9.92657858076421e-07, "loss": 1.3899, "step": 1820 }, { "epoch": 0.8470478847047884, "grad_norm": 4.898530006408691, "learning_rate": 9.708644552787005e-07, "loss": 1.4368, "step": 1822 }, { "epoch": 0.8479776847977685, "grad_norm": 4.728163242340088, "learning_rate": 9.493082103478525e-07, "loss": 1.2556, "step": 1824 }, { "epoch": 0.8489074848907485, "grad_norm": 5.038796424865723, "learning_rate": 9.279893360353072e-07, "loss": 1.3468, "step": 1826 }, { "epoch": 0.8498372849837285, "grad_norm": 4.590108394622803, "learning_rate": 9.069080427497553e-07, "loss": 1.3751, "step": 1828 }, { "epoch": 0.8507670850767085, "grad_norm": 4.6117987632751465, "learning_rate": 8.860645385550489e-07, "loss": 1.3008, "step": 1830 }, { "epoch": 0.8516968851696886, "grad_norm": 4.591699123382568, "learning_rate": 8.65459029168151e-07, "loss": 1.1995, "step": 1832 }, { "epoch": 0.8526266852626685, "grad_norm": 4.765085697174072, "learning_rate": 8.450917179571285e-07, "loss": 1.3482, "step": 1834 }, { "epoch": 0.8535564853556485, "grad_norm": 4.740975379943848, "learning_rate": 8.249628059391231e-07, "loss": 1.2902, "step": 1836 }, { "epoch": 0.8544862854486286, "grad_norm": 5.478446960449219, "learning_rate": 8.050724917783645e-07, "loss": 1.5469, "step": 1838 }, { "epoch": 0.8554160855416085, "grad_norm": 4.140901565551758, "learning_rate": 7.854209717842213e-07, "loss": 1.2229, "step": 1840 }, { "epoch": 0.8563458856345886, "grad_norm": 4.623166084289551, "learning_rate": 7.66008439909267e-07, "loss": 1.39, "step": 1842 }, { "epoch": 0.8572756857275686, "grad_norm": 4.766707897186279, "learning_rate": 7.468350877473535e-07, "loss": 1.2574, "step": 1844 }, { "epoch": 0.8582054858205486, "grad_norm": 4.48606014251709, "learning_rate": 7.279011045317236e-07, "loss": 1.3353, "step": 1846 }, { "epoch": 0.8591352859135286, "grad_norm": 4.4160308837890625, "learning_rate": 7.092066771331491e-07, "loss": 1.1728, "step": 1848 }, { "epoch": 0.8600650860065086, "grad_norm": 4.9244256019592285, "learning_rate": 6.907519900580845e-07, "loss": 1.4899, "step": 1850 }, { "epoch": 0.8609948860994886, "grad_norm": 4.900967597961426, "learning_rate": 6.725372254468355e-07, "loss": 1.3258, "step": 1852 }, { "epoch": 0.8619246861924686, "grad_norm": 5.145378112792969, "learning_rate": 6.545625630717796e-07, "loss": 1.3842, "step": 1854 }, { "epoch": 0.8628544862854486, "grad_norm": 4.574743747711182, "learning_rate": 6.368281803355677e-07, "loss": 1.2075, "step": 1856 }, { "epoch": 0.8637842863784286, "grad_norm": 5.0530686378479, "learning_rate": 6.193342522694143e-07, "loss": 1.2789, "step": 1858 }, { "epoch": 0.8647140864714087, "grad_norm": 5.029927730560303, "learning_rate": 6.020809515313155e-07, "loss": 1.3613, "step": 1860 }, { "epoch": 0.8656438865643886, "grad_norm": 5.098250389099121, "learning_rate": 5.850684484043841e-07, "loss": 1.3453, "step": 1862 }, { "epoch": 0.8665736866573687, "grad_norm": 4.804204940795898, "learning_rate": 5.682969107951616e-07, "loss": 1.5045, "step": 1864 }, { "epoch": 0.8675034867503487, "grad_norm": 5.030449867248535, "learning_rate": 5.517665042319556e-07, "loss": 1.5603, "step": 1866 }, { "epoch": 0.8684332868433287, "grad_norm": 4.464358806610107, "learning_rate": 5.35477391863197e-07, "loss": 1.3782, "step": 1868 }, { "epoch": 0.8693630869363087, "grad_norm": 4.722510814666748, "learning_rate": 5.194297344558522e-07, "loss": 1.484, "step": 1870 }, { "epoch": 0.8702928870292888, "grad_norm": 4.941782474517822, "learning_rate": 5.036236903938271e-07, "loss": 1.4212, "step": 1872 }, { "epoch": 0.8712226871222687, "grad_norm": 4.116756916046143, "learning_rate": 4.88059415676391e-07, "loss": 1.1671, "step": 1874 }, { "epoch": 0.8721524872152487, "grad_norm": 4.484717845916748, "learning_rate": 4.7273706391664934e-07, "loss": 1.3341, "step": 1876 }, { "epoch": 0.8730822873082287, "grad_norm": 4.540302753448486, "learning_rate": 4.5765678634002875e-07, "loss": 1.3493, "step": 1878 }, { "epoch": 0.8740120874012087, "grad_norm": 4.785520076751709, "learning_rate": 4.4281873178278083e-07, "loss": 1.395, "step": 1880 }, { "epoch": 0.8749418874941888, "grad_norm": 4.537782669067383, "learning_rate": 4.282230466905195e-07, "loss": 1.331, "step": 1882 }, { "epoch": 0.8758716875871687, "grad_norm": 4.574783802032471, "learning_rate": 4.1386987511675577e-07, "loss": 1.3435, "step": 1884 }, { "epoch": 0.8768014876801488, "grad_norm": 4.44332218170166, "learning_rate": 3.9975935872150656e-07, "loss": 1.4209, "step": 1886 }, { "epoch": 0.8777312877731288, "grad_norm": 4.974414825439453, "learning_rate": 3.858916367698657e-07, "loss": 1.3786, "step": 1888 }, { "epoch": 0.8786610878661087, "grad_norm": 4.610214710235596, "learning_rate": 3.722668461306523e-07, "loss": 1.364, "step": 1890 }, { "epoch": 0.8795908879590888, "grad_norm": 4.644761085510254, "learning_rate": 3.5888512127504784e-07, "loss": 1.3569, "step": 1892 }, { "epoch": 0.8805206880520688, "grad_norm": 5.01552152633667, "learning_rate": 3.457465942752776e-07, "loss": 1.3568, "step": 1894 }, { "epoch": 0.8814504881450488, "grad_norm": 4.764725685119629, "learning_rate": 3.328513948032982e-07, "loss": 1.3685, "step": 1896 }, { "epoch": 0.8823802882380288, "grad_norm": 4.517758369445801, "learning_rate": 3.2019965012952036e-07, "loss": 1.1963, "step": 1898 }, { "epoch": 0.8833100883310089, "grad_norm": 4.838214874267578, "learning_rate": 3.0779148512155766e-07, "loss": 1.4087, "step": 1900 }, { "epoch": 0.8842398884239888, "grad_norm": 5.264946460723877, "learning_rate": 2.9562702224298824e-07, "loss": 1.3894, "step": 1902 }, { "epoch": 0.8851696885169689, "grad_norm": 5.582790374755859, "learning_rate": 2.837063815521504e-07, "loss": 1.2856, "step": 1904 }, { "epoch": 0.8860994886099489, "grad_norm": 4.567020416259766, "learning_rate": 2.7202968070095453e-07, "loss": 1.2012, "step": 1906 }, { "epoch": 0.8870292887029289, "grad_norm": 4.854571342468262, "learning_rate": 2.605970349337259e-07, "loss": 1.2273, "step": 1908 }, { "epoch": 0.8879590887959089, "grad_norm": 4.735793113708496, "learning_rate": 2.494085570860609e-07, "loss": 1.4455, "step": 1910 }, { "epoch": 0.8888888888888888, "grad_norm": 5.333769798278809, "learning_rate": 2.384643575837197e-07, "loss": 1.4275, "step": 1912 }, { "epoch": 0.8898186889818689, "grad_norm": 4.791116714477539, "learning_rate": 2.2776454444153264e-07, "loss": 1.2934, "step": 1914 }, { "epoch": 0.8907484890748489, "grad_norm": 4.776645660400391, "learning_rate": 2.1730922326233467e-07, "loss": 1.3131, "step": 1916 }, { "epoch": 0.8916782891678289, "grad_norm": 5.164566516876221, "learning_rate": 2.0709849723592964e-07, "loss": 1.3655, "step": 1918 }, { "epoch": 0.8926080892608089, "grad_norm": 5.159448146820068, "learning_rate": 1.9713246713805533e-07, "loss": 1.5583, "step": 1920 }, { "epoch": 0.893537889353789, "grad_norm": 5.375189304351807, "learning_rate": 1.8741123132940632e-07, "loss": 1.51, "step": 1922 }, { "epoch": 0.8944676894467689, "grad_norm": 4.633946418762207, "learning_rate": 1.7793488575465984e-07, "loss": 1.3263, "step": 1924 }, { "epoch": 0.895397489539749, "grad_norm": 5.122285842895508, "learning_rate": 1.687035239415153e-07, "loss": 1.3669, "step": 1926 }, { "epoch": 0.896327289632729, "grad_norm": 4.615383148193359, "learning_rate": 1.5971723699978968e-07, "loss": 1.4714, "step": 1928 }, { "epoch": 0.897257089725709, "grad_norm": 4.539102554321289, "learning_rate": 1.5097611362050693e-07, "loss": 1.3944, "step": 1930 }, { "epoch": 0.898186889818689, "grad_norm": 4.820923328399658, "learning_rate": 1.424802400750265e-07, "loss": 1.2995, "step": 1932 }, { "epoch": 0.899116689911669, "grad_norm": 4.449073791503906, "learning_rate": 1.342297002141914e-07, "loss": 1.303, "step": 1934 }, { "epoch": 0.900046490004649, "grad_norm": 4.870346546173096, "learning_rate": 1.2622457546749527e-07, "loss": 1.3973, "step": 1936 }, { "epoch": 0.900976290097629, "grad_norm": 4.5092387199401855, "learning_rate": 1.1846494484229162e-07, "loss": 1.4956, "step": 1938 }, { "epoch": 0.901906090190609, "grad_norm": 4.536494255065918, "learning_rate": 1.1095088492299975e-07, "loss": 1.3022, "step": 1940 }, { "epoch": 0.902835890283589, "grad_norm": 5.072239875793457, "learning_rate": 1.0368246987035835e-07, "loss": 1.5378, "step": 1942 }, { "epoch": 0.9037656903765691, "grad_norm": 4.756165027618408, "learning_rate": 9.665977142068708e-08, "loss": 1.3545, "step": 1944 }, { "epoch": 0.904695490469549, "grad_norm": 4.723520278930664, "learning_rate": 8.988285888518993e-08, "loss": 1.2916, "step": 1946 }, { "epoch": 0.905625290562529, "grad_norm": 4.511683464050293, "learning_rate": 8.335179914925302e-08, "loss": 1.3607, "step": 1948 }, { "epoch": 0.9065550906555091, "grad_norm": 4.960355758666992, "learning_rate": 7.706665667180066e-08, "loss": 1.266, "step": 1950 }, { "epoch": 0.907484890748489, "grad_norm": 4.8881659507751465, "learning_rate": 7.102749348465141e-08, "loss": 1.2863, "step": 1952 }, { "epoch": 0.9084146908414691, "grad_norm": 4.664922714233398, "learning_rate": 6.52343691919075e-08, "loss": 1.2806, "step": 1954 }, { "epoch": 0.9093444909344491, "grad_norm": 5.50726318359375, "learning_rate": 5.968734096936915e-08, "loss": 1.4384, "step": 1956 }, { "epoch": 0.9102742910274291, "grad_norm": 4.534170150756836, "learning_rate": 5.4386463563962755e-08, "loss": 1.3007, "step": 1958 }, { "epoch": 0.9112040911204091, "grad_norm": 4.159050464630127, "learning_rate": 4.933178929321087e-08, "loss": 1.2189, "step": 1960 }, { "epoch": 0.9121338912133892, "grad_norm": 4.677921295166016, "learning_rate": 4.452336804470477e-08, "loss": 1.1954, "step": 1962 }, { "epoch": 0.9130636913063691, "grad_norm": 4.961037635803223, "learning_rate": 3.9961247275624315e-08, "loss": 1.3724, "step": 1964 }, { "epoch": 0.9139934913993492, "grad_norm": 4.54478120803833, "learning_rate": 3.564547201225498e-08, "loss": 1.2764, "step": 1966 }, { "epoch": 0.9149232914923291, "grad_norm": 4.550593376159668, "learning_rate": 3.157608484956321e-08, "loss": 1.2907, "step": 1968 }, { "epoch": 0.9158530915853091, "grad_norm": 4.716287136077881, "learning_rate": 2.775312595075232e-08, "loss": 1.4163, "step": 1970 }, { "epoch": 0.9167828916782892, "grad_norm": 4.980526924133301, "learning_rate": 2.4176633046882255e-08, "loss": 1.396, "step": 1972 }, { "epoch": 0.9177126917712691, "grad_norm": 5.431740760803223, "learning_rate": 2.0846641436497657e-08, "loss": 1.5211, "step": 1974 }, { "epoch": 0.9186424918642492, "grad_norm": 4.4423723220825195, "learning_rate": 1.7763183985269823e-08, "loss": 1.332, "step": 1976 }, { "epoch": 0.9195722919572292, "grad_norm": 5.735809803009033, "learning_rate": 1.4926291125677512e-08, "loss": 1.6252, "step": 1978 }, { "epoch": 0.9205020920502092, "grad_norm": 4.438413619995117, "learning_rate": 1.233599085670996e-08, "loss": 1.4276, "step": 1980 }, { "epoch": 0.9214318921431892, "grad_norm": 4.731631755828857, "learning_rate": 9.992308743586555e-09, "loss": 1.3148, "step": 1982 }, { "epoch": 0.9223616922361693, "grad_norm": 4.5072550773620605, "learning_rate": 7.895267917501479e-09, "loss": 1.384, "step": 1984 }, { "epoch": 0.9232914923291492, "grad_norm": 4.708464622497559, "learning_rate": 6.044889075398889e-09, "loss": 1.3291, "step": 1986 }, { "epoch": 0.9242212924221292, "grad_norm": 4.627597808837891, "learning_rate": 4.441190479775856e-09, "loss": 1.2293, "step": 1988 }, { "epoch": 0.9251510925151093, "grad_norm": 4.659022808074951, "learning_rate": 3.0841879584852978e-09, "loss": 1.2414, "step": 1990 }, { "epoch": 0.9260808926080892, "grad_norm": 4.632164001464844, "learning_rate": 1.9738949045972006e-09, "loss": 1.3678, "step": 1992 }, { "epoch": 0.9270106927010693, "grad_norm": 5.15008020401001, "learning_rate": 1.1103222762542906e-09, "loss": 1.4622, "step": 1994 }, { "epoch": 0.9279404927940493, "grad_norm": 4.415071487426758, "learning_rate": 4.934785965721151e-10, "loss": 1.2106, "step": 1996 }, { "epoch": 0.9288702928870293, "grad_norm": 4.59063720703125, "learning_rate": 1.2336995354467153e-10, "loss": 1.2147, "step": 1998 }, { "epoch": 0.9298000929800093, "grad_norm": 5.079111576080322, "learning_rate": 0.0, "loss": 1.2845, "step": 2000 }, { "epoch": 0.9298000929800093, "eval_cer": 0.6875736530782697, "eval_loss": 1.2887473106384277, "eval_runtime": 404.984, "eval_samples_per_second": 31.344, "eval_steps_per_second": 0.98, "step": 2000 }, { "epoch": 0.9307298930729893, "grad_norm": 4.900014877319336, "learning_rate": 1.2336995354467197e-10, "loss": 1.3608, "step": 2002 }, { "epoch": 0.9316596931659693, "grad_norm": 4.749481201171875, "learning_rate": 4.934785965721167e-10, "loss": 1.3539, "step": 2004 }, { "epoch": 0.9325894932589494, "grad_norm": 4.630005836486816, "learning_rate": 1.1103222762542941e-09, "loss": 1.4392, "step": 2006 }, { "epoch": 0.9335192933519293, "grad_norm": 4.6264119148254395, "learning_rate": 1.973894904597207e-09, "loss": 1.4407, "step": 2008 }, { "epoch": 0.9344490934449093, "grad_norm": 4.8345537185668945, "learning_rate": 3.0841879584853073e-09, "loss": 1.4894, "step": 2010 }, { "epoch": 0.9353788935378894, "grad_norm": 4.491018772125244, "learning_rate": 4.441190479775869e-09, "loss": 1.2161, "step": 2012 }, { "epoch": 0.9363086936308693, "grad_norm": 4.67607307434082, "learning_rate": 6.0448890753989065e-09, "loss": 1.3389, "step": 2014 }, { "epoch": 0.9372384937238494, "grad_norm": 4.430117130279541, "learning_rate": 7.895267917501502e-09, "loss": 1.2276, "step": 2016 }, { "epoch": 0.9381682938168294, "grad_norm": 4.780663967132568, "learning_rate": 9.992308743586585e-09, "loss": 1.4716, "step": 2018 }, { "epoch": 0.9390980939098094, "grad_norm": 4.62688684463501, "learning_rate": 1.2335990856709996e-08, "loss": 1.3014, "step": 2020 }, { "epoch": 0.9400278940027894, "grad_norm": 4.588647842407227, "learning_rate": 1.4926291125677555e-08, "loss": 1.239, "step": 2022 }, { "epoch": 0.9409576940957695, "grad_norm": 5.103937149047852, "learning_rate": 1.7763183985269876e-08, "loss": 1.3689, "step": 2024 }, { "epoch": 0.9418874941887494, "grad_norm": 4.778083324432373, "learning_rate": 2.084664143649772e-08, "loss": 1.2802, "step": 2026 }, { "epoch": 0.9428172942817294, "grad_norm": 4.508438587188721, "learning_rate": 2.417663304688233e-08, "loss": 1.3346, "step": 2028 }, { "epoch": 0.9437470943747094, "grad_norm": 4.7966179847717285, "learning_rate": 2.7753125950749637e-08, "loss": 1.2942, "step": 2030 }, { "epoch": 0.9446768944676894, "grad_norm": 5.326448917388916, "learning_rate": 3.157608484956332e-08, "loss": 1.2992, "step": 2032 }, { "epoch": 0.9456066945606695, "grad_norm": 4.891195297241211, "learning_rate": 3.56454720122551e-08, "loss": 1.4249, "step": 2034 }, { "epoch": 0.9465364946536494, "grad_norm": 4.844561576843262, "learning_rate": 3.996124727562446e-08, "loss": 1.3039, "step": 2036 }, { "epoch": 0.9474662947466295, "grad_norm": 4.519979476928711, "learning_rate": 4.4523368044704936e-08, "loss": 1.3202, "step": 2038 }, { "epoch": 0.9483960948396095, "grad_norm": 4.489477157592773, "learning_rate": 4.9331789293211046e-08, "loss": 1.3136, "step": 2040 }, { "epoch": 0.9493258949325895, "grad_norm": 4.335144996643066, "learning_rate": 5.4386463563962954e-08, "loss": 1.2554, "step": 2042 }, { "epoch": 0.9502556950255695, "grad_norm": 4.903006076812744, "learning_rate": 5.968734096936936e-08, "loss": 1.2718, "step": 2044 }, { "epoch": 0.9511854951185496, "grad_norm": 5.114201545715332, "learning_rate": 6.523436919190776e-08, "loss": 1.5189, "step": 2046 }, { "epoch": 0.9521152952115295, "grad_norm": 4.555946350097656, "learning_rate": 7.102749348465169e-08, "loss": 1.2742, "step": 2048 }, { "epoch": 0.9530450953045095, "grad_norm": 4.579489231109619, "learning_rate": 7.706665667180096e-08, "loss": 1.3377, "step": 2050 }, { "epoch": 0.9539748953974896, "grad_norm": 4.725649833679199, "learning_rate": 8.335179914925334e-08, "loss": 1.2604, "step": 2052 }, { "epoch": 0.9549046954904695, "grad_norm": 4.956511974334717, "learning_rate": 8.988285888519027e-08, "loss": 1.2279, "step": 2054 }, { "epoch": 0.9558344955834496, "grad_norm": 5.081295013427734, "learning_rate": 9.665977142068746e-08, "loss": 1.3532, "step": 2056 }, { "epoch": 0.9567642956764296, "grad_norm": 4.837268829345703, "learning_rate": 1.0368246987035598e-07, "loss": 1.4377, "step": 2058 }, { "epoch": 0.9576940957694096, "grad_norm": 4.734530925750732, "learning_rate": 1.1095088492300018e-07, "loss": 1.4056, "step": 2060 }, { "epoch": 0.9586238958623896, "grad_norm": 4.514042854309082, "learning_rate": 1.1846494484229208e-07, "loss": 1.3816, "step": 2062 }, { "epoch": 0.9595536959553695, "grad_norm": 4.122927188873291, "learning_rate": 1.2622457546749577e-07, "loss": 1.2365, "step": 2064 }, { "epoch": 0.9604834960483496, "grad_norm": 5.048885822296143, "learning_rate": 1.3422970021418912e-07, "loss": 1.3012, "step": 2066 }, { "epoch": 0.9614132961413296, "grad_norm": 4.928426265716553, "learning_rate": 1.4248024007502702e-07, "loss": 1.4017, "step": 2068 }, { "epoch": 0.9623430962343096, "grad_norm": 4.39892053604126, "learning_rate": 1.5097611362050746e-07, "loss": 1.2772, "step": 2070 }, { "epoch": 0.9632728963272896, "grad_norm": 4.681835174560547, "learning_rate": 1.5971723699979027e-07, "loss": 1.2177, "step": 2072 }, { "epoch": 0.9642026964202697, "grad_norm": 4.758558750152588, "learning_rate": 1.6870352394151597e-07, "loss": 1.3981, "step": 2074 }, { "epoch": 0.9651324965132496, "grad_norm": 5.117108345031738, "learning_rate": 1.7793488575465775e-07, "loss": 1.5051, "step": 2076 }, { "epoch": 0.9660622966062297, "grad_norm": 4.529995918273926, "learning_rate": 1.8741123132940704e-07, "loss": 1.2587, "step": 2078 }, { "epoch": 0.9669920966992097, "grad_norm": 4.765900135040283, "learning_rate": 1.971324671380533e-07, "loss": 1.3477, "step": 2080 }, { "epoch": 0.9679218967921897, "grad_norm": 4.536604404449463, "learning_rate": 2.0709849723592763e-07, "loss": 1.2748, "step": 2082 }, { "epoch": 0.9688516968851697, "grad_norm": 4.8193135261535645, "learning_rate": 2.1730922326233544e-07, "loss": 1.2753, "step": 2084 }, { "epoch": 0.9697814969781497, "grad_norm": 4.623575687408447, "learning_rate": 2.2776454444153346e-07, "loss": 1.3992, "step": 2086 }, { "epoch": 0.9707112970711297, "grad_norm": 4.842583656311035, "learning_rate": 2.3846435758371773e-07, "loss": 1.3307, "step": 2088 }, { "epoch": 0.9716410971641097, "grad_norm": 4.884167194366455, "learning_rate": 2.494085570860618e-07, "loss": 1.3673, "step": 2090 }, { "epoch": 0.9725708972570897, "grad_norm": 5.306212902069092, "learning_rate": 2.605970349337242e-07, "loss": 1.3613, "step": 2092 }, { "epoch": 0.9735006973500697, "grad_norm": 5.1909050941467285, "learning_rate": 2.7202968070095294e-07, "loss": 1.5503, "step": 2094 }, { "epoch": 0.9744304974430498, "grad_norm": 5.097170352935791, "learning_rate": 2.837063815521516e-07, "loss": 1.2965, "step": 2096 }, { "epoch": 0.9753602975360297, "grad_norm": 5.022607803344727, "learning_rate": 2.9562702224298946e-07, "loss": 1.3772, "step": 2098 }, { "epoch": 0.9762900976290098, "grad_norm": 4.992171287536621, "learning_rate": 3.0779148512155613e-07, "loss": 1.2804, "step": 2100 }, { "epoch": 0.9772198977219898, "grad_norm": 4.488611221313477, "learning_rate": 3.201996501295188e-07, "loss": 1.129, "step": 2102 }, { "epoch": 0.9781496978149697, "grad_norm": 4.705414772033691, "learning_rate": 3.328513948032994e-07, "loss": 1.3734, "step": 2104 }, { "epoch": 0.9790794979079498, "grad_norm": 4.827545166015625, "learning_rate": 3.4574659427527895e-07, "loss": 1.4399, "step": 2106 }, { "epoch": 0.9800092980009298, "grad_norm": 4.566982269287109, "learning_rate": 3.5888512127504916e-07, "loss": 1.2404, "step": 2108 }, { "epoch": 0.9809390980939098, "grad_norm": 4.966663360595703, "learning_rate": 3.7226684613065095e-07, "loss": 1.4649, "step": 2110 }, { "epoch": 0.9818688981868898, "grad_norm": 4.883546829223633, "learning_rate": 3.858916367698671e-07, "loss": 1.4489, "step": 2112 }, { "epoch": 0.9827986982798699, "grad_norm": 4.368488788604736, "learning_rate": 3.9975935872150523e-07, "loss": 1.2809, "step": 2114 }, { "epoch": 0.9837284983728498, "grad_norm": 4.563704490661621, "learning_rate": 4.138698751167573e-07, "loss": 1.3964, "step": 2116 }, { "epoch": 0.9846582984658299, "grad_norm": 4.9934844970703125, "learning_rate": 4.282230466905184e-07, "loss": 1.2782, "step": 2118 }, { "epoch": 0.9855880985588099, "grad_norm": 4.664752960205078, "learning_rate": 4.4281873178278523e-07, "loss": 1.3844, "step": 2120 }, { "epoch": 0.9865178986517898, "grad_norm": 4.594139575958252, "learning_rate": 4.576567863400276e-07, "loss": 1.2799, "step": 2122 }, { "epoch": 0.9874476987447699, "grad_norm": 4.692776203155518, "learning_rate": 4.727370639166482e-07, "loss": 1.2788, "step": 2124 }, { "epoch": 0.9883774988377498, "grad_norm": 4.477388381958008, "learning_rate": 4.8805941567639e-07, "loss": 1.2969, "step": 2126 }, { "epoch": 0.9893072989307299, "grad_norm": 6.019404411315918, "learning_rate": 5.036236903938289e-07, "loss": 1.4646, "step": 2128 }, { "epoch": 0.9902370990237099, "grad_norm": 4.627391338348389, "learning_rate": 5.19429734455854e-07, "loss": 1.386, "step": 2130 }, { "epoch": 0.9911668991166899, "grad_norm": 5.143248558044434, "learning_rate": 5.354773918631961e-07, "loss": 1.4925, "step": 2132 }, { "epoch": 0.9920966992096699, "grad_norm": 4.804230213165283, "learning_rate": 5.517665042319547e-07, "loss": 1.3438, "step": 2134 }, { "epoch": 0.99302649930265, "grad_norm": 4.516220569610596, "learning_rate": 5.682969107951635e-07, "loss": 1.3674, "step": 2136 }, { "epoch": 0.9939562993956299, "grad_norm": 4.677893161773682, "learning_rate": 5.850684484043834e-07, "loss": 1.4667, "step": 2138 }, { "epoch": 0.99488609948861, "grad_norm": 4.538728713989258, "learning_rate": 6.020809515313149e-07, "loss": 1.2799, "step": 2140 }, { "epoch": 0.99581589958159, "grad_norm": 4.881726264953613, "learning_rate": 6.193342522694136e-07, "loss": 1.2981, "step": 2142 }, { "epoch": 0.9967456996745699, "grad_norm": 4.641430854797363, "learning_rate": 6.368281803355728e-07, "loss": 1.4361, "step": 2144 }, { "epoch": 0.99767549976755, "grad_norm": 4.900595664978027, "learning_rate": 6.545625630717764e-07, "loss": 1.4961, "step": 2146 }, { "epoch": 0.99860529986053, "grad_norm": 4.476684093475342, "learning_rate": 6.725372254468379e-07, "loss": 1.2466, "step": 2148 }, { "epoch": 0.99953509995351, "grad_norm": 4.857523441314697, "learning_rate": 6.907519900580843e-07, "loss": 1.344, "step": 2150 }, { "epoch": 1.00046490004649, "grad_norm": 4.93610143661499, "learning_rate": 7.092066771331517e-07, "loss": 1.3159, "step": 2152 }, { "epoch": 1.00139470013947, "grad_norm": 4.497437477111816, "learning_rate": 7.279011045317235e-07, "loss": 1.1974, "step": 2154 }, { "epoch": 1.0023245002324501, "grad_norm": 4.453726291656494, "learning_rate": 7.468350877473563e-07, "loss": 1.3422, "step": 2156 }, { "epoch": 1.00325430032543, "grad_norm": 4.5283966064453125, "learning_rate": 7.6600843990927e-07, "loss": 1.2236, "step": 2158 }, { "epoch": 1.00418410041841, "grad_norm": 4.566667556762695, "learning_rate": 7.854209717842216e-07, "loss": 1.2313, "step": 2160 }, { "epoch": 1.00511390051139, "grad_norm": 4.296647071838379, "learning_rate": 8.05072491778365e-07, "loss": 1.3901, "step": 2162 }, { "epoch": 1.00604370060437, "grad_norm": 4.522074222564697, "learning_rate": 8.249628059391239e-07, "loss": 1.3264, "step": 2164 }, { "epoch": 1.0069735006973501, "grad_norm": 4.889230728149414, "learning_rate": 8.450917179571321e-07, "loss": 1.334, "step": 2166 }, { "epoch": 1.00790330079033, "grad_norm": 4.314748764038086, "learning_rate": 8.654590291681546e-07, "loss": 1.2061, "step": 2168 }, { "epoch": 1.00883310088331, "grad_norm": 4.6197285652160645, "learning_rate": 8.860645385550498e-07, "loss": 1.301, "step": 2170 }, { "epoch": 1.00976290097629, "grad_norm": 4.3293232917785645, "learning_rate": 9.069080427497588e-07, "loss": 1.2546, "step": 2172 }, { "epoch": 1.0106927010692701, "grad_norm": 4.793970584869385, "learning_rate": 9.27989336035311e-07, "loss": 1.3062, "step": 2174 }, { "epoch": 1.0116225011622502, "grad_norm": 4.797980308532715, "learning_rate": 9.493082103478537e-07, "loss": 1.3669, "step": 2176 }, { "epoch": 1.0125523012552302, "grad_norm": 4.854022979736328, "learning_rate": 9.70864455278702e-07, "loss": 1.3301, "step": 2178 }, { "epoch": 1.01348210134821, "grad_norm": 4.587277889251709, "learning_rate": 9.926578580764253e-07, "loss": 1.2024, "step": 2180 }, { "epoch": 1.01441190144119, "grad_norm": 4.886118412017822, "learning_rate": 1.01468820364893e-06, "loss": 1.3531, "step": 2182 }, { "epoch": 1.0153417015341701, "grad_norm": 4.77844762802124, "learning_rate": 1.0369552745656062e-06, "loss": 1.3912, "step": 2184 }, { "epoch": 1.0162715016271502, "grad_norm": 5.249661922454834, "learning_rate": 1.0594588510594466e-06, "loss": 1.3466, "step": 2186 }, { "epoch": 1.0172013017201302, "grad_norm": 4.508343696594238, "learning_rate": 1.0821987110292386e-06, "loss": 1.3609, "step": 2188 }, { "epoch": 1.0181311018131103, "grad_norm": 4.734978675842285, "learning_rate": 1.1051746300417463e-06, "loss": 1.4012, "step": 2190 }, { "epoch": 1.01906090190609, "grad_norm": 4.808538436889648, "learning_rate": 1.1283863813339285e-06, "loss": 1.3393, "step": 2192 }, { "epoch": 1.0199907019990702, "grad_norm": 5.106536388397217, "learning_rate": 1.151833735815163e-06, "loss": 1.44, "step": 2194 }, { "epoch": 1.0209205020920502, "grad_norm": 4.674396514892578, "learning_rate": 1.1755164620695306e-06, "loss": 1.2248, "step": 2196 }, { "epoch": 1.0218503021850303, "grad_norm": 4.72011137008667, "learning_rate": 1.1994343263580865e-06, "loss": 1.4089, "step": 2198 }, { "epoch": 1.0227801022780103, "grad_norm": 4.850317001342773, "learning_rate": 1.223587092621161e-06, "loss": 1.2785, "step": 2200 }, { "epoch": 1.0237099023709901, "grad_norm": 4.375977516174316, "learning_rate": 1.2479745224807067e-06, "loss": 1.165, "step": 2202 }, { "epoch": 1.0246397024639702, "grad_norm": 4.8769121170043945, "learning_rate": 1.272596375242637e-06, "loss": 1.2257, "step": 2204 }, { "epoch": 1.0255695025569502, "grad_norm": 4.3674798011779785, "learning_rate": 1.2974524078992044e-06, "loss": 1.2901, "step": 2206 }, { "epoch": 1.0264993026499303, "grad_norm": 4.48744010925293, "learning_rate": 1.3225423751313938e-06, "loss": 1.3035, "step": 2208 }, { "epoch": 1.0274291027429103, "grad_norm": 4.646104335784912, "learning_rate": 1.3478660293113671e-06, "loss": 1.1741, "step": 2210 }, { "epoch": 1.0283589028358904, "grad_norm": 4.976499080657959, "learning_rate": 1.3734231205048793e-06, "loss": 1.3106, "step": 2212 }, { "epoch": 1.0292887029288702, "grad_norm": 4.666265487670898, "learning_rate": 1.3992133964737582e-06, "loss": 1.1813, "step": 2214 }, { "epoch": 1.0302185030218503, "grad_norm": 5.275601863861084, "learning_rate": 1.4252366026783918e-06, "loss": 1.3565, "step": 2216 }, { "epoch": 1.0311483031148303, "grad_norm": 4.361856460571289, "learning_rate": 1.4514924822802362e-06, "loss": 1.2399, "step": 2218 }, { "epoch": 1.0320781032078103, "grad_norm": 4.771942615509033, "learning_rate": 1.4779807761443662e-06, "loss": 1.3226, "step": 2220 }, { "epoch": 1.0330079033007904, "grad_norm": 4.492579936981201, "learning_rate": 1.5047012228420113e-06, "loss": 1.2408, "step": 2222 }, { "epoch": 1.0339377033937704, "grad_norm": 4.709753513336182, "learning_rate": 1.531653558653148e-06, "loss": 1.2687, "step": 2224 }, { "epoch": 1.0348675034867503, "grad_norm": 4.390625, "learning_rate": 1.5588375175691086e-06, "loss": 1.2531, "step": 2226 }, { "epoch": 1.0357973035797303, "grad_norm": 4.240670204162598, "learning_rate": 1.5862528312951953e-06, "loss": 1.2573, "step": 2228 }, { "epoch": 1.0367271036727104, "grad_norm": 4.321803569793701, "learning_rate": 1.6138992292533206e-06, "loss": 1.1918, "step": 2230 }, { "epoch": 1.0376569037656904, "grad_norm": 4.511859893798828, "learning_rate": 1.6417764385846988e-06, "loss": 1.4787, "step": 2232 }, { "epoch": 1.0385867038586705, "grad_norm": 4.752563953399658, "learning_rate": 1.6698841841525332e-06, "loss": 1.2542, "step": 2234 }, { "epoch": 1.0395165039516503, "grad_norm": 4.4177680015563965, "learning_rate": 1.698222188544728e-06, "loss": 1.2261, "step": 2236 }, { "epoch": 1.0404463040446303, "grad_norm": 4.9536452293396, "learning_rate": 1.7267901720766049e-06, "loss": 1.4294, "step": 2238 }, { "epoch": 1.0413761041376104, "grad_norm": 5.321256160736084, "learning_rate": 1.7555878527937153e-06, "loss": 1.3392, "step": 2240 }, { "epoch": 1.0423059042305904, "grad_norm": 4.42864465713501, "learning_rate": 1.784614946474571e-06, "loss": 1.349, "step": 2242 }, { "epoch": 1.0432357043235705, "grad_norm": 4.113982200622559, "learning_rate": 1.8138711666334704e-06, "loss": 1.1773, "step": 2244 }, { "epoch": 1.0441655044165505, "grad_norm": 5.248923301696777, "learning_rate": 1.843356224523334e-06, "loss": 1.2505, "step": 2246 }, { "epoch": 1.0450953045095304, "grad_norm": 4.986355304718018, "learning_rate": 1.8730698291385484e-06, "loss": 1.3154, "step": 2248 }, { "epoch": 1.0460251046025104, "grad_norm": 4.844568252563477, "learning_rate": 1.9030116872178312e-06, "loss": 1.3344, "step": 2250 }, { "epoch": 1.0469549046954905, "grad_norm": 4.857133865356445, "learning_rate": 1.933181503247133e-06, "loss": 1.4058, "step": 2252 }, { "epoch": 1.0478847047884705, "grad_norm": 4.25247859954834, "learning_rate": 1.9635789794625408e-06, "loss": 1.2756, "step": 2254 }, { "epoch": 1.0488145048814506, "grad_norm": 4.160486221313477, "learning_rate": 1.9942038158532403e-06, "loss": 1.1554, "step": 2256 }, { "epoch": 1.0497443049744306, "grad_norm": 4.784586429595947, "learning_rate": 2.0250557101644696e-06, "loss": 1.3717, "step": 2258 }, { "epoch": 1.0506741050674104, "grad_norm": 4.710980415344238, "learning_rate": 2.0561343579004715e-06, "loss": 1.243, "step": 2260 }, { "epoch": 1.0516039051603905, "grad_norm": 4.289318561553955, "learning_rate": 2.0874394523275494e-06, "loss": 1.1416, "step": 2262 }, { "epoch": 1.0525337052533705, "grad_norm": 4.814680099487305, "learning_rate": 2.118970684477067e-06, "loss": 1.3063, "step": 2264 }, { "epoch": 1.0534635053463506, "grad_norm": 4.237391948699951, "learning_rate": 2.150727743148473e-06, "loss": 1.1608, "step": 2266 }, { "epoch": 1.0543933054393306, "grad_norm": 4.638303756713867, "learning_rate": 2.1827103149124317e-06, "loss": 1.3161, "step": 2268 }, { "epoch": 1.0553231055323105, "grad_norm": 4.783177375793457, "learning_rate": 2.214918084113869e-06, "loss": 1.1106, "step": 2270 }, { "epoch": 1.0562529056252905, "grad_norm": 4.590149402618408, "learning_rate": 2.2473507328751124e-06, "loss": 1.1071, "step": 2272 }, { "epoch": 1.0571827057182706, "grad_norm": 5.025089740753174, "learning_rate": 2.280007941098998e-06, "loss": 1.3916, "step": 2274 }, { "epoch": 1.0581125058112506, "grad_norm": 4.017756938934326, "learning_rate": 2.3128893864720796e-06, "loss": 1.0442, "step": 2276 }, { "epoch": 1.0590423059042307, "grad_norm": 4.800849914550781, "learning_rate": 2.345994744467757e-06, "loss": 1.2776, "step": 2278 }, { "epoch": 1.0599721059972107, "grad_norm": 4.8269243240356445, "learning_rate": 2.3793236883495178e-06, "loss": 1.2869, "step": 2280 }, { "epoch": 1.0609019060901905, "grad_norm": 4.670619487762451, "learning_rate": 2.4128758891741312e-06, "loss": 1.1914, "step": 2282 }, { "epoch": 1.0618317061831706, "grad_norm": 4.590518474578857, "learning_rate": 2.4466510157949308e-06, "loss": 1.2614, "step": 2284 }, { "epoch": 1.0627615062761506, "grad_norm": 4.29293966293335, "learning_rate": 2.4806487348650506e-06, "loss": 1.2617, "step": 2286 }, { "epoch": 1.0636913063691307, "grad_norm": 4.701921463012695, "learning_rate": 2.514868710840731e-06, "loss": 1.3056, "step": 2288 }, { "epoch": 1.0646211064621107, "grad_norm": 4.688406944274902, "learning_rate": 2.549310605984614e-06, "loss": 1.3972, "step": 2290 }, { "epoch": 1.0655509065550905, "grad_norm": 4.301065921783447, "learning_rate": 2.5839740803691054e-06, "loss": 1.2517, "step": 2292 }, { "epoch": 1.0664807066480706, "grad_norm": 4.682007789611816, "learning_rate": 2.618858791879713e-06, "loss": 1.3516, "step": 2294 }, { "epoch": 1.0674105067410506, "grad_norm": 4.207897186279297, "learning_rate": 2.6539643962184082e-06, "loss": 1.2365, "step": 2296 }, { "epoch": 1.0683403068340307, "grad_norm": 4.839366436004639, "learning_rate": 2.6892905469070525e-06, "loss": 1.277, "step": 2298 }, { "epoch": 1.0692701069270107, "grad_norm": 4.73009729385376, "learning_rate": 2.724836895290808e-06, "loss": 1.3498, "step": 2300 }, { "epoch": 1.0701999070199908, "grad_norm": 4.327843189239502, "learning_rate": 2.760603090541558e-06, "loss": 1.1533, "step": 2302 }, { "epoch": 1.0711297071129706, "grad_norm": 4.459174156188965, "learning_rate": 2.7965887796613914e-06, "loss": 1.1671, "step": 2304 }, { "epoch": 1.0720595072059507, "grad_norm": 4.664520263671875, "learning_rate": 2.8327936074860898e-06, "loss": 1.4099, "step": 2306 }, { "epoch": 1.0729893072989307, "grad_norm": 4.550124645233154, "learning_rate": 2.869217216688625e-06, "loss": 1.2646, "step": 2308 }, { "epoch": 1.0739191073919108, "grad_norm": 4.617712497711182, "learning_rate": 2.905859247782667e-06, "loss": 1.3102, "step": 2310 }, { "epoch": 1.0748489074848908, "grad_norm": 4.206188678741455, "learning_rate": 2.942719339126174e-06, "loss": 1.0634, "step": 2312 }, { "epoch": 1.0757787075778706, "grad_norm": 4.829810619354248, "learning_rate": 2.979797126924914e-06, "loss": 1.2609, "step": 2314 }, { "epoch": 1.0767085076708507, "grad_norm": 4.491220951080322, "learning_rate": 3.017092245236101e-06, "loss": 1.1332, "step": 2316 }, { "epoch": 1.0776383077638307, "grad_norm": 4.925967693328857, "learning_rate": 3.0546043259719647e-06, "loss": 1.1476, "step": 2318 }, { "epoch": 1.0785681078568108, "grad_norm": 4.692814826965332, "learning_rate": 3.0923329989034154e-06, "loss": 1.2821, "step": 2320 }, { "epoch": 1.0794979079497908, "grad_norm": 5.102067470550537, "learning_rate": 3.1302778916636824e-06, "loss": 1.3832, "step": 2322 }, { "epoch": 1.0804277080427709, "grad_norm": 5.392603397369385, "learning_rate": 3.168438629752007e-06, "loss": 1.2678, "step": 2324 }, { "epoch": 1.0813575081357507, "grad_norm": 4.562141418457031, "learning_rate": 3.206814836537286e-06, "loss": 1.2959, "step": 2326 }, { "epoch": 1.0822873082287308, "grad_norm": 4.947735786437988, "learning_rate": 3.245406133261863e-06, "loss": 1.3618, "step": 2328 }, { "epoch": 1.0832171083217108, "grad_norm": 5.246885776519775, "learning_rate": 3.284212139045228e-06, "loss": 1.2991, "step": 2330 }, { "epoch": 1.0841469084146909, "grad_norm": 5.183762073516846, "learning_rate": 3.323232470887754e-06, "loss": 1.4209, "step": 2332 }, { "epoch": 1.085076708507671, "grad_norm": 4.322397232055664, "learning_rate": 3.3624667436745298e-06, "loss": 1.253, "step": 2334 }, { "epoch": 1.086006508600651, "grad_norm": 4.801977157592773, "learning_rate": 3.401914570179123e-06, "loss": 1.2269, "step": 2336 }, { "epoch": 1.0869363086936308, "grad_norm": 4.630431175231934, "learning_rate": 3.4415755610674107e-06, "loss": 1.1943, "step": 2338 }, { "epoch": 1.0878661087866108, "grad_norm": 4.324160575866699, "learning_rate": 3.4814493249014167e-06, "loss": 1.1609, "step": 2340 }, { "epoch": 1.0887959088795909, "grad_norm": 4.772155284881592, "learning_rate": 3.5215354681431943e-06, "loss": 1.2729, "step": 2342 }, { "epoch": 1.089725708972571, "grad_norm": 4.431854724884033, "learning_rate": 3.5618335951586945e-06, "loss": 1.2936, "step": 2344 }, { "epoch": 1.090655509065551, "grad_norm": 4.608092308044434, "learning_rate": 3.6023433082216806e-06, "loss": 1.2757, "step": 2346 }, { "epoch": 1.091585309158531, "grad_norm": 4.503789901733398, "learning_rate": 3.643064207517629e-06, "loss": 1.2869, "step": 2348 }, { "epoch": 1.0925151092515109, "grad_norm": 4.827126502990723, "learning_rate": 3.6839958911477003e-06, "loss": 1.2796, "step": 2350 }, { "epoch": 1.093444909344491, "grad_norm": 4.389601230621338, "learning_rate": 3.725137955132712e-06, "loss": 1.2556, "step": 2352 }, { "epoch": 1.094374709437471, "grad_norm": 4.649837970733643, "learning_rate": 3.766489993417093e-06, "loss": 1.1349, "step": 2354 }, { "epoch": 1.095304509530451, "grad_norm": 5.119424343109131, "learning_rate": 3.8080515978729243e-06, "loss": 1.3259, "step": 2356 }, { "epoch": 1.096234309623431, "grad_norm": 4.915770053863525, "learning_rate": 3.8498223583039484e-06, "loss": 1.2709, "step": 2358 }, { "epoch": 1.0971641097164109, "grad_norm": 5.101198196411133, "learning_rate": 3.891801862449635e-06, "loss": 1.3919, "step": 2360 }, { "epoch": 1.098093909809391, "grad_norm": 4.393911838531494, "learning_rate": 3.933989695989202e-06, "loss": 1.233, "step": 2362 }, { "epoch": 1.099023709902371, "grad_norm": 4.028720855712891, "learning_rate": 3.97638544254578e-06, "loss": 1.2263, "step": 2364 }, { "epoch": 1.099953509995351, "grad_norm": 3.9663443565368652, "learning_rate": 4.018988683690461e-06, "loss": 1.0956, "step": 2366 }, { "epoch": 1.100883310088331, "grad_norm": 4.626169204711914, "learning_rate": 4.061798998946465e-06, "loss": 1.3175, "step": 2368 }, { "epoch": 1.1018131101813111, "grad_norm": 5.1949782371521, "learning_rate": 4.104815965793251e-06, "loss": 1.329, "step": 2370 }, { "epoch": 1.102742910274291, "grad_norm": 4.548689365386963, "learning_rate": 4.148039159670729e-06, "loss": 1.3004, "step": 2372 }, { "epoch": 1.103672710367271, "grad_norm": 4.81545352935791, "learning_rate": 4.191468153983426e-06, "loss": 1.2039, "step": 2374 }, { "epoch": 1.104602510460251, "grad_norm": 4.636904716491699, "learning_rate": 4.2351025201046934e-06, "loss": 1.3677, "step": 2376 }, { "epoch": 1.105532310553231, "grad_norm": 4.439990520477295, "learning_rate": 4.278941827380961e-06, "loss": 1.1609, "step": 2378 }, { "epoch": 1.1064621106462111, "grad_norm": 5.081040382385254, "learning_rate": 4.32298564313596e-06, "loss": 1.1282, "step": 2380 }, { "epoch": 1.107391910739191, "grad_norm": 5.129424571990967, "learning_rate": 4.36723353267502e-06, "loss": 1.3283, "step": 2382 }, { "epoch": 1.108321710832171, "grad_norm": 5.386360168457031, "learning_rate": 4.4116850592893276e-06, "loss": 1.2593, "step": 2384 }, { "epoch": 1.109251510925151, "grad_norm": 5.097179889678955, "learning_rate": 4.456339784260255e-06, "loss": 1.3093, "step": 2386 }, { "epoch": 1.1101813110181311, "grad_norm": 4.081212520599365, "learning_rate": 4.5011972668636986e-06, "loss": 1.1374, "step": 2388 }, { "epoch": 1.1111111111111112, "grad_norm": 4.627336025238037, "learning_rate": 4.5462570643744225e-06, "loss": 1.2064, "step": 2390 }, { "epoch": 1.1120409112040912, "grad_norm": 4.454087734222412, "learning_rate": 4.5915187320704085e-06, "loss": 1.0982, "step": 2392 }, { "epoch": 1.112970711297071, "grad_norm": 4.698849678039551, "learning_rate": 4.6369818232372705e-06, "loss": 1.193, "step": 2394 }, { "epoch": 1.113900511390051, "grad_norm": 4.189038276672363, "learning_rate": 4.682645889172665e-06, "loss": 1.163, "step": 2396 }, { "epoch": 1.1148303114830311, "grad_norm": 4.42707633972168, "learning_rate": 4.728510479190675e-06, "loss": 1.1888, "step": 2398 }, { "epoch": 1.1157601115760112, "grad_norm": 4.7200541496276855, "learning_rate": 4.774575140626321e-06, "loss": 1.2829, "step": 2400 }, { "epoch": 1.1166899116689912, "grad_norm": 4.346938133239746, "learning_rate": 4.8208394188399985e-06, "loss": 1.2324, "step": 2402 }, { "epoch": 1.1176197117619713, "grad_norm": 4.999886512756348, "learning_rate": 4.867302857221959e-06, "loss": 1.2703, "step": 2404 }, { "epoch": 1.118549511854951, "grad_norm": 5.047433853149414, "learning_rate": 4.913964997196816e-06, "loss": 1.1616, "step": 2406 }, { "epoch": 1.1194793119479312, "grad_norm": 5.050668716430664, "learning_rate": 4.960825378228093e-06, "loss": 1.2795, "step": 2408 }, { "epoch": 1.1204091120409112, "grad_norm": 4.264236927032471, "learning_rate": 5.0078835378227416e-06, "loss": 1.2222, "step": 2410 }, { "epoch": 1.1213389121338913, "grad_norm": 4.5412211418151855, "learning_rate": 5.055139011535735e-06, "loss": 1.1537, "step": 2412 }, { "epoch": 1.1222687122268713, "grad_norm": 4.9575324058532715, "learning_rate": 5.102591332974611e-06, "loss": 1.2975, "step": 2414 }, { "epoch": 1.1231985123198513, "grad_norm": 4.294879913330078, "learning_rate": 5.150240033804119e-06, "loss": 1.1513, "step": 2416 }, { "epoch": 1.1241283124128312, "grad_norm": 4.309078693389893, "learning_rate": 5.198084643750832e-06, "loss": 1.2861, "step": 2418 }, { "epoch": 1.1250581125058112, "grad_norm": 4.717819690704346, "learning_rate": 5.246124690607752e-06, "loss": 1.2656, "step": 2420 }, { "epoch": 1.1259879125987913, "grad_norm": 5.010506629943848, "learning_rate": 5.2943597002390086e-06, "loss": 1.2368, "step": 2422 }, { "epoch": 1.1269177126917713, "grad_norm": 5.501163482666016, "learning_rate": 5.342789196584534e-06, "loss": 1.1874, "step": 2424 }, { "epoch": 1.1278475127847514, "grad_norm": 5.373035430908203, "learning_rate": 5.3914127016647565e-06, "loss": 1.3012, "step": 2426 }, { "epoch": 1.1287773128777312, "grad_norm": 4.478996276855469, "learning_rate": 5.440229735585304e-06, "loss": 1.2135, "step": 2428 }, { "epoch": 1.1297071129707112, "grad_norm": 4.729777812957764, "learning_rate": 5.489239816541759e-06, "loss": 1.2976, "step": 2430 }, { "epoch": 1.1306369130636913, "grad_norm": 4.975554943084717, "learning_rate": 5.53844246082442e-06, "loss": 1.1488, "step": 2432 }, { "epoch": 1.1315667131566713, "grad_norm": 4.964880466461182, "learning_rate": 5.5878371828230455e-06, "loss": 1.2584, "step": 2434 }, { "epoch": 1.1324965132496514, "grad_norm": 4.955070972442627, "learning_rate": 5.637423495031661e-06, "loss": 1.372, "step": 2436 }, { "epoch": 1.1334263133426314, "grad_norm": 4.7306060791015625, "learning_rate": 5.6872009080533926e-06, "loss": 1.2678, "step": 2438 }, { "epoch": 1.1343561134356113, "grad_norm": 4.710080146789551, "learning_rate": 5.737168930605278e-06, "loss": 1.2363, "step": 2440 }, { "epoch": 1.1352859135285913, "grad_norm": 4.4780755043029785, "learning_rate": 5.787327069523091e-06, "loss": 1.1776, "step": 2442 }, { "epoch": 1.1362157136215714, "grad_norm": 4.809199810028076, "learning_rate": 5.837674829766265e-06, "loss": 1.2637, "step": 2444 }, { "epoch": 1.1371455137145514, "grad_norm": 4.881836414337158, "learning_rate": 5.888211714422717e-06, "loss": 1.2343, "step": 2446 }, { "epoch": 1.1380753138075315, "grad_norm": 5.239185810089111, "learning_rate": 5.938937224713813e-06, "loss": 1.2055, "step": 2448 }, { "epoch": 1.1390051139005113, "grad_norm": 5.130786895751953, "learning_rate": 5.989850859999234e-06, "loss": 1.4217, "step": 2450 }, { "epoch": 1.1399349139934913, "grad_norm": 4.354568958282471, "learning_rate": 6.040952117781957e-06, "loss": 1.2394, "step": 2452 }, { "epoch": 1.1408647140864714, "grad_norm": 5.11206579208374, "learning_rate": 6.09224049371321e-06, "loss": 1.2655, "step": 2454 }, { "epoch": 1.1417945141794514, "grad_norm": 5.1859917640686035, "learning_rate": 6.143715481597417e-06, "loss": 1.2814, "step": 2456 }, { "epoch": 1.1427243142724315, "grad_norm": 4.767697811126709, "learning_rate": 6.195376573397223e-06, "loss": 1.1031, "step": 2458 }, { "epoch": 1.1436541143654115, "grad_norm": 4.305976390838623, "learning_rate": 6.247223259238518e-06, "loss": 1.183, "step": 2460 }, { "epoch": 1.1445839144583914, "grad_norm": 4.58760404586792, "learning_rate": 6.299255027415448e-06, "loss": 1.3201, "step": 2462 }, { "epoch": 1.1455137145513714, "grad_norm": 4.729018688201904, "learning_rate": 6.351471364395455e-06, "loss": 1.1573, "step": 2464 }, { "epoch": 1.1464435146443515, "grad_norm": 4.5736165046691895, "learning_rate": 6.403871754824376e-06, "loss": 1.3802, "step": 2466 }, { "epoch": 1.1473733147373315, "grad_norm": 4.416676998138428, "learning_rate": 6.4564556815315255e-06, "loss": 1.2197, "step": 2468 }, { "epoch": 1.1483031148303116, "grad_norm": 4.385786056518555, "learning_rate": 6.509222625534769e-06, "loss": 1.2441, "step": 2470 }, { "epoch": 1.1492329149232914, "grad_norm": 4.97844934463501, "learning_rate": 6.562172066045659e-06, "loss": 1.2561, "step": 2472 }, { "epoch": 1.1501627150162714, "grad_norm": 4.542789936065674, "learning_rate": 6.615303480474606e-06, "loss": 1.1139, "step": 2474 }, { "epoch": 1.1510925151092515, "grad_norm": 5.214751243591309, "learning_rate": 6.668616344436006e-06, "loss": 1.2762, "step": 2476 }, { "epoch": 1.1520223152022315, "grad_norm": 4.677986145019531, "learning_rate": 6.7221101317534e-06, "loss": 1.3097, "step": 2478 }, { "epoch": 1.1529521152952116, "grad_norm": 4.739894390106201, "learning_rate": 6.77578431446472e-06, "loss": 1.3109, "step": 2480 }, { "epoch": 1.1538819153881916, "grad_norm": 4.717963695526123, "learning_rate": 6.829638362827434e-06, "loss": 1.2293, "step": 2482 }, { "epoch": 1.1548117154811715, "grad_norm": 4.74904203414917, "learning_rate": 6.8836717453238446e-06, "loss": 1.2207, "step": 2484 }, { "epoch": 1.1557415155741515, "grad_norm": 5.416865825653076, "learning_rate": 6.937883928666261e-06, "loss": 1.4103, "step": 2486 }, { "epoch": 1.1566713156671315, "grad_norm": 4.800599098205566, "learning_rate": 6.99227437780233e-06, "loss": 1.3148, "step": 2488 }, { "epoch": 1.1576011157601116, "grad_norm": 4.98972749710083, "learning_rate": 7.0468425559202805e-06, "loss": 1.3446, "step": 2490 }, { "epoch": 1.1585309158530916, "grad_norm": 4.694331169128418, "learning_rate": 7.101587924454235e-06, "loss": 1.246, "step": 2492 }, { "epoch": 1.1594607159460715, "grad_norm": 4.700960159301758, "learning_rate": 7.156509943089477e-06, "loss": 1.3226, "step": 2494 }, { "epoch": 1.1603905160390515, "grad_norm": 4.462610721588135, "learning_rate": 7.2116080697678735e-06, "loss": 1.1178, "step": 2496 }, { "epoch": 1.1613203161320316, "grad_norm": 4.224729537963867, "learning_rate": 7.2668817606931615e-06, "loss": 1.1758, "step": 2498 }, { "epoch": 1.1622501162250116, "grad_norm": 4.653846263885498, "learning_rate": 7.322330470336317e-06, "loss": 1.0774, "step": 2500 }, { "epoch": 1.1631799163179917, "grad_norm": 4.488913059234619, "learning_rate": 7.377953651440968e-06, "loss": 1.2536, "step": 2502 }, { "epoch": 1.1641097164109717, "grad_norm": 5.068818092346191, "learning_rate": 7.433750755028776e-06, "loss": 1.2893, "step": 2504 }, { "epoch": 1.1650395165039518, "grad_norm": 4.328025817871094, "learning_rate": 7.489721230404852e-06, "loss": 1.0814, "step": 2506 }, { "epoch": 1.1659693165969316, "grad_norm": 4.684333801269531, "learning_rate": 7.54586452516319e-06, "loss": 1.271, "step": 2508 }, { "epoch": 1.1668991166899116, "grad_norm": 4.398114204406738, "learning_rate": 7.6021800851921455e-06, "loss": 1.0893, "step": 2510 }, { "epoch": 1.1678289167828917, "grad_norm": 5.371665000915527, "learning_rate": 7.658667354679878e-06, "loss": 1.3174, "step": 2512 }, { "epoch": 1.1687587168758717, "grad_norm": 4.838919162750244, "learning_rate": 7.715325776119846e-06, "loss": 1.1343, "step": 2514 }, { "epoch": 1.1696885169688518, "grad_norm": 4.763671398162842, "learning_rate": 7.772154790316298e-06, "loss": 1.2702, "step": 2516 }, { "epoch": 1.1706183170618316, "grad_norm": 4.907008647918701, "learning_rate": 7.829153836389801e-06, "loss": 1.2247, "step": 2518 }, { "epoch": 1.1715481171548117, "grad_norm": 4.807231426239014, "learning_rate": 7.886322351782796e-06, "loss": 1.2784, "step": 2520 }, { "epoch": 1.1724779172477917, "grad_norm": 5.223166465759277, "learning_rate": 7.9436597722651e-06, "loss": 1.3266, "step": 2522 }, { "epoch": 1.1734077173407718, "grad_norm": 4.775054454803467, "learning_rate": 8.001165531939525e-06, "loss": 1.3545, "step": 2524 }, { "epoch": 1.1743375174337518, "grad_norm": 5.115091800689697, "learning_rate": 8.058839063247445e-06, "loss": 1.3679, "step": 2526 }, { "epoch": 1.1752673175267319, "grad_norm": 5.22568416595459, "learning_rate": 8.116679796974405e-06, "loss": 1.2929, "step": 2528 }, { "epoch": 1.1761971176197117, "grad_norm": 5.468040943145752, "learning_rate": 8.174687162255679e-06, "loss": 1.2713, "step": 2530 }, { "epoch": 1.1771269177126917, "grad_norm": 4.24010705947876, "learning_rate": 8.23286058658201e-06, "loss": 1.1126, "step": 2532 }, { "epoch": 1.1780567178056718, "grad_norm": 4.895482540130615, "learning_rate": 8.291199495805181e-06, "loss": 1.2516, "step": 2534 }, { "epoch": 1.1789865178986518, "grad_norm": 4.583105087280273, "learning_rate": 8.349703314143721e-06, "loss": 1.1359, "step": 2536 }, { "epoch": 1.1799163179916319, "grad_norm": 5.676013946533203, "learning_rate": 8.408371464188544e-06, "loss": 1.3681, "step": 2538 }, { "epoch": 1.1808461180846117, "grad_norm": 5.650251388549805, "learning_rate": 8.467203366908712e-06, "loss": 1.2338, "step": 2540 }, { "epoch": 1.1817759181775918, "grad_norm": 4.522214412689209, "learning_rate": 8.526198441657091e-06, "loss": 1.2226, "step": 2542 }, { "epoch": 1.1827057182705718, "grad_norm": 4.770697116851807, "learning_rate": 8.585356106176102e-06, "loss": 1.0644, "step": 2544 }, { "epoch": 1.1836355183635519, "grad_norm": 4.412100791931152, "learning_rate": 8.644675776603483e-06, "loss": 1.1793, "step": 2546 }, { "epoch": 1.184565318456532, "grad_norm": 4.551174163818359, "learning_rate": 8.704156867478037e-06, "loss": 1.245, "step": 2548 }, { "epoch": 1.185495118549512, "grad_norm": 4.72280216217041, "learning_rate": 8.763798791745426e-06, "loss": 1.2704, "step": 2550 }, { "epoch": 1.1864249186424918, "grad_norm": 5.146851539611816, "learning_rate": 8.823600960763908e-06, "loss": 1.1661, "step": 2552 }, { "epoch": 1.1873547187354718, "grad_norm": 4.759897708892822, "learning_rate": 8.883562784310229e-06, "loss": 1.2653, "step": 2554 }, { "epoch": 1.1882845188284519, "grad_norm": 5.416276454925537, "learning_rate": 8.943683670585399e-06, "loss": 1.3188, "step": 2556 }, { "epoch": 1.189214318921432, "grad_norm": 4.354958534240723, "learning_rate": 9.003963026220561e-06, "loss": 1.2347, "step": 2558 }, { "epoch": 1.190144119014412, "grad_norm": 4.965172290802002, "learning_rate": 9.064400256282779e-06, "loss": 1.2837, "step": 2560 }, { "epoch": 1.1910739191073918, "grad_norm": 4.95180606842041, "learning_rate": 9.124994764281e-06, "loss": 1.2356, "step": 2562 }, { "epoch": 1.1920037192003718, "grad_norm": 4.674472808837891, "learning_rate": 9.1857459521719e-06, "loss": 1.1323, "step": 2564 }, { "epoch": 1.192933519293352, "grad_norm": 4.737056732177734, "learning_rate": 9.246653220365778e-06, "loss": 1.1576, "step": 2566 }, { "epoch": 1.193863319386332, "grad_norm": 4.7623138427734375, "learning_rate": 9.30771596773249e-06, "loss": 1.1704, "step": 2568 }, { "epoch": 1.194793119479312, "grad_norm": 4.867915630340576, "learning_rate": 9.368933591607385e-06, "loss": 1.3133, "step": 2570 }, { "epoch": 1.195722919572292, "grad_norm": 4.811182498931885, "learning_rate": 9.430305487797198e-06, "loss": 1.1643, "step": 2572 }, { "epoch": 1.196652719665272, "grad_norm": 4.959022521972656, "learning_rate": 9.491831050586106e-06, "loss": 1.227, "step": 2574 }, { "epoch": 1.197582519758252, "grad_norm": 4.75521183013916, "learning_rate": 9.553509672741643e-06, "loss": 1.2824, "step": 2576 }, { "epoch": 1.198512319851232, "grad_norm": 5.0314435958862305, "learning_rate": 9.615340745520713e-06, "loss": 1.2591, "step": 2578 }, { "epoch": 1.199442119944212, "grad_norm": 4.401142120361328, "learning_rate": 9.6773236586756e-06, "loss": 1.1737, "step": 2580 }, { "epoch": 1.200371920037192, "grad_norm": 4.4815826416015625, "learning_rate": 9.739457800459946e-06, "loss": 1.1468, "step": 2582 }, { "epoch": 1.201301720130172, "grad_norm": 4.831883907318115, "learning_rate": 9.80174255763487e-06, "loss": 1.3159, "step": 2584 }, { "epoch": 1.202231520223152, "grad_norm": 5.470362186431885, "learning_rate": 9.864177315474983e-06, "loss": 1.2516, "step": 2586 }, { "epoch": 1.203161320316132, "grad_norm": 5.147711753845215, "learning_rate": 9.926761457774399e-06, "loss": 1.0966, "step": 2588 }, { "epoch": 1.204091120409112, "grad_norm": 5.1059675216674805, "learning_rate": 9.989494366852909e-06, "loss": 1.1542, "step": 2590 }, { "epoch": 1.205020920502092, "grad_norm": 4.474038124084473, "learning_rate": 1.0052375423562036e-05, "loss": 1.1934, "step": 2592 }, { "epoch": 1.2059507205950721, "grad_norm": 4.888648986816406, "learning_rate": 1.0115404007291147e-05, "loss": 1.284, "step": 2594 }, { "epoch": 1.2068805206880522, "grad_norm": 4.5540924072265625, "learning_rate": 1.0178579495973537e-05, "loss": 1.1828, "step": 2596 }, { "epoch": 1.207810320781032, "grad_norm": 5.169167995452881, "learning_rate": 1.024190126609265e-05, "loss": 1.2797, "step": 2598 }, { "epoch": 1.208740120874012, "grad_norm": 4.976434707641602, "learning_rate": 1.0305368692688181e-05, "loss": 1.2636, "step": 2600 }, { "epoch": 1.209669920966992, "grad_norm": 4.925263404846191, "learning_rate": 1.0368981149362253e-05, "loss": 1.1935, "step": 2602 }, { "epoch": 1.2105997210599722, "grad_norm": 4.6420440673828125, "learning_rate": 1.0432738008285597e-05, "loss": 1.1716, "step": 2604 }, { "epoch": 1.2115295211529522, "grad_norm": 5.2540602684021, "learning_rate": 1.0496638640203777e-05, "loss": 1.3119, "step": 2606 }, { "epoch": 1.212459321245932, "grad_norm": 5.016450881958008, "learning_rate": 1.0560682414443319e-05, "loss": 1.3822, "step": 2608 }, { "epoch": 1.213389121338912, "grad_norm": 4.988767623901367, "learning_rate": 1.0624868698918042e-05, "loss": 1.1119, "step": 2610 }, { "epoch": 1.2143189214318921, "grad_norm": 3.9508097171783447, "learning_rate": 1.068919686013525e-05, "loss": 1.0356, "step": 2612 }, { "epoch": 1.2152487215248722, "grad_norm": 5.096374988555908, "learning_rate": 1.0753666263201906e-05, "loss": 1.127, "step": 2614 }, { "epoch": 1.2161785216178522, "grad_norm": 4.83661413192749, "learning_rate": 1.0818276271831103e-05, "loss": 1.2382, "step": 2616 }, { "epoch": 1.2171083217108323, "grad_norm": 5.144566059112549, "learning_rate": 1.0883026248348085e-05, "loss": 1.2669, "step": 2618 }, { "epoch": 1.218038121803812, "grad_norm": 5.1389851570129395, "learning_rate": 1.094791555369674e-05, "loss": 1.3741, "step": 2620 }, { "epoch": 1.2189679218967922, "grad_norm": 5.356754779815674, "learning_rate": 1.1012943547445847e-05, "loss": 1.1771, "step": 2622 }, { "epoch": 1.2198977219897722, "grad_norm": 4.921077251434326, "learning_rate": 1.10781095877953e-05, "loss": 1.3711, "step": 2624 }, { "epoch": 1.2208275220827522, "grad_norm": 4.654454708099365, "learning_rate": 1.1143413031582663e-05, "loss": 1.3082, "step": 2626 }, { "epoch": 1.2217573221757323, "grad_norm": 4.913512706756592, "learning_rate": 1.1208853234289256e-05, "loss": 1.1967, "step": 2628 }, { "epoch": 1.2226871222687121, "grad_norm": 4.937932968139648, "learning_rate": 1.127442955004671e-05, "loss": 1.3873, "step": 2630 }, { "epoch": 1.2236169223616922, "grad_norm": 5.086702346801758, "learning_rate": 1.134014133164329e-05, "loss": 1.2917, "step": 2632 }, { "epoch": 1.2245467224546722, "grad_norm": 4.595028877258301, "learning_rate": 1.1405987930530201e-05, "loss": 1.1613, "step": 2634 }, { "epoch": 1.2254765225476523, "grad_norm": 4.369033336639404, "learning_rate": 1.1471968696828114e-05, "loss": 1.0997, "step": 2636 }, { "epoch": 1.2264063226406323, "grad_norm": 5.513169288635254, "learning_rate": 1.15380829793335e-05, "loss": 1.4484, "step": 2638 }, { "epoch": 1.2273361227336124, "grad_norm": 5.168580055236816, "learning_rate": 1.1604330125525082e-05, "loss": 1.3133, "step": 2640 }, { "epoch": 1.2282659228265922, "grad_norm": 4.803833961486816, "learning_rate": 1.1670709481570299e-05, "loss": 1.1795, "step": 2642 }, { "epoch": 1.2291957229195722, "grad_norm": 5.185527801513672, "learning_rate": 1.1737220392331658e-05, "loss": 1.1877, "step": 2644 }, { "epoch": 1.2301255230125523, "grad_norm": 5.1975297927856445, "learning_rate": 1.1803862201373344e-05, "loss": 1.185, "step": 2646 }, { "epoch": 1.2310553231055323, "grad_norm": 4.879311561584473, "learning_rate": 1.1870634250967606e-05, "loss": 1.2836, "step": 2648 }, { "epoch": 1.2319851231985124, "grad_norm": 4.802438735961914, "learning_rate": 1.1937535882101293e-05, "loss": 1.182, "step": 2650 }, { "epoch": 1.2329149232914922, "grad_norm": 5.253076553344727, "learning_rate": 1.2004566434482254e-05, "loss": 1.1891, "step": 2652 }, { "epoch": 1.2338447233844723, "grad_norm": 4.850290775299072, "learning_rate": 1.2071725246546076e-05, "loss": 1.1617, "step": 2654 }, { "epoch": 1.2347745234774523, "grad_norm": 4.284195423126221, "learning_rate": 1.2139011655462359e-05, "loss": 1.1079, "step": 2656 }, { "epoch": 1.2357043235704324, "grad_norm": 5.430441379547119, "learning_rate": 1.2206424997141376e-05, "loss": 1.191, "step": 2658 }, { "epoch": 1.2366341236634124, "grad_norm": 4.907715320587158, "learning_rate": 1.2273964606240733e-05, "loss": 1.3447, "step": 2660 }, { "epoch": 1.2375639237563925, "grad_norm": 4.860969543457031, "learning_rate": 1.234162981617168e-05, "loss": 1.2024, "step": 2662 }, { "epoch": 1.2384937238493725, "grad_norm": 4.921666145324707, "learning_rate": 1.2409419959105986e-05, "loss": 1.1592, "step": 2664 }, { "epoch": 1.2394235239423523, "grad_norm": 4.997951507568359, "learning_rate": 1.2477334365982275e-05, "loss": 1.245, "step": 2666 }, { "epoch": 1.2403533240353324, "grad_norm": 5.211683750152588, "learning_rate": 1.254537236651273e-05, "loss": 1.4251, "step": 2668 }, { "epoch": 1.2412831241283124, "grad_norm": 5.16604471206665, "learning_rate": 1.2613533289189829e-05, "loss": 1.2516, "step": 2670 }, { "epoch": 1.2422129242212925, "grad_norm": 5.529092788696289, "learning_rate": 1.2681816461292723e-05, "loss": 1.2189, "step": 2672 }, { "epoch": 1.2431427243142723, "grad_norm": 4.743631839752197, "learning_rate": 1.2750221208894094e-05, "loss": 1.1054, "step": 2674 }, { "epoch": 1.2440725244072524, "grad_norm": 5.187104225158691, "learning_rate": 1.2818746856866697e-05, "loss": 1.2687, "step": 2676 }, { "epoch": 1.2450023245002324, "grad_norm": 4.995729923248291, "learning_rate": 1.2887392728890072e-05, "loss": 1.2572, "step": 2678 }, { "epoch": 1.2459321245932125, "grad_norm": 4.892362117767334, "learning_rate": 1.2956158147457137e-05, "loss": 1.1379, "step": 2680 }, { "epoch": 1.2468619246861925, "grad_norm": 4.721160888671875, "learning_rate": 1.3025042433880989e-05, "loss": 1.174, "step": 2682 }, { "epoch": 1.2477917247791726, "grad_norm": 5.127843856811523, "learning_rate": 1.3094044908301533e-05, "loss": 1.1658, "step": 2684 }, { "epoch": 1.2487215248721526, "grad_norm": 4.93134069442749, "learning_rate": 1.3163164889692203e-05, "loss": 1.2105, "step": 2686 }, { "epoch": 1.2496513249651324, "grad_norm": 4.924200534820557, "learning_rate": 1.3232401695866707e-05, "loss": 1.1758, "step": 2688 }, { "epoch": 1.2505811250581125, "grad_norm": 5.115179538726807, "learning_rate": 1.3301754643485681e-05, "loss": 1.2171, "step": 2690 }, { "epoch": 1.2515109251510925, "grad_norm": 5.592020511627197, "learning_rate": 1.3371223048063553e-05, "loss": 1.4001, "step": 2692 }, { "epoch": 1.2524407252440726, "grad_norm": 4.881713390350342, "learning_rate": 1.344080622397523e-05, "loss": 1.0937, "step": 2694 }, { "epoch": 1.2533705253370524, "grad_norm": 4.971542835235596, "learning_rate": 1.3510503484462802e-05, "loss": 1.2038, "step": 2696 }, { "epoch": 1.2543003254300324, "grad_norm": 4.963320732116699, "learning_rate": 1.358031414164253e-05, "loss": 1.2802, "step": 2698 }, { "epoch": 1.2552301255230125, "grad_norm": 5.0115580558776855, "learning_rate": 1.3650237506511343e-05, "loss": 1.1964, "step": 2700 }, { "epoch": 1.2561599256159925, "grad_norm": 5.132814407348633, "learning_rate": 1.3720272888953885e-05, "loss": 1.295, "step": 2702 }, { "epoch": 1.2570897257089726, "grad_norm": 5.002631664276123, "learning_rate": 1.3790419597749221e-05, "loss": 1.2003, "step": 2704 }, { "epoch": 1.2580195258019526, "grad_norm": 5.426677227020264, "learning_rate": 1.3860676940577596e-05, "loss": 1.1678, "step": 2706 }, { "epoch": 1.2589493258949327, "grad_norm": 4.606490612030029, "learning_rate": 1.3931044224027483e-05, "loss": 1.2411, "step": 2708 }, { "epoch": 1.2598791259879125, "grad_norm": 5.140821933746338, "learning_rate": 1.400152075360214e-05, "loss": 1.2058, "step": 2710 }, { "epoch": 1.2608089260808926, "grad_norm": 5.464770317077637, "learning_rate": 1.4072105833726694e-05, "loss": 1.4083, "step": 2712 }, { "epoch": 1.2617387261738726, "grad_norm": 5.324878692626953, "learning_rate": 1.4142798767754918e-05, "loss": 1.2275, "step": 2714 }, { "epoch": 1.2626685262668527, "grad_norm": 4.6957011222839355, "learning_rate": 1.4213598857976056e-05, "loss": 1.2021, "step": 2716 }, { "epoch": 1.2635983263598327, "grad_norm": 5.21569299697876, "learning_rate": 1.4284505405621817e-05, "loss": 1.2732, "step": 2718 }, { "epoch": 1.2645281264528125, "grad_norm": 4.493168830871582, "learning_rate": 1.4355517710873206e-05, "loss": 1.0952, "step": 2720 }, { "epoch": 1.2654579265457926, "grad_norm": 5.005002498626709, "learning_rate": 1.4426635072867457e-05, "loss": 1.3123, "step": 2722 }, { "epoch": 1.2663877266387726, "grad_norm": 4.6291961669921875, "learning_rate": 1.4497856789704856e-05, "loss": 1.1761, "step": 2724 }, { "epoch": 1.2673175267317527, "grad_norm": 4.2360639572143555, "learning_rate": 1.4569182158455895e-05, "loss": 1.1688, "step": 2726 }, { "epoch": 1.2682473268247327, "grad_norm": 4.3340163230896, "learning_rate": 1.4640610475167918e-05, "loss": 0.9594, "step": 2728 }, { "epoch": 1.2691771269177128, "grad_norm": 4.878227710723877, "learning_rate": 1.4712141034872299e-05, "loss": 1.1686, "step": 2730 }, { "epoch": 1.2701069270106928, "grad_norm": 4.710108757019043, "learning_rate": 1.4783773131591307e-05, "loss": 1.0983, "step": 2732 }, { "epoch": 1.2710367271036727, "grad_norm": 5.1245269775390625, "learning_rate": 1.4855506058345008e-05, "loss": 1.1754, "step": 2734 }, { "epoch": 1.2719665271966527, "grad_norm": 4.623619556427002, "learning_rate": 1.4927339107158459e-05, "loss": 1.0983, "step": 2736 }, { "epoch": 1.2728963272896328, "grad_norm": 5.210716247558594, "learning_rate": 1.4999271569068424e-05, "loss": 1.2225, "step": 2738 }, { "epoch": 1.2738261273826128, "grad_norm": 5.101555347442627, "learning_rate": 1.5071302734130497e-05, "loss": 1.214, "step": 2740 }, { "epoch": 1.2747559274755926, "grad_norm": 6.030285358428955, "learning_rate": 1.5143431891426255e-05, "loss": 1.1247, "step": 2742 }, { "epoch": 1.2756857275685727, "grad_norm": 5.349708080291748, "learning_rate": 1.5215658329069965e-05, "loss": 1.0705, "step": 2744 }, { "epoch": 1.2766155276615527, "grad_norm": 5.331021785736084, "learning_rate": 1.5287981334215868e-05, "loss": 1.1448, "step": 2746 }, { "epoch": 1.2775453277545328, "grad_norm": 4.90387487411499, "learning_rate": 1.53604001930651e-05, "loss": 1.143, "step": 2748 }, { "epoch": 1.2784751278475128, "grad_norm": 5.752413749694824, "learning_rate": 1.5432914190872767e-05, "loss": 1.3571, "step": 2750 }, { "epoch": 1.2794049279404929, "grad_norm": 4.490790367126465, "learning_rate": 1.5505522611954998e-05, "loss": 1.2342, "step": 2752 }, { "epoch": 1.280334728033473, "grad_norm": 4.604039669036865, "learning_rate": 1.557822473969595e-05, "loss": 1.2435, "step": 2754 }, { "epoch": 1.2812645281264528, "grad_norm": 4.845996856689453, "learning_rate": 1.5651019856555002e-05, "loss": 1.1681, "step": 2756 }, { "epoch": 1.2821943282194328, "grad_norm": 5.0998077392578125, "learning_rate": 1.5723907244073736e-05, "loss": 1.2574, "step": 2758 }, { "epoch": 1.2831241283124128, "grad_norm": 4.535227298736572, "learning_rate": 1.5796886182883076e-05, "loss": 1.1199, "step": 2760 }, { "epoch": 1.284053928405393, "grad_norm": 4.654929161071777, "learning_rate": 1.5869955952710322e-05, "loss": 1.1008, "step": 2762 }, { "epoch": 1.2849837284983727, "grad_norm": 4.271506309509277, "learning_rate": 1.594311583238637e-05, "loss": 1.0629, "step": 2764 }, { "epoch": 1.2859135285913528, "grad_norm": 4.679157257080078, "learning_rate": 1.6016365099852756e-05, "loss": 1.0621, "step": 2766 }, { "epoch": 1.2868433286843328, "grad_norm": 5.480685234069824, "learning_rate": 1.6089703032168723e-05, "loss": 1.1196, "step": 2768 }, { "epoch": 1.2877731287773129, "grad_norm": 5.479466915130615, "learning_rate": 1.6163128905518585e-05, "loss": 1.1564, "step": 2770 }, { "epoch": 1.288702928870293, "grad_norm": 5.298022747039795, "learning_rate": 1.623664199521851e-05, "loss": 1.1802, "step": 2772 }, { "epoch": 1.289632728963273, "grad_norm": 5.20311164855957, "learning_rate": 1.6310241575724073e-05, "loss": 1.1606, "step": 2774 }, { "epoch": 1.290562529056253, "grad_norm": 5.320673942565918, "learning_rate": 1.638392692063709e-05, "loss": 1.2223, "step": 2776 }, { "epoch": 1.2914923291492328, "grad_norm": 5.155202865600586, "learning_rate": 1.6457697302712904e-05, "loss": 1.2534, "step": 2778 }, { "epoch": 1.292422129242213, "grad_norm": 5.058764934539795, "learning_rate": 1.653155199386772e-05, "loss": 1.2933, "step": 2780 }, { "epoch": 1.293351929335193, "grad_norm": 4.745816707611084, "learning_rate": 1.6605490265185485e-05, "loss": 1.0491, "step": 2782 }, { "epoch": 1.294281729428173, "grad_norm": 5.189685344696045, "learning_rate": 1.6679511386925326e-05, "loss": 1.2583, "step": 2784 }, { "epoch": 1.2952115295211528, "grad_norm": 4.791103839874268, "learning_rate": 1.6753614628528686e-05, "loss": 1.1993, "step": 2786 }, { "epoch": 1.2961413296141329, "grad_norm": 5.287075996398926, "learning_rate": 1.682779925862644e-05, "loss": 1.2078, "step": 2788 }, { "epoch": 1.297071129707113, "grad_norm": 4.514937400817871, "learning_rate": 1.690206454504626e-05, "loss": 1.0436, "step": 2790 }, { "epoch": 1.298000929800093, "grad_norm": 4.465831756591797, "learning_rate": 1.697640975481975e-05, "loss": 1.2318, "step": 2792 }, { "epoch": 1.298930729893073, "grad_norm": 4.77617883682251, "learning_rate": 1.7050834154189706e-05, "loss": 1.0492, "step": 2794 }, { "epoch": 1.299860529986053, "grad_norm": 5.134628772735596, "learning_rate": 1.7125337008617355e-05, "loss": 1.3317, "step": 2796 }, { "epoch": 1.300790330079033, "grad_norm": 5.154414653778076, "learning_rate": 1.7199917582789626e-05, "loss": 1.3338, "step": 2798 }, { "epoch": 1.301720130172013, "grad_norm": 5.106240272521973, "learning_rate": 1.7274575140626308e-05, "loss": 1.1777, "step": 2800 }, { "epoch": 1.302649930264993, "grad_norm": 4.867135524749756, "learning_rate": 1.7349308945287473e-05, "loss": 1.1765, "step": 2802 }, { "epoch": 1.303579730357973, "grad_norm": 4.9918599128723145, "learning_rate": 1.7424118259180665e-05, "loss": 1.0735, "step": 2804 }, { "epoch": 1.304509530450953, "grad_norm": 4.9455718994140625, "learning_rate": 1.749900234396808e-05, "loss": 1.0456, "step": 2806 }, { "epoch": 1.3054393305439331, "grad_norm": 4.706122398376465, "learning_rate": 1.7573960460574126e-05, "loss": 1.1656, "step": 2808 }, { "epoch": 1.306369130636913, "grad_norm": 5.768701553344727, "learning_rate": 1.764899186919242e-05, "loss": 1.1558, "step": 2810 }, { "epoch": 1.307298930729893, "grad_norm": 5.282050609588623, "learning_rate": 1.7724095829293196e-05, "loss": 1.2629, "step": 2812 }, { "epoch": 1.308228730822873, "grad_norm": 5.139875411987305, "learning_rate": 1.7799271599630776e-05, "loss": 1.243, "step": 2814 }, { "epoch": 1.309158530915853, "grad_norm": 4.897935390472412, "learning_rate": 1.7874518438250577e-05, "loss": 1.1029, "step": 2816 }, { "epoch": 1.3100883310088332, "grad_norm": 5.10130500793457, "learning_rate": 1.7949835602496763e-05, "loss": 1.1738, "step": 2818 }, { "epoch": 1.3110181311018132, "grad_norm": 4.790376663208008, "learning_rate": 1.802522234901926e-05, "loss": 1.1546, "step": 2820 }, { "epoch": 1.3119479311947932, "grad_norm": 5.686679840087891, "learning_rate": 1.8100677933781347e-05, "loss": 1.3262, "step": 2822 }, { "epoch": 1.312877731287773, "grad_norm": 5.34269380569458, "learning_rate": 1.8176201612066875e-05, "loss": 1.2405, "step": 2824 }, { "epoch": 1.3138075313807531, "grad_norm": 5.101320266723633, "learning_rate": 1.8251792638487593e-05, "loss": 1.1207, "step": 2826 }, { "epoch": 1.3147373314737332, "grad_norm": 5.8089070320129395, "learning_rate": 1.832745026699061e-05, "loss": 1.2189, "step": 2828 }, { "epoch": 1.3156671315667132, "grad_norm": 5.589970588684082, "learning_rate": 1.8403173750865675e-05, "loss": 1.1766, "step": 2830 }, { "epoch": 1.316596931659693, "grad_norm": 5.040368556976318, "learning_rate": 1.847896234275259e-05, "loss": 1.0912, "step": 2832 }, { "epoch": 1.317526731752673, "grad_norm": 5.040251731872559, "learning_rate": 1.8554815294648513e-05, "loss": 1.1635, "step": 2834 }, { "epoch": 1.3184565318456531, "grad_norm": 5.18376350402832, "learning_rate": 1.863073185791545e-05, "loss": 1.2248, "step": 2836 }, { "epoch": 1.3193863319386332, "grad_norm": 4.9214935302734375, "learning_rate": 1.8706711283287596e-05, "loss": 1.1706, "step": 2838 }, { "epoch": 1.3203161320316132, "grad_norm": 4.849795818328857, "learning_rate": 1.8782752820878628e-05, "loss": 1.0433, "step": 2840 }, { "epoch": 1.3212459321245933, "grad_norm": 4.87441349029541, "learning_rate": 1.8858855720189357e-05, "loss": 1.0761, "step": 2842 }, { "epoch": 1.3221757322175733, "grad_norm": 4.859393119812012, "learning_rate": 1.8935019230114802e-05, "loss": 1.1425, "step": 2844 }, { "epoch": 1.3231055323105532, "grad_norm": 5.174680709838867, "learning_rate": 1.901124259895196e-05, "loss": 1.0953, "step": 2846 }, { "epoch": 1.3240353324035332, "grad_norm": 5.521208763122559, "learning_rate": 1.9087525074406903e-05, "loss": 1.3356, "step": 2848 }, { "epoch": 1.3249651324965133, "grad_norm": 5.828486442565918, "learning_rate": 1.916386590360236e-05, "loss": 1.3567, "step": 2850 }, { "epoch": 1.3258949325894933, "grad_norm": 4.975980758666992, "learning_rate": 1.924026433308525e-05, "loss": 1.1193, "step": 2852 }, { "epoch": 1.3268247326824731, "grad_norm": 5.088500499725342, "learning_rate": 1.931671960883385e-05, "loss": 1.0585, "step": 2854 }, { "epoch": 1.3277545327754532, "grad_norm": 5.186136722564697, "learning_rate": 1.939323097626548e-05, "loss": 1.2541, "step": 2856 }, { "epoch": 1.3286843328684332, "grad_norm": 5.467367172241211, "learning_rate": 1.946979768024385e-05, "loss": 1.1389, "step": 2858 }, { "epoch": 1.3296141329614133, "grad_norm": 5.041043281555176, "learning_rate": 1.954641896508646e-05, "loss": 1.2798, "step": 2860 }, { "epoch": 1.3305439330543933, "grad_norm": 4.891330242156982, "learning_rate": 1.9623094074572183e-05, "loss": 1.1374, "step": 2862 }, { "epoch": 1.3314737331473734, "grad_norm": 5.071079730987549, "learning_rate": 1.9699822251948647e-05, "loss": 1.2487, "step": 2864 }, { "epoch": 1.3324035332403534, "grad_norm": 4.717793941497803, "learning_rate": 1.9776602739939717e-05, "loss": 1.1226, "step": 2866 }, { "epoch": 1.3333333333333333, "grad_norm": 5.083615303039551, "learning_rate": 1.9853434780752977e-05, "loss": 1.1046, "step": 2868 }, { "epoch": 1.3342631334263133, "grad_norm": 5.200608730316162, "learning_rate": 1.9930317616087216e-05, "loss": 1.1576, "step": 2870 }, { "epoch": 1.3351929335192934, "grad_norm": 4.673445224761963, "learning_rate": 2.0007250487139846e-05, "loss": 1.1488, "step": 2872 }, { "epoch": 1.3361227336122734, "grad_norm": 5.642797946929932, "learning_rate": 2.0084232634614517e-05, "loss": 1.1882, "step": 2874 }, { "epoch": 1.3370525337052532, "grad_norm": 5.819192409515381, "learning_rate": 2.0161263298728532e-05, "loss": 1.1774, "step": 2876 }, { "epoch": 1.3379823337982333, "grad_norm": 6.18271017074585, "learning_rate": 2.0238341719220264e-05, "loss": 1.1891, "step": 2878 }, { "epoch": 1.3389121338912133, "grad_norm": 5.071002960205078, "learning_rate": 2.0315467135356907e-05, "loss": 1.1132, "step": 2880 }, { "epoch": 1.3398419339841934, "grad_norm": 4.575469017028809, "learning_rate": 2.0392638785941705e-05, "loss": 1.024, "step": 2882 }, { "epoch": 1.3407717340771734, "grad_norm": 5.191923141479492, "learning_rate": 2.0469855909321574e-05, "loss": 1.2863, "step": 2884 }, { "epoch": 1.3417015341701535, "grad_norm": 4.922296047210693, "learning_rate": 2.0547117743394775e-05, "loss": 1.0818, "step": 2886 }, { "epoch": 1.3426313342631335, "grad_norm": 6.058284759521484, "learning_rate": 2.0624423525618098e-05, "loss": 1.1762, "step": 2888 }, { "epoch": 1.3435611343561136, "grad_norm": 5.041345596313477, "learning_rate": 2.0701772493014777e-05, "loss": 1.1999, "step": 2890 }, { "epoch": 1.3444909344490934, "grad_norm": 6.192108154296875, "learning_rate": 2.0779163882181665e-05, "loss": 1.3505, "step": 2892 }, { "epoch": 1.3454207345420734, "grad_norm": 4.918419361114502, "learning_rate": 2.0856596929297013e-05, "loss": 1.0981, "step": 2894 }, { "epoch": 1.3463505346350535, "grad_norm": 5.401242256164551, "learning_rate": 2.0934070870127935e-05, "loss": 1.1568, "step": 2896 }, { "epoch": 1.3472803347280335, "grad_norm": 4.95237398147583, "learning_rate": 2.101158494003786e-05, "loss": 1.185, "step": 2898 }, { "epoch": 1.3482101348210134, "grad_norm": 5.196049213409424, "learning_rate": 2.1089138373994237e-05, "loss": 1.1414, "step": 2900 }, { "epoch": 1.3491399349139934, "grad_norm": 5.7716474533081055, "learning_rate": 2.1166730406575988e-05, "loss": 1.4393, "step": 2902 }, { "epoch": 1.3500697350069735, "grad_norm": 5.66400671005249, "learning_rate": 2.1244360271981097e-05, "loss": 1.1905, "step": 2904 }, { "epoch": 1.3509995350999535, "grad_norm": 4.920319557189941, "learning_rate": 2.1322027204034086e-05, "loss": 1.0746, "step": 2906 }, { "epoch": 1.3519293351929336, "grad_norm": 4.577380657196045, "learning_rate": 2.1399730436193714e-05, "loss": 1.0323, "step": 2908 }, { "epoch": 1.3528591352859136, "grad_norm": 5.331800937652588, "learning_rate": 2.1477469201560452e-05, "loss": 1.197, "step": 2910 }, { "epoch": 1.3537889353788937, "grad_norm": 4.311004638671875, "learning_rate": 2.155524273288406e-05, "loss": 1.0878, "step": 2912 }, { "epoch": 1.3547187354718735, "grad_norm": 4.5656280517578125, "learning_rate": 2.1633050262571216e-05, "loss": 1.0828, "step": 2914 }, { "epoch": 1.3556485355648535, "grad_norm": 4.730776309967041, "learning_rate": 2.1710891022692942e-05, "loss": 1.037, "step": 2916 }, { "epoch": 1.3565783356578336, "grad_norm": 4.679433822631836, "learning_rate": 2.178876424499245e-05, "loss": 1.1127, "step": 2918 }, { "epoch": 1.3575081357508136, "grad_norm": 5.123257637023926, "learning_rate": 2.1866669160892435e-05, "loss": 1.1668, "step": 2920 }, { "epoch": 1.3584379358437935, "grad_norm": 5.06130838394165, "learning_rate": 2.194460500150278e-05, "loss": 1.0332, "step": 2922 }, { "epoch": 1.3593677359367735, "grad_norm": 4.774113178253174, "learning_rate": 2.2022570997628283e-05, "loss": 1.0132, "step": 2924 }, { "epoch": 1.3602975360297536, "grad_norm": 5.107748985290527, "learning_rate": 2.210056637977599e-05, "loss": 1.0496, "step": 2926 }, { "epoch": 1.3612273361227336, "grad_norm": 4.962146759033203, "learning_rate": 2.217859037816298e-05, "loss": 0.9626, "step": 2928 }, { "epoch": 1.3621571362157137, "grad_norm": 4.403737545013428, "learning_rate": 2.2256642222723906e-05, "loss": 1.032, "step": 2930 }, { "epoch": 1.3630869363086937, "grad_norm": 5.611242771148682, "learning_rate": 2.2334721143118514e-05, "loss": 1.2342, "step": 2932 }, { "epoch": 1.3640167364016738, "grad_norm": 4.959303379058838, "learning_rate": 2.2412826368739467e-05, "loss": 1.1437, "step": 2934 }, { "epoch": 1.3649465364946536, "grad_norm": 5.300534248352051, "learning_rate": 2.2490957128719654e-05, "loss": 1.0764, "step": 2936 }, { "epoch": 1.3658763365876336, "grad_norm": 5.047192096710205, "learning_rate": 2.256911265194004e-05, "loss": 1.1564, "step": 2938 }, { "epoch": 1.3668061366806137, "grad_norm": 5.239954471588135, "learning_rate": 2.264729216703716e-05, "loss": 1.0488, "step": 2940 }, { "epoch": 1.3677359367735937, "grad_norm": 4.805636405944824, "learning_rate": 2.2725494902410787e-05, "loss": 1.0878, "step": 2942 }, { "epoch": 1.3686657368665736, "grad_norm": 4.43528413772583, "learning_rate": 2.280372008623145e-05, "loss": 1.0113, "step": 2944 }, { "epoch": 1.3695955369595536, "grad_norm": 5.437524795532227, "learning_rate": 2.2881966946448187e-05, "loss": 1.1783, "step": 2946 }, { "epoch": 1.3705253370525337, "grad_norm": 5.100215435028076, "learning_rate": 2.2960234710796108e-05, "loss": 1.0287, "step": 2948 }, { "epoch": 1.3714551371455137, "grad_norm": 6.394347667694092, "learning_rate": 2.30385226068039e-05, "loss": 1.1048, "step": 2950 }, { "epoch": 1.3723849372384938, "grad_norm": 4.9175639152526855, "learning_rate": 2.311682986180172e-05, "loss": 1.0871, "step": 2952 }, { "epoch": 1.3733147373314738, "grad_norm": 4.925195693969727, "learning_rate": 2.3195155702928494e-05, "loss": 1.0186, "step": 2954 }, { "epoch": 1.3742445374244538, "grad_norm": 5.217380523681641, "learning_rate": 2.3273499357139883e-05, "loss": 1.137, "step": 2956 }, { "epoch": 1.3751743375174337, "grad_norm": 4.965904712677002, "learning_rate": 2.3351860051215593e-05, "loss": 1.187, "step": 2958 }, { "epoch": 1.3761041376104137, "grad_norm": 5.620102405548096, "learning_rate": 2.3430237011767174e-05, "loss": 1.2665, "step": 2960 }, { "epoch": 1.3770339377033938, "grad_norm": 4.79713773727417, "learning_rate": 2.3508629465245758e-05, "loss": 1.118, "step": 2962 }, { "epoch": 1.3779637377963738, "grad_norm": 4.932215690612793, "learning_rate": 2.358703663794941e-05, "loss": 1.0887, "step": 2964 }, { "epoch": 1.3788935378893539, "grad_norm": 4.671698570251465, "learning_rate": 2.366545775603101e-05, "loss": 1.1066, "step": 2966 }, { "epoch": 1.3798233379823337, "grad_norm": 4.693244934082031, "learning_rate": 2.37438920455058e-05, "loss": 1.1186, "step": 2968 }, { "epoch": 1.3807531380753137, "grad_norm": 5.177333354949951, "learning_rate": 2.382233873225897e-05, "loss": 1.0964, "step": 2970 }, { "epoch": 1.3816829381682938, "grad_norm": 4.88640022277832, "learning_rate": 2.390079704205341e-05, "loss": 1.1538, "step": 2972 }, { "epoch": 1.3826127382612738, "grad_norm": 4.982922554016113, "learning_rate": 2.3979266200537276e-05, "loss": 1.1851, "step": 2974 }, { "epoch": 1.383542538354254, "grad_norm": 5.2384538650512695, "learning_rate": 2.405774543325166e-05, "loss": 1.2449, "step": 2976 }, { "epoch": 1.384472338447234, "grad_norm": 4.8283796310424805, "learning_rate": 2.413623396563824e-05, "loss": 1.2045, "step": 2978 }, { "epoch": 1.385402138540214, "grad_norm": 5.76344108581543, "learning_rate": 2.4214731023046837e-05, "loss": 1.2523, "step": 2980 }, { "epoch": 1.3863319386331938, "grad_norm": 4.919862747192383, "learning_rate": 2.429323583074321e-05, "loss": 0.947, "step": 2982 }, { "epoch": 1.3872617387261739, "grad_norm": 4.769815444946289, "learning_rate": 2.43717476139166e-05, "loss": 1.1013, "step": 2984 }, { "epoch": 1.388191538819154, "grad_norm": 4.870710372924805, "learning_rate": 2.4450265597687426e-05, "loss": 1.0636, "step": 2986 }, { "epoch": 1.389121338912134, "grad_norm": 5.308908462524414, "learning_rate": 2.4528789007114817e-05, "loss": 1.0381, "step": 2988 }, { "epoch": 1.3900511390051138, "grad_norm": 5.188927173614502, "learning_rate": 2.4607317067204523e-05, "loss": 0.9891, "step": 2990 }, { "epoch": 1.3909809390980938, "grad_norm": 5.301298141479492, "learning_rate": 2.4685849002916244e-05, "loss": 1.2056, "step": 2992 }, { "epoch": 1.3919107391910739, "grad_norm": 4.843328952789307, "learning_rate": 2.4764384039171434e-05, "loss": 1.067, "step": 2994 }, { "epoch": 1.392840539284054, "grad_norm": 5.218242168426514, "learning_rate": 2.4842921400861076e-05, "loss": 1.1394, "step": 2996 }, { "epoch": 1.393770339377034, "grad_norm": 5.947934150695801, "learning_rate": 2.4921460312853028e-05, "loss": 1.2414, "step": 2998 }, { "epoch": 1.394700139470014, "grad_norm": 5.1968536376953125, "learning_rate": 2.5000000000000035e-05, "loss": 1.0295, "step": 3000 }, { "epoch": 1.394700139470014, "eval_cer": 0.6145233487289695, "eval_loss": 1.1089744567871094, "eval_runtime": 404.1444, "eval_samples_per_second": 31.41, "eval_steps_per_second": 0.982, "step": 3000 }, { "epoch": 1.395629939562994, "grad_norm": 5.149818420410156, "learning_rate": 2.507853968714704e-05, "loss": 0.9977, "step": 3002 }, { "epoch": 1.396559739655974, "grad_norm": 4.989321708679199, "learning_rate": 2.5157078599138997e-05, "loss": 1.2353, "step": 3004 }, { "epoch": 1.397489539748954, "grad_norm": 5.093796730041504, "learning_rate": 2.5235615960828643e-05, "loss": 1.1091, "step": 3006 }, { "epoch": 1.398419339841934, "grad_norm": 4.9392571449279785, "learning_rate": 2.531415099708385e-05, "loss": 1.1454, "step": 3008 }, { "epoch": 1.399349139934914, "grad_norm": 4.813900470733643, "learning_rate": 2.5392682932795544e-05, "loss": 1.0398, "step": 3010 }, { "epoch": 1.4002789400278939, "grad_norm": 4.479961395263672, "learning_rate": 2.547121099288523e-05, "loss": 1.0213, "step": 3012 }, { "epoch": 1.401208740120874, "grad_norm": 5.218460559844971, "learning_rate": 2.5549734402312674e-05, "loss": 1.0089, "step": 3014 }, { "epoch": 1.402138540213854, "grad_norm": 4.49621057510376, "learning_rate": 2.5628252386083477e-05, "loss": 0.9682, "step": 3016 }, { "epoch": 1.403068340306834, "grad_norm": 5.286466121673584, "learning_rate": 2.570676416925687e-05, "loss": 1.1304, "step": 3018 }, { "epoch": 1.403998140399814, "grad_norm": 5.234724998474121, "learning_rate": 2.5785268976953237e-05, "loss": 1.1129, "step": 3020 }, { "epoch": 1.4049279404927941, "grad_norm": 5.126001834869385, "learning_rate": 2.586376603436183e-05, "loss": 1.046, "step": 3022 }, { "epoch": 1.4058577405857742, "grad_norm": 5.563232421875, "learning_rate": 2.594225456674841e-05, "loss": 1.163, "step": 3024 }, { "epoch": 1.406787540678754, "grad_norm": 4.934899806976318, "learning_rate": 2.602073379946277e-05, "loss": 0.9063, "step": 3026 }, { "epoch": 1.407717340771734, "grad_norm": 5.015291690826416, "learning_rate": 2.6099202957946665e-05, "loss": 1.0925, "step": 3028 }, { "epoch": 1.408647140864714, "grad_norm": 6.031887054443359, "learning_rate": 2.6177661267741123e-05, "loss": 1.1456, "step": 3030 }, { "epoch": 1.4095769409576941, "grad_norm": 5.152888298034668, "learning_rate": 2.625610795449427e-05, "loss": 1.0067, "step": 3032 }, { "epoch": 1.410506741050674, "grad_norm": 5.335072040557861, "learning_rate": 2.6334542243969055e-05, "loss": 1.1896, "step": 3034 }, { "epoch": 1.411436541143654, "grad_norm": 4.910932540893555, "learning_rate": 2.6412963362050655e-05, "loss": 1.1583, "step": 3036 }, { "epoch": 1.412366341236634, "grad_norm": 4.646839618682861, "learning_rate": 2.64913705347543e-05, "loss": 1.0143, "step": 3038 }, { "epoch": 1.4132961413296141, "grad_norm": 5.369105815887451, "learning_rate": 2.6569762988232883e-05, "loss": 1.1172, "step": 3040 }, { "epoch": 1.4142259414225942, "grad_norm": 5.991391181945801, "learning_rate": 2.664813994878447e-05, "loss": 1.1845, "step": 3042 }, { "epoch": 1.4151557415155742, "grad_norm": 5.242664813995361, "learning_rate": 2.672650064286019e-05, "loss": 1.1201, "step": 3044 }, { "epoch": 1.4160855416085543, "grad_norm": 5.1294050216674805, "learning_rate": 2.6804844297071546e-05, "loss": 1.0976, "step": 3046 }, { "epoch": 1.417015341701534, "grad_norm": 5.853127479553223, "learning_rate": 2.6883170138198343e-05, "loss": 1.0407, "step": 3048 }, { "epoch": 1.4179451417945141, "grad_norm": 5.087705135345459, "learning_rate": 2.696147739319617e-05, "loss": 1.1361, "step": 3050 }, { "epoch": 1.4188749418874942, "grad_norm": 4.916391372680664, "learning_rate": 2.7039765289203983e-05, "loss": 1.1277, "step": 3052 }, { "epoch": 1.4198047419804742, "grad_norm": 4.580591201782227, "learning_rate": 2.7118033053551866e-05, "loss": 1.0484, "step": 3054 }, { "epoch": 1.4207345420734543, "grad_norm": 5.200559616088867, "learning_rate": 2.7196279913768608e-05, "loss": 1.1064, "step": 3056 }, { "epoch": 1.4216643421664341, "grad_norm": 5.027926921844482, "learning_rate": 2.7274505097589293e-05, "loss": 1.0466, "step": 3058 }, { "epoch": 1.4225941422594142, "grad_norm": 5.296930313110352, "learning_rate": 2.7352707832962875e-05, "loss": 1.1543, "step": 3060 }, { "epoch": 1.4235239423523942, "grad_norm": 5.199962139129639, "learning_rate": 2.743088734806002e-05, "loss": 1.0791, "step": 3062 }, { "epoch": 1.4244537424453743, "grad_norm": 5.3956170082092285, "learning_rate": 2.75090428712804e-05, "loss": 1.1714, "step": 3064 }, { "epoch": 1.4253835425383543, "grad_norm": 5.247348308563232, "learning_rate": 2.758717363126059e-05, "loss": 1.2049, "step": 3066 }, { "epoch": 1.4263133426313344, "grad_norm": 5.731723308563232, "learning_rate": 2.766527885688154e-05, "loss": 1.2186, "step": 3068 }, { "epoch": 1.4272431427243144, "grad_norm": 5.230646133422852, "learning_rate": 2.7743357777276147e-05, "loss": 1.1046, "step": 3070 }, { "epoch": 1.4281729428172942, "grad_norm": 5.530446529388428, "learning_rate": 2.7821409621837074e-05, "loss": 1.0216, "step": 3072 }, { "epoch": 1.4291027429102743, "grad_norm": 5.456628799438477, "learning_rate": 2.789943362022408e-05, "loss": 0.918, "step": 3074 }, { "epoch": 1.4300325430032543, "grad_norm": 5.341097831726074, "learning_rate": 2.7977429002371753e-05, "loss": 1.1011, "step": 3076 }, { "epoch": 1.4309623430962344, "grad_norm": 5.391219615936279, "learning_rate": 2.8055394998497267e-05, "loss": 1.1374, "step": 3078 }, { "epoch": 1.4318921431892142, "grad_norm": 5.5867509841918945, "learning_rate": 2.8133330839107632e-05, "loss": 1.1284, "step": 3080 }, { "epoch": 1.4328219432821943, "grad_norm": 4.960585594177246, "learning_rate": 2.8211235755007596e-05, "loss": 1.1829, "step": 3082 }, { "epoch": 1.4337517433751743, "grad_norm": 4.983640193939209, "learning_rate": 2.828910897730708e-05, "loss": 1.2394, "step": 3084 }, { "epoch": 1.4346815434681544, "grad_norm": 5.227949619293213, "learning_rate": 2.8366949737428827e-05, "loss": 1.006, "step": 3086 }, { "epoch": 1.4356113435611344, "grad_norm": 4.727424621582031, "learning_rate": 2.8444757267115985e-05, "loss": 1.1175, "step": 3088 }, { "epoch": 1.4365411436541144, "grad_norm": 5.240176200866699, "learning_rate": 2.8522530798439598e-05, "loss": 1.0487, "step": 3090 }, { "epoch": 1.4374709437470945, "grad_norm": 4.893446445465088, "learning_rate": 2.860026956380633e-05, "loss": 0.9622, "step": 3092 }, { "epoch": 1.4384007438400743, "grad_norm": 5.487582683563232, "learning_rate": 2.8677972795965957e-05, "loss": 1.075, "step": 3094 }, { "epoch": 1.4393305439330544, "grad_norm": 5.058816909790039, "learning_rate": 2.875563972801897e-05, "loss": 1.0264, "step": 3096 }, { "epoch": 1.4402603440260344, "grad_norm": 5.545629978179932, "learning_rate": 2.8833269593424028e-05, "loss": 1.0507, "step": 3098 }, { "epoch": 1.4411901441190145, "grad_norm": 5.647602081298828, "learning_rate": 2.8910861626005806e-05, "loss": 1.073, "step": 3100 }, { "epoch": 1.4421199442119943, "grad_norm": 5.051565647125244, "learning_rate": 2.8988415059962204e-05, "loss": 1.0907, "step": 3102 }, { "epoch": 1.4430497443049743, "grad_norm": 5.32147216796875, "learning_rate": 2.9065929129872118e-05, "loss": 1.052, "step": 3104 }, { "epoch": 1.4439795443979544, "grad_norm": 4.771831512451172, "learning_rate": 2.9143403070703034e-05, "loss": 1.0794, "step": 3106 }, { "epoch": 1.4449093444909344, "grad_norm": 4.781806468963623, "learning_rate": 2.9220836117818358e-05, "loss": 1.0563, "step": 3108 }, { "epoch": 1.4458391445839145, "grad_norm": 5.3230109214782715, "learning_rate": 2.9298227506985263e-05, "loss": 1.042, "step": 3110 }, { "epoch": 1.4467689446768945, "grad_norm": 4.5146050453186035, "learning_rate": 2.9375576474381942e-05, "loss": 1.1086, "step": 3112 }, { "epoch": 1.4476987447698746, "grad_norm": 4.788575172424316, "learning_rate": 2.9452882256605268e-05, "loss": 1.0949, "step": 3114 }, { "epoch": 1.4486285448628544, "grad_norm": 5.219273567199707, "learning_rate": 2.9530144090678462e-05, "loss": 1.1909, "step": 3116 }, { "epoch": 1.4495583449558345, "grad_norm": 5.269089698791504, "learning_rate": 2.960736121405836e-05, "loss": 1.0846, "step": 3118 }, { "epoch": 1.4504881450488145, "grad_norm": 5.024764060974121, "learning_rate": 2.9684532864643136e-05, "loss": 1.1, "step": 3120 }, { "epoch": 1.4514179451417946, "grad_norm": 5.369356632232666, "learning_rate": 2.9761658280779786e-05, "loss": 1.1118, "step": 3122 }, { "epoch": 1.4523477452347744, "grad_norm": 5.090078830718994, "learning_rate": 2.9838736701271545e-05, "loss": 1.0649, "step": 3124 }, { "epoch": 1.4532775453277544, "grad_norm": 5.536306858062744, "learning_rate": 2.991576736538553e-05, "loss": 1.0519, "step": 3126 }, { "epoch": 1.4542073454207345, "grad_norm": 4.661305904388428, "learning_rate": 2.99927495128602e-05, "loss": 0.9886, "step": 3128 }, { "epoch": 1.4551371455137145, "grad_norm": 4.952541351318359, "learning_rate": 3.0069682383912827e-05, "loss": 1.0513, "step": 3130 }, { "epoch": 1.4560669456066946, "grad_norm": 5.350820541381836, "learning_rate": 3.014656521924705e-05, "loss": 1.1904, "step": 3132 }, { "epoch": 1.4569967456996746, "grad_norm": 4.446784496307373, "learning_rate": 3.0223397260060322e-05, "loss": 0.8695, "step": 3134 }, { "epoch": 1.4579265457926547, "grad_norm": 5.544947147369385, "learning_rate": 3.0300177748051386e-05, "loss": 1.0838, "step": 3136 }, { "epoch": 1.4588563458856345, "grad_norm": 5.204753398895264, "learning_rate": 3.037690592542785e-05, "loss": 1.0744, "step": 3138 }, { "epoch": 1.4597861459786146, "grad_norm": 4.987783908843994, "learning_rate": 3.0453581034913598e-05, "loss": 1.014, "step": 3140 }, { "epoch": 1.4607159460715946, "grad_norm": 5.284018516540527, "learning_rate": 3.0530202319756184e-05, "loss": 1.1087, "step": 3142 }, { "epoch": 1.4616457461645747, "grad_norm": 5.258944511413574, "learning_rate": 3.060676902373455e-05, "loss": 1.035, "step": 3144 }, { "epoch": 1.4625755462575547, "grad_norm": 4.552896022796631, "learning_rate": 3.06832803911662e-05, "loss": 1.0169, "step": 3146 }, { "epoch": 1.4635053463505345, "grad_norm": 5.346978187561035, "learning_rate": 3.0759735666914785e-05, "loss": 1.0211, "step": 3148 }, { "epoch": 1.4644351464435146, "grad_norm": 5.458957672119141, "learning_rate": 3.0836134096397676e-05, "loss": 1.1412, "step": 3150 }, { "epoch": 1.4653649465364946, "grad_norm": 4.6493916511535645, "learning_rate": 3.091247492559313e-05, "loss": 1.0553, "step": 3152 }, { "epoch": 1.4662947466294747, "grad_norm": 5.0740156173706055, "learning_rate": 3.098875740104807e-05, "loss": 1.082, "step": 3154 }, { "epoch": 1.4672245467224547, "grad_norm": 5.248569965362549, "learning_rate": 3.10649807698852e-05, "loss": 1.1019, "step": 3156 }, { "epoch": 1.4681543468154348, "grad_norm": 4.60385799407959, "learning_rate": 3.114114427981067e-05, "loss": 1.0187, "step": 3158 }, { "epoch": 1.4690841469084148, "grad_norm": 5.039738655090332, "learning_rate": 3.121724717912139e-05, "loss": 1.0586, "step": 3160 }, { "epoch": 1.4700139470013946, "grad_norm": 4.566694259643555, "learning_rate": 3.1293288716712444e-05, "loss": 1.049, "step": 3162 }, { "epoch": 1.4709437470943747, "grad_norm": 5.481674671173096, "learning_rate": 3.136926814208456e-05, "loss": 1.0623, "step": 3164 }, { "epoch": 1.4718735471873547, "grad_norm": 4.920689582824707, "learning_rate": 3.144518470535149e-05, "loss": 1.0188, "step": 3166 }, { "epoch": 1.4728033472803348, "grad_norm": 5.155655384063721, "learning_rate": 3.1521037657247436e-05, "loss": 0.9985, "step": 3168 }, { "epoch": 1.4737331473733146, "grad_norm": 5.141946792602539, "learning_rate": 3.1596826249134304e-05, "loss": 0.9708, "step": 3170 }, { "epoch": 1.4746629474662947, "grad_norm": 5.406745910644531, "learning_rate": 3.167254973300938e-05, "loss": 1.1806, "step": 3172 }, { "epoch": 1.4755927475592747, "grad_norm": 4.879103660583496, "learning_rate": 3.1748207361512396e-05, "loss": 0.9859, "step": 3174 }, { "epoch": 1.4765225476522548, "grad_norm": 4.96028995513916, "learning_rate": 3.182379838793311e-05, "loss": 0.9563, "step": 3176 }, { "epoch": 1.4774523477452348, "grad_norm": 4.546140193939209, "learning_rate": 3.1899322066218645e-05, "loss": 0.9244, "step": 3178 }, { "epoch": 1.4783821478382149, "grad_norm": 5.219915866851807, "learning_rate": 3.197477765098071e-05, "loss": 0.9685, "step": 3180 }, { "epoch": 1.479311947931195, "grad_norm": 4.657711505889893, "learning_rate": 3.205016439750323e-05, "loss": 0.9697, "step": 3182 }, { "epoch": 1.4802417480241747, "grad_norm": 5.090729713439941, "learning_rate": 3.2125481561749415e-05, "loss": 1.0865, "step": 3184 }, { "epoch": 1.4811715481171548, "grad_norm": 5.89409875869751, "learning_rate": 3.220072840036922e-05, "loss": 1.0084, "step": 3186 }, { "epoch": 1.4821013482101348, "grad_norm": 4.5132880210876465, "learning_rate": 3.2275904170706797e-05, "loss": 1.0703, "step": 3188 }, { "epoch": 1.4830311483031149, "grad_norm": 5.714105606079102, "learning_rate": 3.2351008130807594e-05, "loss": 1.1391, "step": 3190 }, { "epoch": 1.4839609483960947, "grad_norm": 4.1493401527404785, "learning_rate": 3.242603953942587e-05, "loss": 0.9967, "step": 3192 }, { "epoch": 1.4848907484890748, "grad_norm": 5.2989420890808105, "learning_rate": 3.2500997656031915e-05, "loss": 1.1453, "step": 3194 }, { "epoch": 1.4858205485820548, "grad_norm": 4.4770426750183105, "learning_rate": 3.2575881740819334e-05, "loss": 0.8142, "step": 3196 }, { "epoch": 1.4867503486750349, "grad_norm": 5.259965896606445, "learning_rate": 3.265069105471252e-05, "loss": 1.0431, "step": 3198 }, { "epoch": 1.487680148768015, "grad_norm": 5.191216468811035, "learning_rate": 3.2725424859373684e-05, "loss": 0.9414, "step": 3200 }, { "epoch": 1.488609948860995, "grad_norm": 4.114098072052002, "learning_rate": 3.280008241721037e-05, "loss": 0.8857, "step": 3202 }, { "epoch": 1.489539748953975, "grad_norm": 4.737986087799072, "learning_rate": 3.287466299138261e-05, "loss": 1.0424, "step": 3204 }, { "epoch": 1.4904695490469548, "grad_norm": 4.887531757354736, "learning_rate": 3.294916584581028e-05, "loss": 1.0273, "step": 3206 }, { "epoch": 1.4913993491399349, "grad_norm": 6.15742301940918, "learning_rate": 3.3023590245180245e-05, "loss": 1.167, "step": 3208 }, { "epoch": 1.492329149232915, "grad_norm": 4.165219306945801, "learning_rate": 3.3097935454953745e-05, "loss": 0.8427, "step": 3210 }, { "epoch": 1.493258949325895, "grad_norm": 4.762811660766602, "learning_rate": 3.3172200741373584e-05, "loss": 1.0134, "step": 3212 }, { "epoch": 1.4941887494188748, "grad_norm": 4.703677177429199, "learning_rate": 3.324638537147132e-05, "loss": 1.0141, "step": 3214 }, { "epoch": 1.4951185495118549, "grad_norm": 4.322537899017334, "learning_rate": 3.3320488613074676e-05, "loss": 0.9141, "step": 3216 }, { "epoch": 1.496048349604835, "grad_norm": 4.899826526641846, "learning_rate": 3.3394509734814504e-05, "loss": 1.0674, "step": 3218 }, { "epoch": 1.496978149697815, "grad_norm": 5.298437118530273, "learning_rate": 3.346844800613229e-05, "loss": 0.92, "step": 3220 }, { "epoch": 1.497907949790795, "grad_norm": 5.369009494781494, "learning_rate": 3.3542302697287095e-05, "loss": 1.1185, "step": 3222 }, { "epoch": 1.498837749883775, "grad_norm": 5.017469882965088, "learning_rate": 3.3616073079362906e-05, "loss": 0.9589, "step": 3224 }, { "epoch": 1.499767549976755, "grad_norm": 4.888718128204346, "learning_rate": 3.368975842427592e-05, "loss": 0.8881, "step": 3226 }, { "epoch": 1.5006973500697351, "grad_norm": 5.29546594619751, "learning_rate": 3.376335800478147e-05, "loss": 1.0717, "step": 3228 }, { "epoch": 1.501627150162715, "grad_norm": 4.752091407775879, "learning_rate": 3.383687109448141e-05, "loss": 0.8848, "step": 3230 }, { "epoch": 1.502556950255695, "grad_norm": 5.550063610076904, "learning_rate": 3.3910296967831266e-05, "loss": 1.007, "step": 3232 }, { "epoch": 1.503486750348675, "grad_norm": 5.632531642913818, "learning_rate": 3.3983634900147264e-05, "loss": 1.001, "step": 3234 }, { "epoch": 1.504416550441655, "grad_norm": 5.5728678703308105, "learning_rate": 3.4056884167613636e-05, "loss": 1.0582, "step": 3236 }, { "epoch": 1.505346350534635, "grad_norm": 5.696204662322998, "learning_rate": 3.413004404728969e-05, "loss": 1.0385, "step": 3238 }, { "epoch": 1.506276150627615, "grad_norm": 5.017465591430664, "learning_rate": 3.4203113817116936e-05, "loss": 0.9531, "step": 3240 }, { "epoch": 1.507205950720595, "grad_norm": 5.693276405334473, "learning_rate": 3.427609275592626e-05, "loss": 0.8687, "step": 3242 }, { "epoch": 1.508135750813575, "grad_norm": 4.9389872550964355, "learning_rate": 3.434898014344501e-05, "loss": 1.0134, "step": 3244 }, { "epoch": 1.5090655509065551, "grad_norm": 5.384556770324707, "learning_rate": 3.442177526030406e-05, "loss": 1.109, "step": 3246 }, { "epoch": 1.5099953509995352, "grad_norm": 4.37221622467041, "learning_rate": 3.449447738804502e-05, "loss": 0.8834, "step": 3248 }, { "epoch": 1.5109251510925152, "grad_norm": 5.607419013977051, "learning_rate": 3.456708580912725e-05, "loss": 1.151, "step": 3250 }, { "epoch": 1.511854951185495, "grad_norm": 5.951454162597656, "learning_rate": 3.463959980693489e-05, "loss": 1.0981, "step": 3252 }, { "epoch": 1.5127847512784751, "grad_norm": 4.383877277374268, "learning_rate": 3.4712018665784144e-05, "loss": 0.8677, "step": 3254 }, { "epoch": 1.5137145513714552, "grad_norm": 4.402925491333008, "learning_rate": 3.4784341670930065e-05, "loss": 1.0733, "step": 3256 }, { "epoch": 1.514644351464435, "grad_norm": 4.927363395690918, "learning_rate": 3.485656810857375e-05, "loss": 1.0562, "step": 3258 }, { "epoch": 1.515574151557415, "grad_norm": 5.725814342498779, "learning_rate": 3.49286972658695e-05, "loss": 1.0257, "step": 3260 }, { "epoch": 1.516503951650395, "grad_norm": 4.768974781036377, "learning_rate": 3.500072843093158e-05, "loss": 1.0003, "step": 3262 }, { "epoch": 1.5174337517433751, "grad_norm": 4.772510051727295, "learning_rate": 3.507266089284154e-05, "loss": 1.0954, "step": 3264 }, { "epoch": 1.5183635518363552, "grad_norm": 5.08818244934082, "learning_rate": 3.514449394165499e-05, "loss": 0.9038, "step": 3266 }, { "epoch": 1.5192933519293352, "grad_norm": 5.1562700271606445, "learning_rate": 3.52162268684087e-05, "loss": 1.014, "step": 3268 }, { "epoch": 1.5202231520223153, "grad_norm": 4.9693603515625, "learning_rate": 3.5287858965127714e-05, "loss": 0.9474, "step": 3270 }, { "epoch": 1.5211529521152953, "grad_norm": 5.255979061126709, "learning_rate": 3.535938952483209e-05, "loss": 0.8783, "step": 3272 }, { "epoch": 1.5220827522082754, "grad_norm": 4.938564300537109, "learning_rate": 3.543081784154411e-05, "loss": 1.0849, "step": 3274 }, { "epoch": 1.5230125523012552, "grad_norm": 4.973381519317627, "learning_rate": 3.550214321029513e-05, "loss": 1.0605, "step": 3276 }, { "epoch": 1.5239423523942353, "grad_norm": 4.732978343963623, "learning_rate": 3.5573364927132566e-05, "loss": 0.9619, "step": 3278 }, { "epoch": 1.524872152487215, "grad_norm": 5.4166975021362305, "learning_rate": 3.56444822891268e-05, "loss": 1.0953, "step": 3280 }, { "epoch": 1.5258019525801951, "grad_norm": 4.546645641326904, "learning_rate": 3.5715494594378195e-05, "loss": 1.0675, "step": 3282 }, { "epoch": 1.5267317526731752, "grad_norm": 4.636300086975098, "learning_rate": 3.578640114202395e-05, "loss": 1.0165, "step": 3284 }, { "epoch": 1.5276615527661552, "grad_norm": 4.877590656280518, "learning_rate": 3.5857201232245086e-05, "loss": 0.9857, "step": 3286 }, { "epoch": 1.5285913528591353, "grad_norm": 5.400458812713623, "learning_rate": 3.592789416627331e-05, "loss": 1.0003, "step": 3288 }, { "epoch": 1.5295211529521153, "grad_norm": 4.925442218780518, "learning_rate": 3.5998479246397846e-05, "loss": 1.1217, "step": 3290 }, { "epoch": 1.5304509530450954, "grad_norm": 5.98251485824585, "learning_rate": 3.606895577597252e-05, "loss": 1.1337, "step": 3292 }, { "epoch": 1.5313807531380754, "grad_norm": 5.529360771179199, "learning_rate": 3.613932305942241e-05, "loss": 0.8835, "step": 3294 }, { "epoch": 1.5323105532310555, "grad_norm": 4.576425075531006, "learning_rate": 3.6209580402250795e-05, "loss": 0.8952, "step": 3296 }, { "epoch": 1.5332403533240353, "grad_norm": 4.927560329437256, "learning_rate": 3.6279727111046126e-05, "loss": 1.0914, "step": 3298 }, { "epoch": 1.5341701534170153, "grad_norm": 5.8177618980407715, "learning_rate": 3.634976249348868e-05, "loss": 1.1259, "step": 3300 }, { "epoch": 1.5350999535099954, "grad_norm": 4.810030460357666, "learning_rate": 3.6419685858357485e-05, "loss": 1.0251, "step": 3302 }, { "epoch": 1.5360297536029752, "grad_norm": 5.290113925933838, "learning_rate": 3.648949651553721e-05, "loss": 0.9793, "step": 3304 }, { "epoch": 1.5369595536959553, "grad_norm": 4.821476936340332, "learning_rate": 3.655919377602478e-05, "loss": 1.1111, "step": 3306 }, { "epoch": 1.5378893537889353, "grad_norm": 4.327188491821289, "learning_rate": 3.6628776951936465e-05, "loss": 0.9279, "step": 3308 }, { "epoch": 1.5388191538819154, "grad_norm": 5.092374324798584, "learning_rate": 3.669824535651432e-05, "loss": 0.8852, "step": 3310 }, { "epoch": 1.5397489539748954, "grad_norm": 4.613977909088135, "learning_rate": 3.6767598304133303e-05, "loss": 1.0289, "step": 3312 }, { "epoch": 1.5406787540678755, "grad_norm": 5.224510192871094, "learning_rate": 3.683683511030778e-05, "loss": 0.9287, "step": 3314 }, { "epoch": 1.5416085541608555, "grad_norm": 4.52159309387207, "learning_rate": 3.690595509169847e-05, "loss": 0.8724, "step": 3316 }, { "epoch": 1.5425383542538356, "grad_norm": 6.188925266265869, "learning_rate": 3.697495756611901e-05, "loss": 1.2514, "step": 3318 }, { "epoch": 1.5434681543468154, "grad_norm": 5.123960018157959, "learning_rate": 3.704384185254287e-05, "loss": 1.0667, "step": 3320 }, { "epoch": 1.5443979544397954, "grad_norm": 4.356557846069336, "learning_rate": 3.7112607271109944e-05, "loss": 0.8139, "step": 3322 }, { "epoch": 1.5453277545327755, "grad_norm": 4.9716410636901855, "learning_rate": 3.718125314313328e-05, "loss": 1.0672, "step": 3324 }, { "epoch": 1.5462575546257553, "grad_norm": 5.455291748046875, "learning_rate": 3.7249778791105906e-05, "loss": 1.029, "step": 3326 }, { "epoch": 1.5471873547187354, "grad_norm": 4.438457012176514, "learning_rate": 3.731818353870731e-05, "loss": 1.0383, "step": 3328 }, { "epoch": 1.5481171548117154, "grad_norm": 4.635798454284668, "learning_rate": 3.738646671081019e-05, "loss": 1.0535, "step": 3330 }, { "epoch": 1.5490469549046955, "grad_norm": 4.891788482666016, "learning_rate": 3.745462763348729e-05, "loss": 1.0838, "step": 3332 }, { "epoch": 1.5499767549976755, "grad_norm": 4.482544422149658, "learning_rate": 3.7522665634017744e-05, "loss": 0.8947, "step": 3334 }, { "epoch": 1.5509065550906556, "grad_norm": 4.564555644989014, "learning_rate": 3.759058004089402e-05, "loss": 0.9481, "step": 3336 }, { "epoch": 1.5518363551836356, "grad_norm": 5.37042236328125, "learning_rate": 3.765837018382834e-05, "loss": 1.0099, "step": 3338 }, { "epoch": 1.5527661552766157, "grad_norm": 4.487417221069336, "learning_rate": 3.7726035393759285e-05, "loss": 1.0414, "step": 3340 }, { "epoch": 1.5536959553695955, "grad_norm": 4.6354875564575195, "learning_rate": 3.7793575002858636e-05, "loss": 0.9178, "step": 3342 }, { "epoch": 1.5546257554625755, "grad_norm": 4.358200550079346, "learning_rate": 3.786098834453767e-05, "loss": 0.965, "step": 3344 }, { "epoch": 1.5555555555555556, "grad_norm": 5.255397319793701, "learning_rate": 3.7928274753453936e-05, "loss": 1.0568, "step": 3346 }, { "epoch": 1.5564853556485354, "grad_norm": 4.2289814949035645, "learning_rate": 3.799543356551774e-05, "loss": 0.9238, "step": 3348 }, { "epoch": 1.5574151557415155, "grad_norm": 4.825039863586426, "learning_rate": 3.806246411789874e-05, "loss": 1.0622, "step": 3350 }, { "epoch": 1.5583449558344955, "grad_norm": 4.283927917480469, "learning_rate": 3.8129365749032406e-05, "loss": 0.8568, "step": 3352 }, { "epoch": 1.5592747559274756, "grad_norm": 5.9034576416015625, "learning_rate": 3.819613779862666e-05, "loss": 1.0229, "step": 3354 }, { "epoch": 1.5602045560204556, "grad_norm": 4.737551212310791, "learning_rate": 3.8262779607668346e-05, "loss": 0.8972, "step": 3356 }, { "epoch": 1.5611343561134357, "grad_norm": 7.020808696746826, "learning_rate": 3.8329290518429705e-05, "loss": 0.9634, "step": 3358 }, { "epoch": 1.5620641562064157, "grad_norm": 4.638332843780518, "learning_rate": 3.839566987447492e-05, "loss": 0.9417, "step": 3360 }, { "epoch": 1.5629939562993957, "grad_norm": 4.515368461608887, "learning_rate": 3.84619170206665e-05, "loss": 0.7768, "step": 3362 }, { "epoch": 1.5639237563923758, "grad_norm": 4.9595465660095215, "learning_rate": 3.852803130317191e-05, "loss": 0.9196, "step": 3364 }, { "epoch": 1.5648535564853556, "grad_norm": 4.667225360870361, "learning_rate": 3.859401206946983e-05, "loss": 1.0863, "step": 3366 }, { "epoch": 1.5657833565783357, "grad_norm": 3.8130064010620117, "learning_rate": 3.865985866835673e-05, "loss": 0.7965, "step": 3368 }, { "epoch": 1.5667131566713157, "grad_norm": 4.966075897216797, "learning_rate": 3.872557044995332e-05, "loss": 0.9306, "step": 3370 }, { "epoch": 1.5676429567642955, "grad_norm": 5.077073574066162, "learning_rate": 3.879114676571078e-05, "loss": 0.9611, "step": 3372 }, { "epoch": 1.5685727568572756, "grad_norm": 4.5130205154418945, "learning_rate": 3.8856586968417375e-05, "loss": 0.9065, "step": 3374 }, { "epoch": 1.5695025569502556, "grad_norm": 4.803729057312012, "learning_rate": 3.892189041220473e-05, "loss": 0.9459, "step": 3376 }, { "epoch": 1.5704323570432357, "grad_norm": 4.80552339553833, "learning_rate": 3.8987056452554184e-05, "loss": 0.9082, "step": 3378 }, { "epoch": 1.5713621571362157, "grad_norm": 5.126314163208008, "learning_rate": 3.905208444630329e-05, "loss": 0.9122, "step": 3380 }, { "epoch": 1.5722919572291958, "grad_norm": 4.612276077270508, "learning_rate": 3.911697375165195e-05, "loss": 1.1333, "step": 3382 }, { "epoch": 1.5732217573221758, "grad_norm": 5.003444671630859, "learning_rate": 3.918172372816893e-05, "loss": 0.9989, "step": 3384 }, { "epoch": 1.5741515574151559, "grad_norm": 5.194639682769775, "learning_rate": 3.92463337367981e-05, "loss": 0.895, "step": 3386 }, { "epoch": 1.5750813575081357, "grad_norm": 4.977005481719971, "learning_rate": 3.9310803139864795e-05, "loss": 0.9377, "step": 3388 }, { "epoch": 1.5760111576011158, "grad_norm": 5.081730842590332, "learning_rate": 3.937513130108199e-05, "loss": 0.9096, "step": 3390 }, { "epoch": 1.5769409576940958, "grad_norm": 4.56480598449707, "learning_rate": 3.94393175855567e-05, "loss": 0.8711, "step": 3392 }, { "epoch": 1.5778707577870756, "grad_norm": 5.370744705200195, "learning_rate": 3.950336135979626e-05, "loss": 0.9224, "step": 3394 }, { "epoch": 1.5788005578800557, "grad_norm": 4.7622551918029785, "learning_rate": 3.9567261991714404e-05, "loss": 1.0324, "step": 3396 }, { "epoch": 1.5797303579730357, "grad_norm": 5.224269866943359, "learning_rate": 3.9631018850637774e-05, "loss": 0.8666, "step": 3398 }, { "epoch": 1.5806601580660158, "grad_norm": 4.981451511383057, "learning_rate": 3.969463130731184e-05, "loss": 0.9819, "step": 3400 }, { "epoch": 1.5815899581589958, "grad_norm": 4.958683013916016, "learning_rate": 3.975809873390737e-05, "loss": 0.8978, "step": 3402 }, { "epoch": 1.5825197582519759, "grad_norm": 4.960472106933594, "learning_rate": 3.98214205040265e-05, "loss": 0.9762, "step": 3404 }, { "epoch": 1.583449558344956, "grad_norm": 5.064519882202148, "learning_rate": 3.988459599270888e-05, "loss": 0.9119, "step": 3406 }, { "epoch": 1.584379358437936, "grad_norm": 4.764677047729492, "learning_rate": 3.994762457643799e-05, "loss": 0.9203, "step": 3408 }, { "epoch": 1.5853091585309158, "grad_norm": 4.779769420623779, "learning_rate": 4.001050563314713e-05, "loss": 0.9997, "step": 3410 }, { "epoch": 1.5862389586238959, "grad_norm": 5.270796298980713, "learning_rate": 4.007323854222563e-05, "loss": 1.0589, "step": 3412 }, { "epoch": 1.587168758716876, "grad_norm": 5.491512775421143, "learning_rate": 4.013582268452506e-05, "loss": 0.8646, "step": 3414 }, { "epoch": 1.5880985588098557, "grad_norm": 5.278047561645508, "learning_rate": 4.019825744236516e-05, "loss": 1.0265, "step": 3416 }, { "epoch": 1.5890283589028358, "grad_norm": 5.302541732788086, "learning_rate": 4.026054219954008e-05, "loss": 0.9467, "step": 3418 }, { "epoch": 1.5899581589958158, "grad_norm": 5.287991523742676, "learning_rate": 4.032267634132442e-05, "loss": 0.9685, "step": 3420 }, { "epoch": 1.5908879590887959, "grad_norm": 4.389565944671631, "learning_rate": 4.03846592544793e-05, "loss": 0.9788, "step": 3422 }, { "epoch": 1.591817759181776, "grad_norm": 4.957857131958008, "learning_rate": 4.044649032725838e-05, "loss": 0.8908, "step": 3424 }, { "epoch": 1.592747559274756, "grad_norm": 4.675930023193359, "learning_rate": 4.050816894941393e-05, "loss": 0.8759, "step": 3426 }, { "epoch": 1.593677359367736, "grad_norm": 5.030235767364502, "learning_rate": 4.0569694512202825e-05, "loss": 0.9612, "step": 3428 }, { "epoch": 1.594607159460716, "grad_norm": 3.9802279472351074, "learning_rate": 4.0631066408392644e-05, "loss": 0.8696, "step": 3430 }, { "epoch": 1.5955369595536961, "grad_norm": 5.283146858215332, "learning_rate": 4.0692284032267544e-05, "loss": 0.9102, "step": 3432 }, { "epoch": 1.596466759646676, "grad_norm": 4.207881927490234, "learning_rate": 4.075334677963424e-05, "loss": 0.9411, "step": 3434 }, { "epoch": 1.597396559739656, "grad_norm": 4.636220932006836, "learning_rate": 4.081425404782812e-05, "loss": 0.9169, "step": 3436 }, { "epoch": 1.5983263598326358, "grad_norm": 5.301253795623779, "learning_rate": 4.087500523571904e-05, "loss": 0.9452, "step": 3438 }, { "epoch": 1.5992561599256159, "grad_norm": 4.819558620452881, "learning_rate": 4.0935599743717254e-05, "loss": 0.8925, "step": 3440 }, { "epoch": 1.600185960018596, "grad_norm": 5.001340389251709, "learning_rate": 4.0996036973779485e-05, "loss": 0.8205, "step": 3442 }, { "epoch": 1.601115760111576, "grad_norm": 4.875282287597656, "learning_rate": 4.1056316329414616e-05, "loss": 0.8769, "step": 3444 }, { "epoch": 1.602045560204556, "grad_norm": 4.704746723175049, "learning_rate": 4.1116437215689804e-05, "loss": 0.9416, "step": 3446 }, { "epoch": 1.602975360297536, "grad_norm": 4.515520095825195, "learning_rate": 4.117639903923614e-05, "loss": 0.8595, "step": 3448 }, { "epoch": 1.6039051603905161, "grad_norm": 5.541014671325684, "learning_rate": 4.12362012082546e-05, "loss": 0.9977, "step": 3450 }, { "epoch": 1.6048349604834962, "grad_norm": 4.2960052490234375, "learning_rate": 4.1295843132521994e-05, "loss": 0.8906, "step": 3452 }, { "epoch": 1.6057647605764762, "grad_norm": 4.5632476806640625, "learning_rate": 4.1355324223396537e-05, "loss": 0.8809, "step": 3454 }, { "epoch": 1.606694560669456, "grad_norm": 4.220998764038086, "learning_rate": 4.141464389382392e-05, "loss": 0.9125, "step": 3456 }, { "epoch": 1.607624360762436, "grad_norm": 5.290791034698486, "learning_rate": 4.147380155834293e-05, "loss": 1.0099, "step": 3458 }, { "epoch": 1.6085541608554161, "grad_norm": 4.372595310211182, "learning_rate": 4.153279663309131e-05, "loss": 0.928, "step": 3460 }, { "epoch": 1.609483960948396, "grad_norm": 4.724396228790283, "learning_rate": 4.159162853581148e-05, "loss": 0.9663, "step": 3462 }, { "epoch": 1.610413761041376, "grad_norm": 4.598977565765381, "learning_rate": 4.16502966858563e-05, "loss": 1.0, "step": 3464 }, { "epoch": 1.611343561134356, "grad_norm": 5.204286575317383, "learning_rate": 4.170880050419483e-05, "loss": 1.0726, "step": 3466 }, { "epoch": 1.612273361227336, "grad_norm": 4.182427883148193, "learning_rate": 4.176713941341799e-05, "loss": 0.8877, "step": 3468 }, { "epoch": 1.6132031613203162, "grad_norm": 5.108072280883789, "learning_rate": 4.182531283774433e-05, "loss": 1.0672, "step": 3470 }, { "epoch": 1.6141329614132962, "grad_norm": 4.404242515563965, "learning_rate": 4.1883320203025606e-05, "loss": 0.8733, "step": 3472 }, { "epoch": 1.6150627615062763, "grad_norm": 4.572476387023926, "learning_rate": 4.1941160936752556e-05, "loss": 0.9651, "step": 3474 }, { "epoch": 1.6159925615992563, "grad_norm": 4.2744975090026855, "learning_rate": 4.199883446806049e-05, "loss": 0.8034, "step": 3476 }, { "epoch": 1.6169223616922361, "grad_norm": 5.5566630363464355, "learning_rate": 4.20563402277349e-05, "loss": 1.044, "step": 3478 }, { "epoch": 1.6178521617852162, "grad_norm": 4.366683483123779, "learning_rate": 4.211367764821722e-05, "loss": 0.9041, "step": 3480 }, { "epoch": 1.6187819618781962, "grad_norm": 4.917232513427734, "learning_rate": 4.217084616361022e-05, "loss": 0.8497, "step": 3482 }, { "epoch": 1.619711761971176, "grad_norm": 4.624403953552246, "learning_rate": 4.22278452096837e-05, "loss": 0.9058, "step": 3484 }, { "epoch": 1.620641562064156, "grad_norm": 4.183825969696045, "learning_rate": 4.228467422388016e-05, "loss": 0.9176, "step": 3486 }, { "epoch": 1.6215713621571362, "grad_norm": 4.175707817077637, "learning_rate": 4.23413326453201e-05, "loss": 0.7308, "step": 3488 }, { "epoch": 1.6225011622501162, "grad_norm": 4.2621870040893555, "learning_rate": 4.239781991480785e-05, "loss": 0.7594, "step": 3490 }, { "epoch": 1.6234309623430963, "grad_norm": 4.8946356773376465, "learning_rate": 4.24541354748368e-05, "loss": 0.8865, "step": 3492 }, { "epoch": 1.6243607624360763, "grad_norm": 4.26042366027832, "learning_rate": 4.251027876959515e-05, "loss": 0.8811, "step": 3494 }, { "epoch": 1.6252905625290563, "grad_norm": 5.5929274559021, "learning_rate": 4.256624924497123e-05, "loss": 0.9739, "step": 3496 }, { "epoch": 1.6262203626220364, "grad_norm": 4.950803279876709, "learning_rate": 4.262204634855903e-05, "loss": 0.8889, "step": 3498 }, { "epoch": 1.6271501627150162, "grad_norm": 4.478362083435059, "learning_rate": 4.267766952966368e-05, "loss": 0.8453, "step": 3500 }, { "epoch": 1.6280799628079963, "grad_norm": 4.825216293334961, "learning_rate": 4.273311823930684e-05, "loss": 0.853, "step": 3502 }, { "epoch": 1.6290097629009763, "grad_norm": 4.899900436401367, "learning_rate": 4.278839193023215e-05, "loss": 0.9007, "step": 3504 }, { "epoch": 1.6299395629939561, "grad_norm": 5.445573329925537, "learning_rate": 4.284349005691052e-05, "loss": 0.9836, "step": 3506 }, { "epoch": 1.6308693630869362, "grad_norm": 4.970978260040283, "learning_rate": 4.2898412075545786e-05, "loss": 0.8733, "step": 3508 }, { "epoch": 1.6317991631799162, "grad_norm": 4.47289514541626, "learning_rate": 4.2953157444079726e-05, "loss": 0.8201, "step": 3510 }, { "epoch": 1.6327289632728963, "grad_norm": 4.302659511566162, "learning_rate": 4.300772562219767e-05, "loss": 0.8247, "step": 3512 }, { "epoch": 1.6336587633658763, "grad_norm": 4.386494159698486, "learning_rate": 4.306211607133375e-05, "loss": 0.851, "step": 3514 }, { "epoch": 1.6345885634588564, "grad_norm": 5.774094104766846, "learning_rate": 4.311632825467615e-05, "loss": 1.0084, "step": 3516 }, { "epoch": 1.6355183635518364, "grad_norm": 4.667821884155273, "learning_rate": 4.3170361637172575e-05, "loss": 0.92, "step": 3518 }, { "epoch": 1.6364481636448165, "grad_norm": 4.420036315917969, "learning_rate": 4.322421568553531e-05, "loss": 0.8182, "step": 3520 }, { "epoch": 1.6373779637377965, "grad_norm": 4.949415683746338, "learning_rate": 4.327788986824661e-05, "loss": 0.934, "step": 3522 }, { "epoch": 1.6383077638307764, "grad_norm": 4.524384021759033, "learning_rate": 4.333138365556401e-05, "loss": 1.0644, "step": 3524 }, { "epoch": 1.6392375639237564, "grad_norm": 5.352021217346191, "learning_rate": 4.33846965195254e-05, "loss": 0.9659, "step": 3526 }, { "epoch": 1.6401673640167362, "grad_norm": 4.081556797027588, "learning_rate": 4.343782793395434e-05, "loss": 0.7163, "step": 3528 }, { "epoch": 1.6410971641097163, "grad_norm": 4.194980621337891, "learning_rate": 4.3490777374465234e-05, "loss": 0.8608, "step": 3530 }, { "epoch": 1.6420269642026963, "grad_norm": 4.564061641693115, "learning_rate": 4.3543544318468465e-05, "loss": 0.8468, "step": 3532 }, { "epoch": 1.6429567642956764, "grad_norm": 4.419355869293213, "learning_rate": 4.359612824517562e-05, "loss": 0.8209, "step": 3534 }, { "epoch": 1.6438865643886564, "grad_norm": 4.478606224060059, "learning_rate": 4.364852863560455e-05, "loss": 0.8829, "step": 3536 }, { "epoch": 1.6448163644816365, "grad_norm": 4.408851623535156, "learning_rate": 4.3700744972584555e-05, "loss": 0.9214, "step": 3538 }, { "epoch": 1.6457461645746165, "grad_norm": 4.673376560211182, "learning_rate": 4.375277674076147e-05, "loss": 0.858, "step": 3540 }, { "epoch": 1.6466759646675966, "grad_norm": 5.179042339324951, "learning_rate": 4.380462342660278e-05, "loss": 0.8619, "step": 3542 }, { "epoch": 1.6476057647605766, "grad_norm": 6.00623083114624, "learning_rate": 4.385628451840259e-05, "loss": 0.9335, "step": 3544 }, { "epoch": 1.6485355648535565, "grad_norm": 4.4586052894592285, "learning_rate": 4.390775950628679e-05, "loss": 0.8673, "step": 3546 }, { "epoch": 1.6494653649465365, "grad_norm": 5.264186859130859, "learning_rate": 4.3959047882218044e-05, "loss": 0.842, "step": 3548 }, { "epoch": 1.6503951650395166, "grad_norm": 4.109957218170166, "learning_rate": 4.401014914000076e-05, "loss": 0.8808, "step": 3550 }, { "epoch": 1.6513249651324964, "grad_norm": 5.401610374450684, "learning_rate": 4.406106277528619e-05, "loss": 0.8705, "step": 3552 }, { "epoch": 1.6522547652254764, "grad_norm": 4.347593784332275, "learning_rate": 4.411178828557726e-05, "loss": 0.7998, "step": 3554 }, { "epoch": 1.6531845653184565, "grad_norm": 5.493409633636475, "learning_rate": 4.416232517023373e-05, "loss": 0.9396, "step": 3556 }, { "epoch": 1.6541143654114365, "grad_norm": 5.0399065017700195, "learning_rate": 4.421267293047691e-05, "loss": 0.9039, "step": 3558 }, { "epoch": 1.6550441655044166, "grad_norm": 4.303567886352539, "learning_rate": 4.42628310693947e-05, "loss": 0.7218, "step": 3560 }, { "epoch": 1.6559739655973966, "grad_norm": 4.822845458984375, "learning_rate": 4.4312799091946596e-05, "loss": 0.8711, "step": 3562 }, { "epoch": 1.6569037656903767, "grad_norm": 4.712618827819824, "learning_rate": 4.436257650496834e-05, "loss": 0.7601, "step": 3564 }, { "epoch": 1.6578335657833567, "grad_norm": 5.288684368133545, "learning_rate": 4.441216281717696e-05, "loss": 0.9567, "step": 3566 }, { "epoch": 1.6587633658763365, "grad_norm": 4.014641284942627, "learning_rate": 4.446155753917559e-05, "loss": 0.7499, "step": 3568 }, { "epoch": 1.6596931659693166, "grad_norm": 4.1380510330200195, "learning_rate": 4.451076018345824e-05, "loss": 0.8134, "step": 3570 }, { "epoch": 1.6606229660622966, "grad_norm": 5.209433078765869, "learning_rate": 4.45597702644147e-05, "loss": 0.8807, "step": 3572 }, { "epoch": 1.6615527661552765, "grad_norm": 6.624484539031982, "learning_rate": 4.4608587298335246e-05, "loss": 0.8939, "step": 3574 }, { "epoch": 1.6624825662482565, "grad_norm": 4.833741188049316, "learning_rate": 4.465721080341547e-05, "loss": 0.8237, "step": 3576 }, { "epoch": 1.6634123663412366, "grad_norm": 6.339111804962158, "learning_rate": 4.4705640299761007e-05, "loss": 0.8466, "step": 3578 }, { "epoch": 1.6643421664342166, "grad_norm": 5.669918060302734, "learning_rate": 4.475387530939227e-05, "loss": 1.0994, "step": 3580 }, { "epoch": 1.6652719665271967, "grad_norm": 5.67817497253418, "learning_rate": 4.480191535624919e-05, "loss": 0.9408, "step": 3582 }, { "epoch": 1.6662017666201767, "grad_norm": 5.028122901916504, "learning_rate": 4.4849759966195894e-05, "loss": 0.8956, "step": 3584 }, { "epoch": 1.6671315667131568, "grad_norm": 3.9478189945220947, "learning_rate": 4.489740866702542e-05, "loss": 0.84, "step": 3586 }, { "epoch": 1.6680613668061368, "grad_norm": 4.833270072937012, "learning_rate": 4.4944860988464276e-05, "loss": 0.6698, "step": 3588 }, { "epoch": 1.6689911668991166, "grad_norm": 5.770461082458496, "learning_rate": 4.4992116462177276e-05, "loss": 0.9798, "step": 3590 }, { "epoch": 1.6699209669920967, "grad_norm": 4.734210014343262, "learning_rate": 4.503917462177194e-05, "loss": 0.813, "step": 3592 }, { "epoch": 1.6708507670850767, "grad_norm": 4.709973335266113, "learning_rate": 4.5086035002803195e-05, "loss": 0.9069, "step": 3594 }, { "epoch": 1.6717805671780566, "grad_norm": 4.484506130218506, "learning_rate": 4.513269714277808e-05, "loss": 0.9894, "step": 3596 }, { "epoch": 1.6727103672710366, "grad_norm": 5.021079063415527, "learning_rate": 4.517916058116002e-05, "loss": 0.7357, "step": 3598 }, { "epoch": 1.6736401673640167, "grad_norm": 4.80076265335083, "learning_rate": 4.522542485937372e-05, "loss": 0.7699, "step": 3600 }, { "epoch": 1.6745699674569967, "grad_norm": 4.828639507293701, "learning_rate": 4.527148952080936e-05, "loss": 0.8888, "step": 3602 }, { "epoch": 1.6754997675499768, "grad_norm": 4.485340118408203, "learning_rate": 4.531735411082738e-05, "loss": 0.7774, "step": 3604 }, { "epoch": 1.6764295676429568, "grad_norm": 4.316926956176758, "learning_rate": 4.5363018176762774e-05, "loss": 0.7943, "step": 3606 }, { "epoch": 1.6773593677359369, "grad_norm": 4.5371623039245605, "learning_rate": 4.5408481267929646e-05, "loss": 0.8454, "step": 3608 }, { "epoch": 1.678289167828917, "grad_norm": 3.981923818588257, "learning_rate": 4.5453742935625624e-05, "loss": 0.7192, "step": 3610 }, { "epoch": 1.679218967921897, "grad_norm": 4.913796901702881, "learning_rate": 4.549880273313635e-05, "loss": 0.7252, "step": 3612 }, { "epoch": 1.6801487680148768, "grad_norm": 5.11160945892334, "learning_rate": 4.554366021573981e-05, "loss": 0.8821, "step": 3614 }, { "epoch": 1.6810785681078568, "grad_norm": 4.807749271392822, "learning_rate": 4.5588314940710736e-05, "loss": 0.9423, "step": 3616 }, { "epoch": 1.6820083682008367, "grad_norm": 4.907371997833252, "learning_rate": 4.563276646732503e-05, "loss": 0.8839, "step": 3618 }, { "epoch": 1.6829381682938167, "grad_norm": 4.961659908294678, "learning_rate": 4.567701435686408e-05, "loss": 0.7658, "step": 3620 }, { "epoch": 1.6838679683867968, "grad_norm": 4.29898738861084, "learning_rate": 4.5721058172619084e-05, "loss": 0.8034, "step": 3622 }, { "epoch": 1.6847977684797768, "grad_norm": 5.431276798248291, "learning_rate": 4.5764897479895364e-05, "loss": 0.9538, "step": 3624 }, { "epoch": 1.6857275685727569, "grad_norm": 4.461325168609619, "learning_rate": 4.580853184601662e-05, "loss": 1.0534, "step": 3626 }, { "epoch": 1.686657368665737, "grad_norm": 4.355841159820557, "learning_rate": 4.585196084032932e-05, "loss": 0.828, "step": 3628 }, { "epoch": 1.687587168758717, "grad_norm": 4.904228210449219, "learning_rate": 4.5895184034206806e-05, "loss": 0.9759, "step": 3630 }, { "epoch": 1.688516968851697, "grad_norm": 4.689454555511475, "learning_rate": 4.5938201001053576e-05, "loss": 0.7948, "step": 3632 }, { "epoch": 1.689446768944677, "grad_norm": 4.731905937194824, "learning_rate": 4.598101131630958e-05, "loss": 0.7372, "step": 3634 }, { "epoch": 1.6903765690376569, "grad_norm": 5.18217134475708, "learning_rate": 4.6023614557454266e-05, "loss": 0.8692, "step": 3636 }, { "epoch": 1.691306369130637, "grad_norm": 4.744353294372559, "learning_rate": 4.6066010304010855e-05, "loss": 0.8567, "step": 3638 }, { "epoch": 1.692236169223617, "grad_norm": 4.264333248138428, "learning_rate": 4.610819813755044e-05, "loss": 0.7901, "step": 3640 }, { "epoch": 1.6931659693165968, "grad_norm": 4.133110046386719, "learning_rate": 4.615017764169611e-05, "loss": 0.7347, "step": 3642 }, { "epoch": 1.6940957694095768, "grad_norm": 4.99716329574585, "learning_rate": 4.619194840212715e-05, "loss": 0.8704, "step": 3644 }, { "epoch": 1.695025569502557, "grad_norm": 4.164409160614014, "learning_rate": 4.6233510006582975e-05, "loss": 0.92, "step": 3646 }, { "epoch": 1.695955369595537, "grad_norm": 4.432873249053955, "learning_rate": 4.627486204486736e-05, "loss": 0.8873, "step": 3648 }, { "epoch": 1.696885169688517, "grad_norm": 4.644623279571533, "learning_rate": 4.6316004108852366e-05, "loss": 0.814, "step": 3650 }, { "epoch": 1.697814969781497, "grad_norm": 4.5987725257873535, "learning_rate": 4.635693579248245e-05, "loss": 0.7354, "step": 3652 }, { "epoch": 1.698744769874477, "grad_norm": 4.441370964050293, "learning_rate": 4.639765669177839e-05, "loss": 0.7559, "step": 3654 }, { "epoch": 1.6996745699674571, "grad_norm": 4.301394939422607, "learning_rate": 4.643816640484137e-05, "loss": 0.7254, "step": 3656 }, { "epoch": 1.700604370060437, "grad_norm": 4.048686981201172, "learning_rate": 4.647846453185688e-05, "loss": 0.7482, "step": 3658 }, { "epoch": 1.701534170153417, "grad_norm": 4.108611583709717, "learning_rate": 4.651855067509865e-05, "loss": 0.7426, "step": 3660 }, { "epoch": 1.702463970246397, "grad_norm": 4.447368621826172, "learning_rate": 4.655842443893267e-05, "loss": 0.8361, "step": 3662 }, { "epoch": 1.703393770339377, "grad_norm": 4.492021560668945, "learning_rate": 4.6598085429820956e-05, "loss": 0.7031, "step": 3664 }, { "epoch": 1.704323570432357, "grad_norm": 4.665550231933594, "learning_rate": 4.6637533256325545e-05, "loss": 0.8571, "step": 3666 }, { "epoch": 1.705253370525337, "grad_norm": 5.317662239074707, "learning_rate": 4.667676752911233e-05, "loss": 0.863, "step": 3668 }, { "epoch": 1.706183170618317, "grad_norm": 4.312678813934326, "learning_rate": 4.6715787860954844e-05, "loss": 0.8057, "step": 3670 }, { "epoch": 1.707112970711297, "grad_norm": 4.693243503570557, "learning_rate": 4.67545938667382e-05, "loss": 0.872, "step": 3672 }, { "epoch": 1.7080427708042771, "grad_norm": 5.308189868927002, "learning_rate": 4.6793185163462787e-05, "loss": 0.8146, "step": 3674 }, { "epoch": 1.7089725708972572, "grad_norm": 4.6869797706604, "learning_rate": 4.683156137024807e-05, "loss": 0.8545, "step": 3676 }, { "epoch": 1.7099023709902372, "grad_norm": 4.637169361114502, "learning_rate": 4.68697221083364e-05, "loss": 0.7365, "step": 3678 }, { "epoch": 1.710832171083217, "grad_norm": 4.683815956115723, "learning_rate": 4.690766700109666e-05, "loss": 0.9321, "step": 3680 }, { "epoch": 1.711761971176197, "grad_norm": 4.2368974685668945, "learning_rate": 4.6945395674028114e-05, "loss": 0.813, "step": 3682 }, { "epoch": 1.7126917712691772, "grad_norm": 5.663618087768555, "learning_rate": 4.698290775476398e-05, "loss": 0.9166, "step": 3684 }, { "epoch": 1.713621571362157, "grad_norm": 4.423084735870361, "learning_rate": 4.702020287307515e-05, "loss": 0.7515, "step": 3686 }, { "epoch": 1.714551371455137, "grad_norm": 4.578973293304443, "learning_rate": 4.705728066087391e-05, "loss": 0.7961, "step": 3688 }, { "epoch": 1.715481171548117, "grad_norm": 5.241172790527344, "learning_rate": 4.709414075221742e-05, "loss": 0.9397, "step": 3690 }, { "epoch": 1.7164109716410971, "grad_norm": 4.480432987213135, "learning_rate": 4.713078278331146e-05, "loss": 0.8232, "step": 3692 }, { "epoch": 1.7173407717340772, "grad_norm": 4.256040573120117, "learning_rate": 4.716720639251399e-05, "loss": 0.9541, "step": 3694 }, { "epoch": 1.7182705718270572, "grad_norm": 4.257062911987305, "learning_rate": 4.72034112203387e-05, "loss": 0.781, "step": 3696 }, { "epoch": 1.7192003719200373, "grad_norm": 4.085724830627441, "learning_rate": 4.723939690945853e-05, "loss": 0.7363, "step": 3698 }, { "epoch": 1.7201301720130173, "grad_norm": 3.926591157913208, "learning_rate": 4.727516310470928e-05, "loss": 0.7837, "step": 3700 }, { "epoch": 1.7210599721059974, "grad_norm": 4.956902503967285, "learning_rate": 4.731070945309303e-05, "loss": 0.8765, "step": 3702 }, { "epoch": 1.7219897721989772, "grad_norm": 4.836958408355713, "learning_rate": 4.7346035603781666e-05, "loss": 0.7906, "step": 3704 }, { "epoch": 1.7229195722919572, "grad_norm": 3.844367027282715, "learning_rate": 4.7381141208120364e-05, "loss": 0.6981, "step": 3706 }, { "epoch": 1.723849372384937, "grad_norm": 4.612549304962158, "learning_rate": 4.741602591963096e-05, "loss": 0.6287, "step": 3708 }, { "epoch": 1.7247791724779171, "grad_norm": 4.6950531005859375, "learning_rate": 4.745068939401546e-05, "loss": 0.7695, "step": 3710 }, { "epoch": 1.7257089725708972, "grad_norm": 5.582300662994385, "learning_rate": 4.748513128915935e-05, "loss": 0.7535, "step": 3712 }, { "epoch": 1.7266387726638772, "grad_norm": 5.290600299835205, "learning_rate": 4.751935126513502e-05, "loss": 0.9097, "step": 3714 }, { "epoch": 1.7275685727568573, "grad_norm": 5.0371623039245605, "learning_rate": 4.755334898420514e-05, "loss": 0.7374, "step": 3716 }, { "epoch": 1.7284983728498373, "grad_norm": 4.114850997924805, "learning_rate": 4.7587124110825936e-05, "loss": 0.7232, "step": 3718 }, { "epoch": 1.7294281729428174, "grad_norm": 5.4571146965026855, "learning_rate": 4.762067631165055e-05, "loss": 0.9865, "step": 3720 }, { "epoch": 1.7303579730357974, "grad_norm": 5.294018745422363, "learning_rate": 4.76540052555323e-05, "loss": 0.8046, "step": 3722 }, { "epoch": 1.7312877731287775, "grad_norm": 5.11043119430542, "learning_rate": 4.768711061352799e-05, "loss": 0.7526, "step": 3724 }, { "epoch": 1.7322175732217573, "grad_norm": 5.516180992126465, "learning_rate": 4.771999205890106e-05, "loss": 0.912, "step": 3726 }, { "epoch": 1.7331473733147373, "grad_norm": 3.796712636947632, "learning_rate": 4.7752649267124954e-05, "loss": 0.7466, "step": 3728 }, { "epoch": 1.7340771734077174, "grad_norm": 4.519735336303711, "learning_rate": 4.7785081915886194e-05, "loss": 0.783, "step": 3730 }, { "epoch": 1.7350069735006972, "grad_norm": 4.461170673370361, "learning_rate": 4.7817289685087624e-05, "loss": 0.7507, "step": 3732 }, { "epoch": 1.7359367735936773, "grad_norm": 4.6826605796813965, "learning_rate": 4.7849272256851594e-05, "loss": 0.9919, "step": 3734 }, { "epoch": 1.7368665736866573, "grad_norm": 4.948098659515381, "learning_rate": 4.7881029315523e-05, "loss": 0.8047, "step": 3736 }, { "epoch": 1.7377963737796374, "grad_norm": 4.882993221282959, "learning_rate": 4.791256054767251e-05, "loss": 0.7414, "step": 3738 }, { "epoch": 1.7387261738726174, "grad_norm": 3.9424962997436523, "learning_rate": 4.79438656420996e-05, "loss": 0.7088, "step": 3740 }, { "epoch": 1.7396559739655975, "grad_norm": 5.094344139099121, "learning_rate": 4.79749442898356e-05, "loss": 0.8265, "step": 3742 }, { "epoch": 1.7405857740585775, "grad_norm": 4.1772236824035645, "learning_rate": 4.800579618414683e-05, "loss": 0.7717, "step": 3744 }, { "epoch": 1.7415155741515576, "grad_norm": 4.843547344207764, "learning_rate": 4.803642102053754e-05, "loss": 0.888, "step": 3746 }, { "epoch": 1.7424453742445374, "grad_norm": 3.8922102451324463, "learning_rate": 4.806681849675294e-05, "loss": 0.654, "step": 3748 }, { "epoch": 1.7433751743375174, "grad_norm": 4.160580158233643, "learning_rate": 4.8096988312782255e-05, "loss": 0.8078, "step": 3750 }, { "epoch": 1.7443049744304975, "grad_norm": 3.8663642406463623, "learning_rate": 4.8126930170861525e-05, "loss": 0.682, "step": 3752 }, { "epoch": 1.7452347745234773, "grad_norm": 4.910378456115723, "learning_rate": 4.8156643775476746e-05, "loss": 0.6952, "step": 3754 }, { "epoch": 1.7461645746164574, "grad_norm": 4.405901908874512, "learning_rate": 4.8186128833366605e-05, "loss": 0.7333, "step": 3756 }, { "epoch": 1.7470943747094374, "grad_norm": 5.373934745788574, "learning_rate": 4.821538505352551e-05, "loss": 0.8483, "step": 3758 }, { "epoch": 1.7480241748024175, "grad_norm": 5.019838809967041, "learning_rate": 4.8244412147206365e-05, "loss": 0.8412, "step": 3760 }, { "epoch": 1.7489539748953975, "grad_norm": 4.303089141845703, "learning_rate": 4.8273209827923466e-05, "loss": 0.7403, "step": 3762 }, { "epoch": 1.7498837749883775, "grad_norm": 5.045379161834717, "learning_rate": 4.8301777811455344e-05, "loss": 0.7547, "step": 3764 }, { "epoch": 1.7508135750813576, "grad_norm": 4.460341930389404, "learning_rate": 4.833011581584753e-05, "loss": 0.7636, "step": 3766 }, { "epoch": 1.7517433751743376, "grad_norm": 4.606701850891113, "learning_rate": 4.8358223561415365e-05, "loss": 0.825, "step": 3768 }, { "epoch": 1.7526731752673175, "grad_norm": 4.881974220275879, "learning_rate": 4.838610077074674e-05, "loss": 0.8146, "step": 3770 }, { "epoch": 1.7536029753602975, "grad_norm": 4.741976261138916, "learning_rate": 4.8413747168704876e-05, "loss": 0.6881, "step": 3772 }, { "epoch": 1.7545327754532776, "grad_norm": 4.724553108215332, "learning_rate": 4.844116248243096e-05, "loss": 0.7366, "step": 3774 }, { "epoch": 1.7554625755462574, "grad_norm": 3.984670877456665, "learning_rate": 4.846834644134691e-05, "loss": 0.725, "step": 3776 }, { "epoch": 1.7563923756392374, "grad_norm": 4.601729869842529, "learning_rate": 4.849529877715806e-05, "loss": 0.7989, "step": 3778 }, { "epoch": 1.7573221757322175, "grad_norm": 4.223592758178711, "learning_rate": 4.85220192238557e-05, "loss": 0.7587, "step": 3780 }, { "epoch": 1.7582519758251975, "grad_norm": 4.84320592880249, "learning_rate": 4.854850751771984e-05, "loss": 0.9119, "step": 3782 }, { "epoch": 1.7591817759181776, "grad_norm": 4.103198051452637, "learning_rate": 4.85747633973217e-05, "loss": 0.7407, "step": 3784 }, { "epoch": 1.7601115760111576, "grad_norm": 4.587448596954346, "learning_rate": 4.8600786603526315e-05, "loss": 0.8317, "step": 3786 }, { "epoch": 1.7610413761041377, "grad_norm": 4.286382675170898, "learning_rate": 4.86265768794952e-05, "loss": 0.6827, "step": 3788 }, { "epoch": 1.7619711761971177, "grad_norm": 4.496561050415039, "learning_rate": 4.865213397068872e-05, "loss": 0.799, "step": 3790 }, { "epoch": 1.7629009762900978, "grad_norm": 4.171448707580566, "learning_rate": 4.867745762486869e-05, "loss": 0.8067, "step": 3792 }, { "epoch": 1.7638307763830776, "grad_norm": 4.29033899307251, "learning_rate": 4.8702547592100884e-05, "loss": 0.7667, "step": 3794 }, { "epoch": 1.7647605764760577, "grad_norm": 4.0382256507873535, "learning_rate": 4.872740362475744e-05, "loss": 0.8511, "step": 3796 }, { "epoch": 1.7656903765690377, "grad_norm": 5.026391506195068, "learning_rate": 4.875202547751937e-05, "loss": 0.8304, "step": 3798 }, { "epoch": 1.7666201766620175, "grad_norm": 3.9788715839385986, "learning_rate": 4.877641290737892e-05, "loss": 0.7691, "step": 3800 }, { "epoch": 1.7675499767549976, "grad_norm": 4.442105293273926, "learning_rate": 4.880056567364199e-05, "loss": 0.7293, "step": 3802 }, { "epoch": 1.7684797768479776, "grad_norm": 3.950080394744873, "learning_rate": 4.882448353793054e-05, "loss": 0.7816, "step": 3804 }, { "epoch": 1.7694095769409577, "grad_norm": 4.296248912811279, "learning_rate": 4.8848166264184926e-05, "loss": 0.7157, "step": 3806 }, { "epoch": 1.7703393770339377, "grad_norm": 4.0640387535095215, "learning_rate": 4.887161361866617e-05, "loss": 0.6915, "step": 3808 }, { "epoch": 1.7712691771269178, "grad_norm": 4.854212760925293, "learning_rate": 4.889482536995835e-05, "loss": 0.9306, "step": 3810 }, { "epoch": 1.7721989772198978, "grad_norm": 4.905151844024658, "learning_rate": 4.891780128897086e-05, "loss": 0.8289, "step": 3812 }, { "epoch": 1.7731287773128779, "grad_norm": 4.136887550354004, "learning_rate": 4.8940541148940656e-05, "loss": 0.727, "step": 3814 }, { "epoch": 1.7740585774058577, "grad_norm": 3.509799003601074, "learning_rate": 4.8963044725434496e-05, "loss": 0.7148, "step": 3816 }, { "epoch": 1.7749883774988378, "grad_norm": 4.320206642150879, "learning_rate": 4.8985311796351155e-05, "loss": 0.6406, "step": 3818 }, { "epoch": 1.7759181775918178, "grad_norm": 3.744168281555176, "learning_rate": 4.9007342141923666e-05, "loss": 0.6638, "step": 3820 }, { "epoch": 1.7768479776847976, "grad_norm": 4.721642017364502, "learning_rate": 4.9029135544721397e-05, "loss": 0.814, "step": 3822 }, { "epoch": 1.7777777777777777, "grad_norm": 4.428520679473877, "learning_rate": 4.905069178965225e-05, "loss": 0.7357, "step": 3824 }, { "epoch": 1.7787075778707577, "grad_norm": 4.677486896514893, "learning_rate": 4.907201066396479e-05, "loss": 0.7428, "step": 3826 }, { "epoch": 1.7796373779637378, "grad_norm": 4.662682056427002, "learning_rate": 4.9093091957250346e-05, "loss": 0.733, "step": 3828 }, { "epoch": 1.7805671780567178, "grad_norm": 4.06486177444458, "learning_rate": 4.911393546144506e-05, "loss": 0.7684, "step": 3830 }, { "epoch": 1.7814969781496979, "grad_norm": 4.559412479400635, "learning_rate": 4.9134540970831954e-05, "loss": 0.7556, "step": 3832 }, { "epoch": 1.782426778242678, "grad_norm": 4.879431247711182, "learning_rate": 4.915490828204296e-05, "loss": 0.7035, "step": 3834 }, { "epoch": 1.783356578335658, "grad_norm": 4.913750648498535, "learning_rate": 4.9175037194060985e-05, "loss": 0.7338, "step": 3836 }, { "epoch": 1.7842863784286378, "grad_norm": 4.723535060882568, "learning_rate": 4.919492750822173e-05, "loss": 0.7747, "step": 3838 }, { "epoch": 1.7852161785216178, "grad_norm": 4.355487823486328, "learning_rate": 4.921457902821587e-05, "loss": 0.773, "step": 3840 }, { "epoch": 1.786145978614598, "grad_norm": 4.417761325836182, "learning_rate": 4.923399156009082e-05, "loss": 0.7777, "step": 3842 }, { "epoch": 1.7870757787075777, "grad_norm": 4.7719340324401855, "learning_rate": 4.9253164912252735e-05, "loss": 0.7946, "step": 3844 }, { "epoch": 1.7880055788005578, "grad_norm": 4.218984127044678, "learning_rate": 4.9272098895468365e-05, "loss": 0.749, "step": 3846 }, { "epoch": 1.7889353788935378, "grad_norm": 4.433305740356445, "learning_rate": 4.9290793322866937e-05, "loss": 0.6696, "step": 3848 }, { "epoch": 1.7898651789865179, "grad_norm": 4.3072710037231445, "learning_rate": 4.9309248009942015e-05, "loss": 0.8129, "step": 3850 }, { "epoch": 1.790794979079498, "grad_norm": 4.4708781242370605, "learning_rate": 4.9327462774553254e-05, "loss": 0.6891, "step": 3852 }, { "epoch": 1.791724779172478, "grad_norm": 4.766537189483643, "learning_rate": 4.934543743692832e-05, "loss": 0.8391, "step": 3854 }, { "epoch": 1.792654579265458, "grad_norm": 4.391709327697754, "learning_rate": 4.936317181966453e-05, "loss": 0.7232, "step": 3856 }, { "epoch": 1.793584379358438, "grad_norm": 4.6488037109375, "learning_rate": 4.938066574773068e-05, "loss": 0.7374, "step": 3858 }, { "epoch": 1.794514179451418, "grad_norm": 4.135992527008057, "learning_rate": 4.9397919048468786e-05, "loss": 0.7607, "step": 3860 }, { "epoch": 1.795443979544398, "grad_norm": 4.779835224151611, "learning_rate": 4.9414931551595714e-05, "loss": 0.7861, "step": 3862 }, { "epoch": 1.796373779637378, "grad_norm": 4.272384166717529, "learning_rate": 4.9431703089204934e-05, "loss": 0.8693, "step": 3864 }, { "epoch": 1.7973035797303578, "grad_norm": 4.362462043762207, "learning_rate": 4.944823349576815e-05, "loss": 0.8594, "step": 3866 }, { "epoch": 1.7982333798233379, "grad_norm": 4.347607135772705, "learning_rate": 4.94645226081369e-05, "loss": 0.795, "step": 3868 }, { "epoch": 1.799163179916318, "grad_norm": 4.482820987701416, "learning_rate": 4.948057026554425e-05, "loss": 0.8727, "step": 3870 }, { "epoch": 1.800092980009298, "grad_norm": 4.292705535888672, "learning_rate": 4.949637630960629e-05, "loss": 0.7298, "step": 3872 }, { "epoch": 1.801022780102278, "grad_norm": 4.299073696136475, "learning_rate": 4.951194058432373e-05, "loss": 0.7582, "step": 3874 }, { "epoch": 1.801952580195258, "grad_norm": 4.693606853485107, "learning_rate": 4.952726293608347e-05, "loss": 0.7898, "step": 3876 }, { "epoch": 1.802882380288238, "grad_norm": 4.187946796417236, "learning_rate": 4.9542343213660096e-05, "loss": 0.7972, "step": 3878 }, { "epoch": 1.8038121803812182, "grad_norm": 4.528362274169922, "learning_rate": 4.9557181268217355e-05, "loss": 0.7937, "step": 3880 }, { "epoch": 1.8047419804741982, "grad_norm": 4.585784912109375, "learning_rate": 4.95717769533096e-05, "loss": 0.753, "step": 3882 }, { "epoch": 1.805671780567178, "grad_norm": 4.190438270568848, "learning_rate": 4.9586130124883365e-05, "loss": 0.8553, "step": 3884 }, { "epoch": 1.806601580660158, "grad_norm": 4.270400047302246, "learning_rate": 4.9600240641278605e-05, "loss": 0.7782, "step": 3886 }, { "epoch": 1.8075313807531381, "grad_norm": 4.612380027770996, "learning_rate": 4.961410836323025e-05, "loss": 0.7042, "step": 3888 }, { "epoch": 1.808461180846118, "grad_norm": 4.355504035949707, "learning_rate": 4.962773315386946e-05, "loss": 0.7868, "step": 3890 }, { "epoch": 1.809390980939098, "grad_norm": 4.2326812744140625, "learning_rate": 4.9641114878725064e-05, "loss": 0.6702, "step": 3892 }, { "epoch": 1.810320781032078, "grad_norm": 3.999129056930542, "learning_rate": 4.965425340572483e-05, "loss": 0.6771, "step": 3894 }, { "epoch": 1.811250581125058, "grad_norm": 4.3528289794921875, "learning_rate": 4.966714860519681e-05, "loss": 0.7309, "step": 3896 }, { "epoch": 1.8121803812180381, "grad_norm": 4.2938666343688965, "learning_rate": 4.96798003498706e-05, "loss": 0.7553, "step": 3898 }, { "epoch": 1.8131101813110182, "grad_norm": 3.8622918128967285, "learning_rate": 4.969220851487857e-05, "loss": 0.6744, "step": 3900 }, { "epoch": 1.8140399814039982, "grad_norm": 4.227387428283691, "learning_rate": 4.9704372977757136e-05, "loss": 0.7541, "step": 3902 }, { "epoch": 1.8149697814969783, "grad_norm": 4.562478542327881, "learning_rate": 4.9716293618447985e-05, "loss": 0.7055, "step": 3904 }, { "epoch": 1.8158995815899581, "grad_norm": 5.370102405548096, "learning_rate": 4.972797031929918e-05, "loss": 0.8911, "step": 3906 }, { "epoch": 1.8168293816829382, "grad_norm": 4.11726713180542, "learning_rate": 4.973940296506642e-05, "loss": 0.7305, "step": 3908 }, { "epoch": 1.8177591817759182, "grad_norm": 5.133968353271484, "learning_rate": 4.97505914429141e-05, "loss": 0.7974, "step": 3910 }, { "epoch": 1.818688981868898, "grad_norm": 3.8824622631073, "learning_rate": 4.9761535642416435e-05, "loss": 0.6732, "step": 3912 }, { "epoch": 1.819618781961878, "grad_norm": 4.444409370422363, "learning_rate": 4.977223545555863e-05, "loss": 0.7028, "step": 3914 }, { "epoch": 1.8205485820548581, "grad_norm": 4.324506759643555, "learning_rate": 4.978269077673783e-05, "loss": 0.8532, "step": 3916 }, { "epoch": 1.8214783821478382, "grad_norm": 4.507174015045166, "learning_rate": 4.979290150276424e-05, "loss": 0.7412, "step": 3918 }, { "epoch": 1.8224081822408182, "grad_norm": 4.394320964813232, "learning_rate": 4.980286753286211e-05, "loss": 0.74, "step": 3920 }, { "epoch": 1.8233379823337983, "grad_norm": 4.442283630371094, "learning_rate": 4.981258876867076e-05, "loss": 0.6583, "step": 3922 }, { "epoch": 1.8242677824267783, "grad_norm": 4.749024868011475, "learning_rate": 4.98220651142455e-05, "loss": 0.7277, "step": 3924 }, { "epoch": 1.8251975825197584, "grad_norm": 4.18754768371582, "learning_rate": 4.9831296476058646e-05, "loss": 0.7417, "step": 3926 }, { "epoch": 1.8261273826127382, "grad_norm": 4.283425807952881, "learning_rate": 4.9840282763000376e-05, "loss": 0.6627, "step": 3928 }, { "epoch": 1.8270571827057183, "grad_norm": 3.789283275604248, "learning_rate": 4.984902388637965e-05, "loss": 0.7152, "step": 3930 }, { "epoch": 1.8279869827986983, "grad_norm": 4.000759601593018, "learning_rate": 4.985751975992514e-05, "loss": 0.6535, "step": 3932 }, { "epoch": 1.8289167828916781, "grad_norm": 4.500207901000977, "learning_rate": 4.986577029978598e-05, "loss": 0.8179, "step": 3934 }, { "epoch": 1.8298465829846582, "grad_norm": 3.7360851764678955, "learning_rate": 4.987377542453268e-05, "loss": 0.7036, "step": 3936 }, { "epoch": 1.8307763830776382, "grad_norm": 3.658295154571533, "learning_rate": 4.988153505515789e-05, "loss": 0.7534, "step": 3938 }, { "epoch": 1.8317061831706183, "grad_norm": 4.002298831939697, "learning_rate": 4.988904911507717e-05, "loss": 0.6499, "step": 3940 }, { "epoch": 1.8326359832635983, "grad_norm": 4.553714275360107, "learning_rate": 4.9896317530129805e-05, "loss": 0.7076, "step": 3942 }, { "epoch": 1.8335657833565784, "grad_norm": 4.585302829742432, "learning_rate": 4.990334022857948e-05, "loss": 0.6222, "step": 3944 }, { "epoch": 1.8344955834495584, "grad_norm": 4.794110298156738, "learning_rate": 4.991011714111498e-05, "loss": 0.6369, "step": 3946 }, { "epoch": 1.8354253835425385, "grad_norm": 4.696752071380615, "learning_rate": 4.991664820085092e-05, "loss": 0.6256, "step": 3948 }, { "epoch": 1.8363551836355185, "grad_norm": 3.7933082580566406, "learning_rate": 4.992293334332838e-05, "loss": 0.6996, "step": 3950 }, { "epoch": 1.8372849837284984, "grad_norm": 4.691423416137695, "learning_rate": 4.992897250651552e-05, "loss": 0.7482, "step": 3952 }, { "epoch": 1.8382147838214784, "grad_norm": 4.6434783935546875, "learning_rate": 4.9934765630808264e-05, "loss": 0.8521, "step": 3954 }, { "epoch": 1.8391445839144582, "grad_norm": 4.610189437866211, "learning_rate": 4.99403126590308e-05, "loss": 0.6479, "step": 3956 }, { "epoch": 1.8400743840074383, "grad_norm": 4.2179274559021, "learning_rate": 4.9945613536436205e-05, "loss": 0.5811, "step": 3958 }, { "epoch": 1.8410041841004183, "grad_norm": 3.6567788124084473, "learning_rate": 4.9950668210706956e-05, "loss": 0.7005, "step": 3960 }, { "epoch": 1.8419339841933984, "grad_norm": 3.888202428817749, "learning_rate": 4.995547663195546e-05, "loss": 0.5737, "step": 3962 }, { "epoch": 1.8428637842863784, "grad_norm": 4.7735724449157715, "learning_rate": 4.996003875272455e-05, "loss": 0.7688, "step": 3964 }, { "epoch": 1.8437935843793585, "grad_norm": 4.3484392166137695, "learning_rate": 4.996435452798792e-05, "loss": 0.5944, "step": 3966 }, { "epoch": 1.8447233844723385, "grad_norm": 3.8754642009735107, "learning_rate": 4.9968423915150616e-05, "loss": 0.6654, "step": 3968 }, { "epoch": 1.8456531845653186, "grad_norm": 4.548868179321289, "learning_rate": 4.9972246874049424e-05, "loss": 0.7207, "step": 3970 }, { "epoch": 1.8465829846582986, "grad_norm": 4.5030670166015625, "learning_rate": 4.9975823366953294e-05, "loss": 0.615, "step": 3972 }, { "epoch": 1.8475127847512784, "grad_norm": 4.409163475036621, "learning_rate": 4.997915335856367e-05, "loss": 0.6394, "step": 3974 }, { "epoch": 1.8484425848442585, "grad_norm": 4.069077491760254, "learning_rate": 4.99822368160149e-05, "loss": 0.6349, "step": 3976 }, { "epoch": 1.8493723849372385, "grad_norm": 4.703945636749268, "learning_rate": 4.99850737088745e-05, "loss": 0.6914, "step": 3978 }, { "epoch": 1.8503021850302184, "grad_norm": 3.819704532623291, "learning_rate": 4.998766400914347e-05, "loss": 0.7075, "step": 3980 }, { "epoch": 1.8512319851231984, "grad_norm": 3.5208377838134766, "learning_rate": 4.9990007691256595e-05, "loss": 0.6731, "step": 3982 }, { "epoch": 1.8521617852161785, "grad_norm": 4.044941425323486, "learning_rate": 4.9992104732082685e-05, "loss": 0.7091, "step": 3984 }, { "epoch": 1.8530915853091585, "grad_norm": 5.0902485847473145, "learning_rate": 4.999395511092479e-05, "loss": 0.7885, "step": 3986 }, { "epoch": 1.8540213854021386, "grad_norm": 3.9330146312713623, "learning_rate": 4.999555880952042e-05, "loss": 0.5949, "step": 3988 }, { "epoch": 1.8549511854951186, "grad_norm": 4.469142913818359, "learning_rate": 4.9996915812041715e-05, "loss": 0.6921, "step": 3990 }, { "epoch": 1.8558809855880987, "grad_norm": 4.257002353668213, "learning_rate": 4.99980261050956e-05, "loss": 0.583, "step": 3992 }, { "epoch": 1.8568107856810787, "grad_norm": 4.753361701965332, "learning_rate": 4.999888967772394e-05, "loss": 0.6843, "step": 3994 }, { "epoch": 1.8577405857740585, "grad_norm": 4.171480178833008, "learning_rate": 4.999950652140362e-05, "loss": 0.7581, "step": 3996 }, { "epoch": 1.8586703858670386, "grad_norm": 4.867319583892822, "learning_rate": 4.999987663004666e-05, "loss": 0.6929, "step": 3998 }, { "epoch": 1.8596001859600186, "grad_norm": 4.304727554321289, "learning_rate": 5.000000000000021e-05, "loss": 0.6402, "step": 4000 }, { "epoch": 1.8596001859600186, "eval_cer": 0.4301928897552968, "eval_loss": 0.7232626080513, "eval_runtime": 392.7033, "eval_samples_per_second": 32.325, "eval_steps_per_second": 1.011, "step": 4000 }, { "epoch": 1.8605299860529985, "grad_norm": 6.515676975250244, "learning_rate": 4.999987663004667e-05, "loss": 0.6756, "step": 4002 }, { "epoch": 1.8614597861459785, "grad_norm": 5.525432586669922, "learning_rate": 4.999950652140364e-05, "loss": 0.8766, "step": 4004 }, { "epoch": 1.8623895862389586, "grad_norm": 4.31777286529541, "learning_rate": 4.999888967772396e-05, "loss": 0.6593, "step": 4006 }, { "epoch": 1.8633193863319386, "grad_norm": 3.902731418609619, "learning_rate": 4.999802610509562e-05, "loss": 0.5859, "step": 4008 }, { "epoch": 1.8642491864249187, "grad_norm": 3.7434067726135254, "learning_rate": 4.9996915812041735e-05, "loss": 0.6703, "step": 4010 }, { "epoch": 1.8651789865178987, "grad_norm": 4.588319301605225, "learning_rate": 4.9995558809520444e-05, "loss": 0.6341, "step": 4012 }, { "epoch": 1.8661087866108788, "grad_norm": 4.119388103485107, "learning_rate": 4.9993955110924826e-05, "loss": 0.8025, "step": 4014 }, { "epoch": 1.8670385867038588, "grad_norm": 4.333853244781494, "learning_rate": 4.999210473208272e-05, "loss": 0.6331, "step": 4016 }, { "epoch": 1.8679683867968386, "grad_norm": 4.914608955383301, "learning_rate": 4.999000769125662e-05, "loss": 0.7366, "step": 4018 }, { "epoch": 1.8688981868898187, "grad_norm": 4.231735706329346, "learning_rate": 4.99876640091435e-05, "loss": 0.6457, "step": 4020 }, { "epoch": 1.8698279869827987, "grad_norm": 3.9431354999542236, "learning_rate": 4.998507370887453e-05, "loss": 0.7253, "step": 4022 }, { "epoch": 1.8707577870757786, "grad_norm": 4.500439643859863, "learning_rate": 4.998223681601494e-05, "loss": 0.7796, "step": 4024 }, { "epoch": 1.8716875871687586, "grad_norm": 3.575521469116211, "learning_rate": 4.997915335856371e-05, "loss": 0.584, "step": 4026 }, { "epoch": 1.8726173872617387, "grad_norm": 4.259825229644775, "learning_rate": 4.997582336695333e-05, "loss": 0.6497, "step": 4028 }, { "epoch": 1.8735471873547187, "grad_norm": 4.181138515472412, "learning_rate": 4.9972246874049464e-05, "loss": 0.525, "step": 4030 }, { "epoch": 1.8744769874476988, "grad_norm": 3.7120015621185303, "learning_rate": 4.996842391515065e-05, "loss": 0.5873, "step": 4032 }, { "epoch": 1.8754067875406788, "grad_norm": 4.5097222328186035, "learning_rate": 4.996435452798795e-05, "loss": 0.7514, "step": 4034 }, { "epoch": 1.8763365876336588, "grad_norm": 4.941904544830322, "learning_rate": 4.9960038752724584e-05, "loss": 0.8239, "step": 4036 }, { "epoch": 1.877266387726639, "grad_norm": 4.688776016235352, "learning_rate": 4.99554766319555e-05, "loss": 0.6597, "step": 4038 }, { "epoch": 1.878196187819619, "grad_norm": 4.1838788986206055, "learning_rate": 4.995066821070699e-05, "loss": 0.5814, "step": 4040 }, { "epoch": 1.8791259879125988, "grad_norm": 4.028554916381836, "learning_rate": 4.9945613536436246e-05, "loss": 0.6199, "step": 4042 }, { "epoch": 1.8800557880055788, "grad_norm": 4.322054862976074, "learning_rate": 4.994031265903083e-05, "loss": 0.7675, "step": 4044 }, { "epoch": 1.8809855880985586, "grad_norm": 3.7601819038391113, "learning_rate": 4.99347656308083e-05, "loss": 0.6516, "step": 4046 }, { "epoch": 1.8819153881915387, "grad_norm": 3.3438475131988525, "learning_rate": 4.992897250651556e-05, "loss": 0.6423, "step": 4048 }, { "epoch": 1.8828451882845187, "grad_norm": 5.147468090057373, "learning_rate": 4.992293334332841e-05, "loss": 0.6654, "step": 4050 }, { "epoch": 1.8837749883774988, "grad_norm": 4.926107406616211, "learning_rate": 4.991664820085095e-05, "loss": 0.7754, "step": 4052 }, { "epoch": 1.8847047884704788, "grad_norm": 3.983346939086914, "learning_rate": 4.991011714111501e-05, "loss": 0.5809, "step": 4054 }, { "epoch": 1.885634588563459, "grad_norm": 3.9227046966552734, "learning_rate": 4.990334022857952e-05, "loss": 0.6416, "step": 4056 }, { "epoch": 1.886564388656439, "grad_norm": 4.945847034454346, "learning_rate": 4.989631753012985e-05, "loss": 0.768, "step": 4058 }, { "epoch": 1.887494188749419, "grad_norm": 4.014359951019287, "learning_rate": 4.988904911507721e-05, "loss": 0.5765, "step": 4060 }, { "epoch": 1.888423988842399, "grad_norm": 4.3151774406433105, "learning_rate": 4.988153505515792e-05, "loss": 0.6135, "step": 4062 }, { "epoch": 1.8893537889353789, "grad_norm": 4.116644382476807, "learning_rate": 4.987377542453271e-05, "loss": 0.7092, "step": 4064 }, { "epoch": 1.890283589028359, "grad_norm": 4.393803596496582, "learning_rate": 4.986577029978602e-05, "loss": 0.671, "step": 4066 }, { "epoch": 1.891213389121339, "grad_norm": 3.989750385284424, "learning_rate": 4.985751975992518e-05, "loss": 0.5758, "step": 4068 }, { "epoch": 1.8921431892143188, "grad_norm": 5.297903060913086, "learning_rate": 4.9849023886379694e-05, "loss": 0.7496, "step": 4070 }, { "epoch": 1.8930729893072988, "grad_norm": 4.830567359924316, "learning_rate": 4.98402827630004e-05, "loss": 0.641, "step": 4072 }, { "epoch": 1.8940027894002789, "grad_norm": 4.094538688659668, "learning_rate": 4.983129647605868e-05, "loss": 0.6097, "step": 4074 }, { "epoch": 1.894932589493259, "grad_norm": 3.9088656902313232, "learning_rate": 4.982206511424554e-05, "loss": 0.6873, "step": 4076 }, { "epoch": 1.895862389586239, "grad_norm": 3.536940097808838, "learning_rate": 4.98125887686708e-05, "loss": 0.6067, "step": 4078 }, { "epoch": 1.896792189679219, "grad_norm": 4.212688446044922, "learning_rate": 4.9802867532862146e-05, "loss": 0.5665, "step": 4080 }, { "epoch": 1.897721989772199, "grad_norm": 3.6789209842681885, "learning_rate": 4.979290150276427e-05, "loss": 0.5847, "step": 4082 }, { "epoch": 1.8986517898651791, "grad_norm": 3.3487274646759033, "learning_rate": 4.978269077673786e-05, "loss": 0.5124, "step": 4084 }, { "epoch": 1.899581589958159, "grad_norm": 4.485864639282227, "learning_rate": 4.977223545555866e-05, "loss": 0.7615, "step": 4086 }, { "epoch": 1.900511390051139, "grad_norm": 4.322275161743164, "learning_rate": 4.976153564241647e-05, "loss": 0.751, "step": 4088 }, { "epoch": 1.901441190144119, "grad_norm": 4.45305871963501, "learning_rate": 4.9750591442914124e-05, "loss": 0.7015, "step": 4090 }, { "epoch": 1.9023709902370989, "grad_norm": 3.995868444442749, "learning_rate": 4.973940296506646e-05, "loss": 0.6427, "step": 4092 }, { "epoch": 1.903300790330079, "grad_norm": 4.531580924987793, "learning_rate": 4.9727970319299234e-05, "loss": 0.8448, "step": 4094 }, { "epoch": 1.904230590423059, "grad_norm": 4.627540111541748, "learning_rate": 4.971629361844803e-05, "loss": 0.6644, "step": 4096 }, { "epoch": 1.905160390516039, "grad_norm": 5.059457302093506, "learning_rate": 4.97043729777572e-05, "loss": 0.6565, "step": 4098 }, { "epoch": 1.906090190609019, "grad_norm": 4.206478118896484, "learning_rate": 4.969220851487863e-05, "loss": 0.6273, "step": 4100 }, { "epoch": 1.9070199907019991, "grad_norm": 4.0236639976501465, "learning_rate": 4.967980034987067e-05, "loss": 0.6125, "step": 4102 }, { "epoch": 1.9079497907949792, "grad_norm": 4.346922397613525, "learning_rate": 4.9667148605196884e-05, "loss": 0.7311, "step": 4104 }, { "epoch": 1.9088795908879592, "grad_norm": 4.6915106773376465, "learning_rate": 4.965425340572491e-05, "loss": 0.8014, "step": 4106 }, { "epoch": 1.909809390980939, "grad_norm": 4.229055881500244, "learning_rate": 4.964111487872513e-05, "loss": 0.6448, "step": 4108 }, { "epoch": 1.910739191073919, "grad_norm": 3.804426670074463, "learning_rate": 4.962773315386952e-05, "loss": 0.5476, "step": 4110 }, { "epoch": 1.9116689911668991, "grad_norm": 4.047356605529785, "learning_rate": 4.961410836323032e-05, "loss": 0.6359, "step": 4112 }, { "epoch": 1.912598791259879, "grad_norm": 3.9024195671081543, "learning_rate": 4.960024064127867e-05, "loss": 0.6351, "step": 4114 }, { "epoch": 1.913528591352859, "grad_norm": 4.237554550170898, "learning_rate": 4.9586130124883426e-05, "loss": 0.6151, "step": 4116 }, { "epoch": 1.914458391445839, "grad_norm": 3.639256715774536, "learning_rate": 4.957177695330966e-05, "loss": 0.6117, "step": 4118 }, { "epoch": 1.9153881915388191, "grad_norm": 3.7447397708892822, "learning_rate": 4.9557181268217396e-05, "loss": 0.6208, "step": 4120 }, { "epoch": 1.9163179916317992, "grad_norm": 3.275486946105957, "learning_rate": 4.9542343213660164e-05, "loss": 0.6535, "step": 4122 }, { "epoch": 1.9172477917247792, "grad_norm": 3.4026176929473877, "learning_rate": 4.952726293608354e-05, "loss": 0.6125, "step": 4124 }, { "epoch": 1.9181775918177593, "grad_norm": 4.5546979904174805, "learning_rate": 4.95119405843238e-05, "loss": 0.6507, "step": 4126 }, { "epoch": 1.9191073919107393, "grad_norm": 3.14329195022583, "learning_rate": 4.9496376309606355e-05, "loss": 0.4986, "step": 4128 }, { "epoch": 1.9200371920037194, "grad_norm": 3.916020393371582, "learning_rate": 4.948057026554433e-05, "loss": 0.5822, "step": 4130 }, { "epoch": 1.9209669920966992, "grad_norm": 3.846733570098877, "learning_rate": 4.946452260813699e-05, "loss": 0.56, "step": 4132 }, { "epoch": 1.9218967921896792, "grad_norm": 4.008645534515381, "learning_rate": 4.944823349576823e-05, "loss": 0.597, "step": 4134 }, { "epoch": 1.922826592282659, "grad_norm": 3.8048291206359863, "learning_rate": 4.943170308920502e-05, "loss": 0.6414, "step": 4136 }, { "epoch": 1.9237563923756391, "grad_norm": 3.8368723392486572, "learning_rate": 4.941493155159581e-05, "loss": 0.5149, "step": 4138 }, { "epoch": 1.9246861924686192, "grad_norm": 4.177979469299316, "learning_rate": 4.9397919048468874e-05, "loss": 0.5702, "step": 4140 }, { "epoch": 1.9256159925615992, "grad_norm": 4.720351696014404, "learning_rate": 4.938066574773077e-05, "loss": 0.6731, "step": 4142 }, { "epoch": 1.9265457926545793, "grad_norm": 4.1437177658081055, "learning_rate": 4.9363171819664624e-05, "loss": 0.6612, "step": 4144 }, { "epoch": 1.9274755927475593, "grad_norm": 4.600264072418213, "learning_rate": 4.934543743692841e-05, "loss": 0.6535, "step": 4146 }, { "epoch": 1.9284053928405394, "grad_norm": 4.727096080780029, "learning_rate": 4.9327462774553355e-05, "loss": 0.6608, "step": 4148 }, { "epoch": 1.9293351929335194, "grad_norm": 4.372187614440918, "learning_rate": 4.930924800994211e-05, "loss": 0.5944, "step": 4150 }, { "epoch": 1.9302649930264995, "grad_norm": 5.072577476501465, "learning_rate": 4.9290793322867045e-05, "loss": 0.6502, "step": 4152 }, { "epoch": 1.9311947931194793, "grad_norm": 3.8912971019744873, "learning_rate": 4.927209889546847e-05, "loss": 0.5978, "step": 4154 }, { "epoch": 1.9321245932124593, "grad_norm": 4.457938194274902, "learning_rate": 4.925316491225284e-05, "loss": 0.7052, "step": 4156 }, { "epoch": 1.9330543933054394, "grad_norm": 4.35558557510376, "learning_rate": 4.923399156009092e-05, "loss": 0.6607, "step": 4158 }, { "epoch": 1.9339841933984192, "grad_norm": 4.3423380851745605, "learning_rate": 4.921457902821597e-05, "loss": 0.7144, "step": 4160 }, { "epoch": 1.9349139934913993, "grad_norm": 3.3653414249420166, "learning_rate": 4.919492750822182e-05, "loss": 0.5846, "step": 4162 }, { "epoch": 1.9358437935843793, "grad_norm": 3.443279504776001, "learning_rate": 4.917503719406106e-05, "loss": 0.5688, "step": 4164 }, { "epoch": 1.9367735936773594, "grad_norm": 4.136734962463379, "learning_rate": 4.915490828204305e-05, "loss": 0.6299, "step": 4166 }, { "epoch": 1.9377033937703394, "grad_norm": 4.418069362640381, "learning_rate": 4.913454097083203e-05, "loss": 0.6885, "step": 4168 }, { "epoch": 1.9386331938633194, "grad_norm": 4.8063249588012695, "learning_rate": 4.911393546144514e-05, "loss": 0.6787, "step": 4170 }, { "epoch": 1.9395629939562995, "grad_norm": 3.995640993118286, "learning_rate": 4.9093091957250434e-05, "loss": 0.6051, "step": 4172 }, { "epoch": 1.9404927940492795, "grad_norm": 5.736366271972656, "learning_rate": 4.907201066396488e-05, "loss": 0.6326, "step": 4174 }, { "epoch": 1.9414225941422594, "grad_norm": 3.940152168273926, "learning_rate": 4.905069178965234e-05, "loss": 0.6136, "step": 4176 }, { "epoch": 1.9423523942352394, "grad_norm": 4.076253890991211, "learning_rate": 4.90291355447215e-05, "loss": 0.7067, "step": 4178 }, { "epoch": 1.9432821943282195, "grad_norm": 3.954338312149048, "learning_rate": 4.9007342141923774e-05, "loss": 0.6212, "step": 4180 }, { "epoch": 1.9442119944211993, "grad_norm": 4.093294143676758, "learning_rate": 4.898531179635127e-05, "loss": 0.5566, "step": 4182 }, { "epoch": 1.9451417945141793, "grad_norm": 2.8570964336395264, "learning_rate": 4.896304472543459e-05, "loss": 0.5842, "step": 4184 }, { "epoch": 1.9460715946071594, "grad_norm": 4.350346088409424, "learning_rate": 4.894054114894074e-05, "loss": 0.7271, "step": 4186 }, { "epoch": 1.9470013947001394, "grad_norm": 3.3849568367004395, "learning_rate": 4.891780128897096e-05, "loss": 0.5426, "step": 4188 }, { "epoch": 1.9479311947931195, "grad_norm": 4.201946258544922, "learning_rate": 4.889482536995844e-05, "loss": 0.648, "step": 4190 }, { "epoch": 1.9488609948860995, "grad_norm": 4.303542137145996, "learning_rate": 4.887161361866626e-05, "loss": 0.6579, "step": 4192 }, { "epoch": 1.9497907949790796, "grad_norm": 4.644157409667969, "learning_rate": 4.8848166264185034e-05, "loss": 0.6981, "step": 4194 }, { "epoch": 1.9507205950720596, "grad_norm": 4.041478633880615, "learning_rate": 4.8824483537930655e-05, "loss": 0.4843, "step": 4196 }, { "epoch": 1.9516503951650395, "grad_norm": 4.509360313415527, "learning_rate": 4.880056567364212e-05, "loss": 0.748, "step": 4198 }, { "epoch": 1.9525801952580195, "grad_norm": 4.84096622467041, "learning_rate": 4.8776412907379026e-05, "loss": 0.7055, "step": 4200 }, { "epoch": 1.9535099953509996, "grad_norm": 4.175709247589111, "learning_rate": 4.8752025477519485e-05, "loss": 0.6486, "step": 4202 }, { "epoch": 1.9544397954439794, "grad_norm": 3.9075331687927246, "learning_rate": 4.8727403624757555e-05, "loss": 0.5701, "step": 4204 }, { "epoch": 1.9553695955369594, "grad_norm": 3.7550129890441895, "learning_rate": 4.8702547592100986e-05, "loss": 0.5433, "step": 4206 }, { "epoch": 1.9562993956299395, "grad_norm": 4.240418434143066, "learning_rate": 4.86774576248688e-05, "loss": 0.608, "step": 4208 }, { "epoch": 1.9572291957229195, "grad_norm": 4.150586128234863, "learning_rate": 4.865213397068883e-05, "loss": 0.763, "step": 4210 }, { "epoch": 1.9581589958158996, "grad_norm": 4.065403938293457, "learning_rate": 4.8626576879495316e-05, "loss": 0.6162, "step": 4212 }, { "epoch": 1.9590887959088796, "grad_norm": 4.199869155883789, "learning_rate": 4.860078660352644e-05, "loss": 0.6867, "step": 4214 }, { "epoch": 1.9600185960018597, "grad_norm": 3.871168851852417, "learning_rate": 4.8574763397321804e-05, "loss": 0.5928, "step": 4216 }, { "epoch": 1.9609483960948397, "grad_norm": 2.963146448135376, "learning_rate": 4.8548507517719965e-05, "loss": 0.4969, "step": 4218 }, { "epoch": 1.9618781961878198, "grad_norm": 4.055029392242432, "learning_rate": 4.852201922385584e-05, "loss": 0.5225, "step": 4220 }, { "epoch": 1.9628079962807996, "grad_norm": 3.708958387374878, "learning_rate": 4.8495298777158195e-05, "loss": 0.5627, "step": 4222 }, { "epoch": 1.9637377963737797, "grad_norm": 3.2326862812042236, "learning_rate": 4.846834644134705e-05, "loss": 0.5753, "step": 4224 }, { "epoch": 1.9646675964667595, "grad_norm": 3.879624128341675, "learning_rate": 4.8441162482431094e-05, "loss": 0.5811, "step": 4226 }, { "epoch": 1.9655973965597395, "grad_norm": 4.214951038360596, "learning_rate": 4.8413747168705004e-05, "loss": 0.5651, "step": 4228 }, { "epoch": 1.9665271966527196, "grad_norm": 4.5904130935668945, "learning_rate": 4.8386100770746864e-05, "loss": 0.6214, "step": 4230 }, { "epoch": 1.9674569967456996, "grad_norm": 4.590631484985352, "learning_rate": 4.8358223561415494e-05, "loss": 0.6698, "step": 4232 }, { "epoch": 1.9683867968386797, "grad_norm": 3.5514581203460693, "learning_rate": 4.833011581584765e-05, "loss": 0.6815, "step": 4234 }, { "epoch": 1.9693165969316597, "grad_norm": 5.107456207275391, "learning_rate": 4.830177781145546e-05, "loss": 0.6937, "step": 4236 }, { "epoch": 1.9702463970246398, "grad_norm": 4.401675224304199, "learning_rate": 4.8273209827923574e-05, "loss": 0.6962, "step": 4238 }, { "epoch": 1.9711761971176198, "grad_norm": 4.839224815368652, "learning_rate": 4.824441214720646e-05, "loss": 0.6196, "step": 4240 }, { "epoch": 1.9721059972105999, "grad_norm": 4.698374271392822, "learning_rate": 4.821538505352562e-05, "loss": 0.6762, "step": 4242 }, { "epoch": 1.9730357973035797, "grad_norm": 3.7175474166870117, "learning_rate": 4.818612883336672e-05, "loss": 0.5229, "step": 4244 }, { "epoch": 1.9739655973965597, "grad_norm": 4.3499321937561035, "learning_rate": 4.815664377547686e-05, "loss": 0.6103, "step": 4246 }, { "epoch": 1.9748953974895398, "grad_norm": 3.5836880207061768, "learning_rate": 4.8126930170861653e-05, "loss": 0.5767, "step": 4248 }, { "epoch": 1.9758251975825196, "grad_norm": 3.733567953109741, "learning_rate": 4.809698831278236e-05, "loss": 0.5381, "step": 4250 }, { "epoch": 1.9767549976754997, "grad_norm": 3.7311301231384277, "learning_rate": 4.8066818496753064e-05, "loss": 0.5753, "step": 4252 }, { "epoch": 1.9776847977684797, "grad_norm": 4.563572883605957, "learning_rate": 4.803642102053765e-05, "loss": 0.4728, "step": 4254 }, { "epoch": 1.9786145978614598, "grad_norm": 3.9088501930236816, "learning_rate": 4.800579618414696e-05, "loss": 0.6524, "step": 4256 }, { "epoch": 1.9795443979544398, "grad_norm": 3.7347536087036133, "learning_rate": 4.797494428983572e-05, "loss": 0.6839, "step": 4258 }, { "epoch": 1.9804741980474199, "grad_norm": 4.226534366607666, "learning_rate": 4.794386564209973e-05, "loss": 0.6781, "step": 4260 }, { "epoch": 1.9814039981404, "grad_norm": 3.482175588607788, "learning_rate": 4.791256054767264e-05, "loss": 0.6276, "step": 4262 }, { "epoch": 1.98233379823338, "grad_norm": 4.290441989898682, "learning_rate": 4.788102931552313e-05, "loss": 0.6607, "step": 4264 }, { "epoch": 1.9832635983263598, "grad_norm": 3.6641130447387695, "learning_rate": 4.7849272256851736e-05, "loss": 0.5858, "step": 4266 }, { "epoch": 1.9841933984193398, "grad_norm": 3.5325512886047363, "learning_rate": 4.7817289685087766e-05, "loss": 0.5653, "step": 4268 }, { "epoch": 1.9851231985123199, "grad_norm": 3.71095609664917, "learning_rate": 4.778508191588633e-05, "loss": 0.4719, "step": 4270 }, { "epoch": 1.9860529986052997, "grad_norm": 4.316329002380371, "learning_rate": 4.775264926712508e-05, "loss": 0.5677, "step": 4272 }, { "epoch": 1.9869827986982798, "grad_norm": 4.290351390838623, "learning_rate": 4.7719992058901196e-05, "loss": 0.5904, "step": 4274 }, { "epoch": 1.9879125987912598, "grad_norm": 3.694770097732544, "learning_rate": 4.7687110613528116e-05, "loss": 0.6547, "step": 4276 }, { "epoch": 1.9888423988842399, "grad_norm": 4.455708980560303, "learning_rate": 4.765400525553244e-05, "loss": 0.6527, "step": 4278 }, { "epoch": 1.98977219897722, "grad_norm": 3.8699700832366943, "learning_rate": 4.7620676311650686e-05, "loss": 0.5675, "step": 4280 }, { "epoch": 1.9907019990702, "grad_norm": 4.623018264770508, "learning_rate": 4.758712411082607e-05, "loss": 0.6484, "step": 4282 }, { "epoch": 1.99163179916318, "grad_norm": 3.787188768386841, "learning_rate": 4.755334898420528e-05, "loss": 0.6177, "step": 4284 }, { "epoch": 1.99256159925616, "grad_norm": 4.098361968994141, "learning_rate": 4.751935126513516e-05, "loss": 0.6137, "step": 4286 }, { "epoch": 1.9934913993491399, "grad_norm": 3.827043056488037, "learning_rate": 4.748513128915947e-05, "loss": 0.6698, "step": 4288 }, { "epoch": 1.99442119944212, "grad_norm": 4.3220672607421875, "learning_rate": 4.745068939401559e-05, "loss": 0.6946, "step": 4290 }, { "epoch": 1.9953509995351, "grad_norm": 3.5671627521514893, "learning_rate": 4.7416025919631094e-05, "loss": 0.5744, "step": 4292 }, { "epoch": 1.9962807996280798, "grad_norm": 3.8899106979370117, "learning_rate": 4.738114120812049e-05, "loss": 0.4648, "step": 4294 }, { "epoch": 1.9972105997210599, "grad_norm": 4.24798583984375, "learning_rate": 4.734603560378179e-05, "loss": 0.6589, "step": 4296 }, { "epoch": 1.99814039981404, "grad_norm": 3.329463005065918, "learning_rate": 4.731070945309315e-05, "loss": 0.4614, "step": 4298 }, { "epoch": 1.99907019990702, "grad_norm": 3.1626124382019043, "learning_rate": 4.727516310470938e-05, "loss": 0.694, "step": 4300 }, { "epoch": 2.0, "grad_norm": 5.198307514190674, "learning_rate": 4.723939690945863e-05, "loss": 0.6389, "step": 4302 }, { "epoch": 2.00092980009298, "grad_norm": 3.775944232940674, "learning_rate": 4.72034112203388e-05, "loss": 0.4817, "step": 4304 }, { "epoch": 2.00185960018596, "grad_norm": 4.571168899536133, "learning_rate": 4.7167206392514086e-05, "loss": 0.4732, "step": 4306 }, { "epoch": 2.00278940027894, "grad_norm": 4.0107622146606445, "learning_rate": 4.7130782783311563e-05, "loss": 0.5215, "step": 4308 }, { "epoch": 2.00371920037192, "grad_norm": 4.016903877258301, "learning_rate": 4.709414075221751e-05, "loss": 0.4737, "step": 4310 }, { "epoch": 2.0046490004649002, "grad_norm": 2.931978940963745, "learning_rate": 4.7057280660874e-05, "loss": 0.4652, "step": 4312 }, { "epoch": 2.00557880055788, "grad_norm": 3.8469672203063965, "learning_rate": 4.7020202873075266e-05, "loss": 0.4227, "step": 4314 }, { "epoch": 2.00650860065086, "grad_norm": 3.9644534587860107, "learning_rate": 4.698290775476407e-05, "loss": 0.5201, "step": 4316 }, { "epoch": 2.00743840074384, "grad_norm": 4.532963752746582, "learning_rate": 4.694539567402821e-05, "loss": 0.5445, "step": 4318 }, { "epoch": 2.00836820083682, "grad_norm": 4.009785175323486, "learning_rate": 4.690766700109676e-05, "loss": 0.5065, "step": 4320 }, { "epoch": 2.0092980009298, "grad_norm": 4.089921951293945, "learning_rate": 4.686972210833649e-05, "loss": 0.54, "step": 4322 }, { "epoch": 2.01022780102278, "grad_norm": 4.064752578735352, "learning_rate": 4.6831561370248165e-05, "loss": 0.4613, "step": 4324 }, { "epoch": 2.01115760111576, "grad_norm": 3.5423951148986816, "learning_rate": 4.679318516346288e-05, "loss": 0.5068, "step": 4326 }, { "epoch": 2.01208740120874, "grad_norm": 3.8985681533813477, "learning_rate": 4.6754593866738304e-05, "loss": 0.5248, "step": 4328 }, { "epoch": 2.0130172013017202, "grad_norm": 4.009593963623047, "learning_rate": 4.6715787860954945e-05, "loss": 0.4963, "step": 4330 }, { "epoch": 2.0139470013947003, "grad_norm": 3.4723758697509766, "learning_rate": 4.667676752911242e-05, "loss": 0.4902, "step": 4332 }, { "epoch": 2.0148768014876803, "grad_norm": 3.6109511852264404, "learning_rate": 4.663753325632564e-05, "loss": 0.5406, "step": 4334 }, { "epoch": 2.01580660158066, "grad_norm": 3.66243577003479, "learning_rate": 4.6598085429821044e-05, "loss": 0.5249, "step": 4336 }, { "epoch": 2.01673640167364, "grad_norm": 3.6514604091644287, "learning_rate": 4.655842443893277e-05, "loss": 0.5143, "step": 4338 }, { "epoch": 2.01766620176662, "grad_norm": 3.925161838531494, "learning_rate": 4.651855067509875e-05, "loss": 0.4553, "step": 4340 }, { "epoch": 2.0185960018596, "grad_norm": 3.5586907863616943, "learning_rate": 4.647846453185698e-05, "loss": 0.5573, "step": 4342 }, { "epoch": 2.01952580195258, "grad_norm": 3.721860408782959, "learning_rate": 4.643816640484146e-05, "loss": 0.4895, "step": 4344 }, { "epoch": 2.02045560204556, "grad_norm": 4.294020175933838, "learning_rate": 4.6397656691778494e-05, "loss": 0.5842, "step": 4346 }, { "epoch": 2.0213854021385402, "grad_norm": 4.269245147705078, "learning_rate": 4.635693579248255e-05, "loss": 0.5404, "step": 4348 }, { "epoch": 2.0223152022315203, "grad_norm": 3.7907614707946777, "learning_rate": 4.631600410885246e-05, "loss": 0.5033, "step": 4350 }, { "epoch": 2.0232450023245003, "grad_norm": 3.8364109992980957, "learning_rate": 4.6274862044867446e-05, "loss": 0.523, "step": 4352 }, { "epoch": 2.0241748024174804, "grad_norm": 3.3390350341796875, "learning_rate": 4.623351000658306e-05, "loss": 0.4546, "step": 4354 }, { "epoch": 2.0251046025104604, "grad_norm": 2.950497627258301, "learning_rate": 4.619194840212723e-05, "loss": 0.4752, "step": 4356 }, { "epoch": 2.0260344026034405, "grad_norm": 3.4001598358154297, "learning_rate": 4.6150177641696204e-05, "loss": 0.4602, "step": 4358 }, { "epoch": 2.02696420269642, "grad_norm": 3.522695779800415, "learning_rate": 4.6108198137550515e-05, "loss": 0.4377, "step": 4360 }, { "epoch": 2.0278940027894, "grad_norm": 3.0883140563964844, "learning_rate": 4.606601030401094e-05, "loss": 0.5857, "step": 4362 }, { "epoch": 2.02882380288238, "grad_norm": 4.170331954956055, "learning_rate": 4.602361455745436e-05, "loss": 0.4856, "step": 4364 }, { "epoch": 2.0297536029753602, "grad_norm": 4.476413726806641, "learning_rate": 4.5981011316309684e-05, "loss": 0.6378, "step": 4366 }, { "epoch": 2.0306834030683403, "grad_norm": 2.5626158714294434, "learning_rate": 4.593820100105368e-05, "loss": 0.3503, "step": 4368 }, { "epoch": 2.0316132031613203, "grad_norm": 4.257845401763916, "learning_rate": 4.589518403420691e-05, "loss": 0.5214, "step": 4370 }, { "epoch": 2.0325430032543004, "grad_norm": 3.1160905361175537, "learning_rate": 4.585196084032941e-05, "loss": 0.4266, "step": 4372 }, { "epoch": 2.0334728033472804, "grad_norm": 3.389256238937378, "learning_rate": 4.580853184601671e-05, "loss": 0.5076, "step": 4374 }, { "epoch": 2.0344026034402605, "grad_norm": 4.0000481605529785, "learning_rate": 4.5764897479895466e-05, "loss": 0.5284, "step": 4376 }, { "epoch": 2.0353324035332405, "grad_norm": 3.0324342250823975, "learning_rate": 4.572105817261918e-05, "loss": 0.5362, "step": 4378 }, { "epoch": 2.0362622036262206, "grad_norm": 3.7401392459869385, "learning_rate": 4.567701435686419e-05, "loss": 0.4209, "step": 4380 }, { "epoch": 2.0371920037192, "grad_norm": 4.357623100280762, "learning_rate": 4.563276646732513e-05, "loss": 0.5207, "step": 4382 }, { "epoch": 2.03812180381218, "grad_norm": 3.168426990509033, "learning_rate": 4.5588314940710824e-05, "loss": 0.5427, "step": 4384 }, { "epoch": 2.0390516039051603, "grad_norm": 3.066565990447998, "learning_rate": 4.554366021573991e-05, "loss": 0.4793, "step": 4386 }, { "epoch": 2.0399814039981403, "grad_norm": 2.9502246379852295, "learning_rate": 4.549880273313645e-05, "loss": 0.403, "step": 4388 }, { "epoch": 2.0409112040911204, "grad_norm": 3.26049542427063, "learning_rate": 4.5453742935625726e-05, "loss": 0.4983, "step": 4390 }, { "epoch": 2.0418410041841004, "grad_norm": 3.8096601963043213, "learning_rate": 4.540848126792974e-05, "loss": 0.5275, "step": 4392 }, { "epoch": 2.0427708042770805, "grad_norm": 3.47080659866333, "learning_rate": 4.536301817676288e-05, "loss": 0.4917, "step": 4394 }, { "epoch": 2.0437006043700605, "grad_norm": 3.4805452823638916, "learning_rate": 4.531735411082748e-05, "loss": 0.5008, "step": 4396 }, { "epoch": 2.0446304044630406, "grad_norm": 3.1165823936462402, "learning_rate": 4.527148952080947e-05, "loss": 0.4903, "step": 4398 }, { "epoch": 2.0455602045560206, "grad_norm": 3.6423683166503906, "learning_rate": 4.522542485937381e-05, "loss": 0.611, "step": 4400 }, { "epoch": 2.0464900046490007, "grad_norm": 3.241551637649536, "learning_rate": 4.517916058116015e-05, "loss": 0.4014, "step": 4402 }, { "epoch": 2.0474198047419803, "grad_norm": 4.489548683166504, "learning_rate": 4.5132697142778185e-05, "loss": 0.526, "step": 4404 }, { "epoch": 2.0483496048349603, "grad_norm": 3.611036539077759, "learning_rate": 4.508603500280333e-05, "loss": 0.4488, "step": 4406 }, { "epoch": 2.0492794049279404, "grad_norm": 3.28324818611145, "learning_rate": 4.503917462177205e-05, "loss": 0.484, "step": 4408 }, { "epoch": 2.0502092050209204, "grad_norm": 2.9598066806793213, "learning_rate": 4.499211646217741e-05, "loss": 0.4824, "step": 4410 }, { "epoch": 2.0511390051139005, "grad_norm": 3.666255235671997, "learning_rate": 4.494486098846441e-05, "loss": 0.5456, "step": 4412 }, { "epoch": 2.0520688052068805, "grad_norm": 3.6429975032806396, "learning_rate": 4.4897408667025554e-05, "loss": 0.4271, "step": 4414 }, { "epoch": 2.0529986052998606, "grad_norm": 3.3538479804992676, "learning_rate": 4.4849759966196036e-05, "loss": 0.5261, "step": 4416 }, { "epoch": 2.0539284053928406, "grad_norm": 4.124536037445068, "learning_rate": 4.480191535624933e-05, "loss": 0.5375, "step": 4418 }, { "epoch": 2.0548582054858207, "grad_norm": 4.9720048904418945, "learning_rate": 4.4753875309392415e-05, "loss": 0.4933, "step": 4420 }, { "epoch": 2.0557880055788007, "grad_norm": 3.7404863834381104, "learning_rate": 4.470564029976114e-05, "loss": 0.4677, "step": 4422 }, { "epoch": 2.0567178056717808, "grad_norm": 3.1777331829071045, "learning_rate": 4.465721080341563e-05, "loss": 0.4734, "step": 4424 }, { "epoch": 2.0576476057647604, "grad_norm": 3.3637664318084717, "learning_rate": 4.46085872983354e-05, "loss": 0.5271, "step": 4426 }, { "epoch": 2.0585774058577404, "grad_norm": 3.4867875576019287, "learning_rate": 4.455977026441486e-05, "loss": 0.5614, "step": 4428 }, { "epoch": 2.0595072059507205, "grad_norm": 4.306983470916748, "learning_rate": 4.45107601834584e-05, "loss": 0.5076, "step": 4430 }, { "epoch": 2.0604370060437005, "grad_norm": 3.834690570831299, "learning_rate": 4.4461557539175736e-05, "loss": 0.5613, "step": 4432 }, { "epoch": 2.0613668061366806, "grad_norm": 3.617784261703491, "learning_rate": 4.4412162817177114e-05, "loss": 0.4548, "step": 4434 }, { "epoch": 2.0622966062296606, "grad_norm": 3.579624891281128, "learning_rate": 4.43625765049685e-05, "loss": 0.4841, "step": 4436 }, { "epoch": 2.0632264063226406, "grad_norm": 3.3913283348083496, "learning_rate": 4.431279909194677e-05, "loss": 0.4442, "step": 4438 }, { "epoch": 2.0641562064156207, "grad_norm": 2.992852210998535, "learning_rate": 4.426283106939488e-05, "loss": 0.4093, "step": 4440 }, { "epoch": 2.0650860065086007, "grad_norm": 4.011861801147461, "learning_rate": 4.421267293047708e-05, "loss": 0.471, "step": 4442 }, { "epoch": 2.066015806601581, "grad_norm": 4.153837203979492, "learning_rate": 4.416232517023391e-05, "loss": 0.6303, "step": 4444 }, { "epoch": 2.066945606694561, "grad_norm": 2.837022066116333, "learning_rate": 4.411178828557744e-05, "loss": 0.4139, "step": 4446 }, { "epoch": 2.067875406787541, "grad_norm": 3.799687623977661, "learning_rate": 4.406106277528636e-05, "loss": 0.4541, "step": 4448 }, { "epoch": 2.0688052068805205, "grad_norm": 2.804683208465576, "learning_rate": 4.401014914000093e-05, "loss": 0.5052, "step": 4450 }, { "epoch": 2.0697350069735005, "grad_norm": 3.4617092609405518, "learning_rate": 4.3959047882218214e-05, "loss": 0.364, "step": 4452 }, { "epoch": 2.0706648070664806, "grad_norm": 2.996633291244507, "learning_rate": 4.390775950628696e-05, "loss": 0.3668, "step": 4454 }, { "epoch": 2.0715946071594606, "grad_norm": 3.578129529953003, "learning_rate": 4.385628451840275e-05, "loss": 0.5548, "step": 4456 }, { "epoch": 2.0725244072524407, "grad_norm": 3.160885810852051, "learning_rate": 4.380462342660296e-05, "loss": 0.5485, "step": 4458 }, { "epoch": 2.0734542073454207, "grad_norm": 3.3811440467834473, "learning_rate": 4.375277674076165e-05, "loss": 0.5001, "step": 4460 }, { "epoch": 2.074384007438401, "grad_norm": 3.2071585655212402, "learning_rate": 4.3700744972584725e-05, "loss": 0.497, "step": 4462 }, { "epoch": 2.075313807531381, "grad_norm": 3.356126546859741, "learning_rate": 4.364852863560472e-05, "loss": 0.4473, "step": 4464 }, { "epoch": 2.076243607624361, "grad_norm": 3.4141550064086914, "learning_rate": 4.35961282451758e-05, "loss": 0.5672, "step": 4466 }, { "epoch": 2.077173407717341, "grad_norm": 3.8376402854919434, "learning_rate": 4.3543544318468655e-05, "loss": 0.4963, "step": 4468 }, { "epoch": 2.078103207810321, "grad_norm": 3.5418097972869873, "learning_rate": 4.34907773744654e-05, "loss": 0.497, "step": 4470 }, { "epoch": 2.0790330079033006, "grad_norm": 3.0415894985198975, "learning_rate": 4.343782793395452e-05, "loss": 0.389, "step": 4472 }, { "epoch": 2.0799628079962806, "grad_norm": 3.9034175872802734, "learning_rate": 4.3384696519525565e-05, "loss": 0.526, "step": 4474 }, { "epoch": 2.0808926080892607, "grad_norm": 3.44889760017395, "learning_rate": 4.333138365556417e-05, "loss": 0.4348, "step": 4476 }, { "epoch": 2.0818224081822407, "grad_norm": 2.9464569091796875, "learning_rate": 4.327788986824676e-05, "loss": 0.4881, "step": 4478 }, { "epoch": 2.082752208275221, "grad_norm": 3.409226894378662, "learning_rate": 4.322421568553545e-05, "loss": 0.4779, "step": 4480 }, { "epoch": 2.083682008368201, "grad_norm": 3.1398966312408447, "learning_rate": 4.317036163717273e-05, "loss": 0.4328, "step": 4482 }, { "epoch": 2.084611808461181, "grad_norm": 3.466338872909546, "learning_rate": 4.311632825467632e-05, "loss": 0.5314, "step": 4484 }, { "epoch": 2.085541608554161, "grad_norm": 3.3399810791015625, "learning_rate": 4.3062116071333916e-05, "loss": 0.4475, "step": 4486 }, { "epoch": 2.086471408647141, "grad_norm": 3.7630138397216797, "learning_rate": 4.3007725622197836e-05, "loss": 0.554, "step": 4488 }, { "epoch": 2.087401208740121, "grad_norm": 2.9016480445861816, "learning_rate": 4.29531574440799e-05, "loss": 0.4332, "step": 4490 }, { "epoch": 2.088331008833101, "grad_norm": 2.9311156272888184, "learning_rate": 4.2898412075545956e-05, "loss": 0.4208, "step": 4492 }, { "epoch": 2.0892608089260807, "grad_norm": 3.3853461742401123, "learning_rate": 4.284349005691069e-05, "loss": 0.465, "step": 4494 }, { "epoch": 2.0901906090190607, "grad_norm": 3.6475749015808105, "learning_rate": 4.278839193023232e-05, "loss": 0.5655, "step": 4496 }, { "epoch": 2.091120409112041, "grad_norm": 3.58455491065979, "learning_rate": 4.2733118239307016e-05, "loss": 0.4394, "step": 4498 }, { "epoch": 2.092050209205021, "grad_norm": 3.8609395027160645, "learning_rate": 4.267766952966386e-05, "loss": 0.6016, "step": 4500 }, { "epoch": 2.092980009298001, "grad_norm": 4.074398994445801, "learning_rate": 4.2622046348559206e-05, "loss": 0.5008, "step": 4502 }, { "epoch": 2.093909809390981, "grad_norm": 3.4706835746765137, "learning_rate": 4.25662492449714e-05, "loss": 0.5391, "step": 4504 }, { "epoch": 2.094839609483961, "grad_norm": 3.181978702545166, "learning_rate": 4.251027876959533e-05, "loss": 0.5155, "step": 4506 }, { "epoch": 2.095769409576941, "grad_norm": 3.4297313690185547, "learning_rate": 4.2454135474836993e-05, "loss": 0.4329, "step": 4508 }, { "epoch": 2.096699209669921, "grad_norm": 3.065164804458618, "learning_rate": 4.2397819914808046e-05, "loss": 0.513, "step": 4510 }, { "epoch": 2.097629009762901, "grad_norm": 4.074069499969482, "learning_rate": 4.234133264532031e-05, "loss": 0.5128, "step": 4512 }, { "epoch": 2.098558809855881, "grad_norm": 3.4698221683502197, "learning_rate": 4.2284674223880356e-05, "loss": 0.4896, "step": 4514 }, { "epoch": 2.099488609948861, "grad_norm": 3.2786853313446045, "learning_rate": 4.22278452096839e-05, "loss": 0.4648, "step": 4516 }, { "epoch": 2.100418410041841, "grad_norm": 3.317847490310669, "learning_rate": 4.2170846163610396e-05, "loss": 0.4067, "step": 4518 }, { "epoch": 2.101348210134821, "grad_norm": 3.829220771789551, "learning_rate": 4.2113677648217415e-05, "loss": 0.4557, "step": 4520 }, { "epoch": 2.102278010227801, "grad_norm": 3.3619604110717773, "learning_rate": 4.205634022773509e-05, "loss": 0.4857, "step": 4522 }, { "epoch": 2.103207810320781, "grad_norm": 3.291917562484741, "learning_rate": 4.1998834468060684e-05, "loss": 0.4457, "step": 4524 }, { "epoch": 2.104137610413761, "grad_norm": 3.2016797065734863, "learning_rate": 4.1941160936752745e-05, "loss": 0.4153, "step": 4526 }, { "epoch": 2.105067410506741, "grad_norm": 3.455622911453247, "learning_rate": 4.188332020302578e-05, "loss": 0.5343, "step": 4528 }, { "epoch": 2.105997210599721, "grad_norm": 3.5476908683776855, "learning_rate": 4.1825312837744527e-05, "loss": 0.4833, "step": 4530 }, { "epoch": 2.106927010692701, "grad_norm": 3.4766697883605957, "learning_rate": 4.176713941341818e-05, "loss": 0.5048, "step": 4532 }, { "epoch": 2.107856810785681, "grad_norm": 3.1368134021759033, "learning_rate": 4.170880050419503e-05, "loss": 0.4502, "step": 4534 }, { "epoch": 2.1087866108786613, "grad_norm": 3.1872658729553223, "learning_rate": 4.1650296685856475e-05, "loss": 0.4051, "step": 4536 }, { "epoch": 2.1097164109716413, "grad_norm": 4.195072174072266, "learning_rate": 4.159162853581165e-05, "loss": 0.517, "step": 4538 }, { "epoch": 2.110646211064621, "grad_norm": 3.450924873352051, "learning_rate": 4.153279663309149e-05, "loss": 0.4433, "step": 4540 }, { "epoch": 2.111576011157601, "grad_norm": 4.187885284423828, "learning_rate": 4.1473801558343104e-05, "loss": 0.4482, "step": 4542 }, { "epoch": 2.112505811250581, "grad_norm": 3.5478456020355225, "learning_rate": 4.14146438938241e-05, "loss": 0.4126, "step": 4544 }, { "epoch": 2.113435611343561, "grad_norm": 2.970770835876465, "learning_rate": 4.135532422339672e-05, "loss": 0.3947, "step": 4546 }, { "epoch": 2.114365411436541, "grad_norm": 4.102116107940674, "learning_rate": 4.129584313252216e-05, "loss": 0.5594, "step": 4548 }, { "epoch": 2.115295211529521, "grad_norm": 3.845499277114868, "learning_rate": 4.123620120825477e-05, "loss": 0.3989, "step": 4550 }, { "epoch": 2.116225011622501, "grad_norm": 3.9049830436706543, "learning_rate": 4.11763990392363e-05, "loss": 0.4506, "step": 4552 }, { "epoch": 2.1171548117154813, "grad_norm": 3.854703664779663, "learning_rate": 4.111643721568998e-05, "loss": 0.5202, "step": 4554 }, { "epoch": 2.1180846118084613, "grad_norm": 4.243005275726318, "learning_rate": 4.105631632941482e-05, "loss": 0.4584, "step": 4556 }, { "epoch": 2.1190144119014414, "grad_norm": 3.7034356594085693, "learning_rate": 4.099603697377967e-05, "loss": 0.4881, "step": 4558 }, { "epoch": 2.1199442119944214, "grad_norm": 3.4927854537963867, "learning_rate": 4.0935599743717444e-05, "loss": 0.4743, "step": 4560 }, { "epoch": 2.120874012087401, "grad_norm": 3.3612704277038574, "learning_rate": 4.087500523571924e-05, "loss": 0.457, "step": 4562 }, { "epoch": 2.121803812180381, "grad_norm": 3.3981826305389404, "learning_rate": 4.081425404782831e-05, "loss": 0.4402, "step": 4564 }, { "epoch": 2.122733612273361, "grad_norm": 3.908897876739502, "learning_rate": 4.0753346779634414e-05, "loss": 0.532, "step": 4566 }, { "epoch": 2.123663412366341, "grad_norm": 4.522519111633301, "learning_rate": 4.069228403226772e-05, "loss": 0.52, "step": 4568 }, { "epoch": 2.124593212459321, "grad_norm": 3.134903907775879, "learning_rate": 4.063106640839283e-05, "loss": 0.3265, "step": 4570 }, { "epoch": 2.1255230125523012, "grad_norm": 3.3281333446502686, "learning_rate": 4.0569694512203014e-05, "loss": 0.462, "step": 4572 }, { "epoch": 2.1264528126452813, "grad_norm": 3.062217950820923, "learning_rate": 4.0508168949414116e-05, "loss": 0.4518, "step": 4574 }, { "epoch": 2.1273826127382613, "grad_norm": 3.5704517364501953, "learning_rate": 4.0446490327258564e-05, "loss": 0.4159, "step": 4576 }, { "epoch": 2.1283124128312414, "grad_norm": 3.2354671955108643, "learning_rate": 4.038465925447951e-05, "loss": 0.475, "step": 4578 }, { "epoch": 2.1292422129242214, "grad_norm": 3.5049145221710205, "learning_rate": 4.032267634132462e-05, "loss": 0.5028, "step": 4580 }, { "epoch": 2.1301720130172015, "grad_norm": 3.5024209022521973, "learning_rate": 4.0260542199540275e-05, "loss": 0.443, "step": 4582 }, { "epoch": 2.131101813110181, "grad_norm": 2.554466962814331, "learning_rate": 4.019825744236535e-05, "loss": 0.4442, "step": 4584 }, { "epoch": 2.132031613203161, "grad_norm": 2.722719192504883, "learning_rate": 4.013582268452525e-05, "loss": 0.3585, "step": 4586 }, { "epoch": 2.132961413296141, "grad_norm": 3.3709120750427246, "learning_rate": 4.0073238542225815e-05, "loss": 0.5045, "step": 4588 }, { "epoch": 2.1338912133891212, "grad_norm": 2.9327988624572754, "learning_rate": 4.00105056331473e-05, "loss": 0.4064, "step": 4590 }, { "epoch": 2.1348210134821013, "grad_norm": 3.275188446044922, "learning_rate": 3.994762457643817e-05, "loss": 0.3699, "step": 4592 }, { "epoch": 2.1357508135750813, "grad_norm": 4.299485206604004, "learning_rate": 3.988459599270906e-05, "loss": 0.5072, "step": 4594 }, { "epoch": 2.1366806136680614, "grad_norm": 3.1881790161132812, "learning_rate": 3.9821420504026684e-05, "loss": 0.4312, "step": 4596 }, { "epoch": 2.1376104137610414, "grad_norm": 3.128575325012207, "learning_rate": 3.975809873390756e-05, "loss": 0.4089, "step": 4598 }, { "epoch": 2.1385402138540215, "grad_norm": 3.3112823963165283, "learning_rate": 3.9694631307312026e-05, "loss": 0.5185, "step": 4600 }, { "epoch": 2.1394700139470015, "grad_norm": 3.8935046195983887, "learning_rate": 3.9631018850637957e-05, "loss": 0.537, "step": 4602 }, { "epoch": 2.1403998140399816, "grad_norm": 3.187584161758423, "learning_rate": 3.956726199171459e-05, "loss": 0.374, "step": 4604 }, { "epoch": 2.141329614132961, "grad_norm": 3.1182169914245605, "learning_rate": 3.950336135979644e-05, "loss": 0.4809, "step": 4606 }, { "epoch": 2.1422594142259412, "grad_norm": 3.3914315700531006, "learning_rate": 3.9439317585556885e-05, "loss": 0.3542, "step": 4608 }, { "epoch": 2.1431892143189213, "grad_norm": 5.287722110748291, "learning_rate": 3.937513130108216e-05, "loss": 0.4937, "step": 4610 }, { "epoch": 2.1441190144119013, "grad_norm": 2.5850844383239746, "learning_rate": 3.931080313986497e-05, "loss": 0.4036, "step": 4612 }, { "epoch": 2.1450488145048814, "grad_norm": 3.629363775253296, "learning_rate": 3.924633373679828e-05, "loss": 0.447, "step": 4614 }, { "epoch": 2.1459786145978614, "grad_norm": 2.64500093460083, "learning_rate": 3.91817237281691e-05, "loss": 0.4227, "step": 4616 }, { "epoch": 2.1469084146908415, "grad_norm": 2.690601348876953, "learning_rate": 3.9116973751652116e-05, "loss": 0.4056, "step": 4618 }, { "epoch": 2.1478382147838215, "grad_norm": 3.1267738342285156, "learning_rate": 3.905208444630346e-05, "loss": 0.3595, "step": 4620 }, { "epoch": 2.1487680148768016, "grad_norm": 2.8560853004455566, "learning_rate": 3.8987056452554374e-05, "loss": 0.3889, "step": 4622 }, { "epoch": 2.1496978149697816, "grad_norm": 3.4054551124572754, "learning_rate": 3.8921890412204874e-05, "loss": 0.4394, "step": 4624 }, { "epoch": 2.1506276150627617, "grad_norm": 3.544426441192627, "learning_rate": 3.885658696841753e-05, "loss": 0.4823, "step": 4626 }, { "epoch": 2.1515574151557413, "grad_norm": 3.591400146484375, "learning_rate": 3.8791146765710935e-05, "loss": 0.4411, "step": 4628 }, { "epoch": 2.1524872152487213, "grad_norm": 3.053438425064087, "learning_rate": 3.872557044995348e-05, "loss": 0.4381, "step": 4630 }, { "epoch": 2.1534170153417014, "grad_norm": 3.114530563354492, "learning_rate": 3.865985866835691e-05, "loss": 0.4227, "step": 4632 }, { "epoch": 2.1543468154346814, "grad_norm": 3.290297508239746, "learning_rate": 3.859401206947001e-05, "loss": 0.4545, "step": 4634 }, { "epoch": 2.1552766155276615, "grad_norm": 3.662348747253418, "learning_rate": 3.8528031303172085e-05, "loss": 0.4714, "step": 4636 }, { "epoch": 2.1562064156206415, "grad_norm": 3.2660603523254395, "learning_rate": 3.846191702066668e-05, "loss": 0.4249, "step": 4638 }, { "epoch": 2.1571362157136216, "grad_norm": 3.5876352787017822, "learning_rate": 3.839566987447512e-05, "loss": 0.5214, "step": 4640 }, { "epoch": 2.1580660158066016, "grad_norm": 2.9179069995880127, "learning_rate": 3.83292905184299e-05, "loss": 0.3945, "step": 4642 }, { "epoch": 2.1589958158995817, "grad_norm": 2.8682708740234375, "learning_rate": 3.8262779607668556e-05, "loss": 0.4341, "step": 4644 }, { "epoch": 2.1599256159925617, "grad_norm": 4.1055827140808105, "learning_rate": 3.819613779862685e-05, "loss": 0.5276, "step": 4646 }, { "epoch": 2.1608554160855418, "grad_norm": 3.4899938106536865, "learning_rate": 3.812936574903256e-05, "loss": 0.4437, "step": 4648 }, { "epoch": 2.161785216178522, "grad_norm": 3.581888198852539, "learning_rate": 3.8062464117898914e-05, "loss": 0.421, "step": 4650 }, { "epoch": 2.1627150162715014, "grad_norm": 3.580599308013916, "learning_rate": 3.799543356551792e-05, "loss": 0.4134, "step": 4652 }, { "epoch": 2.1636448163644815, "grad_norm": 3.5085740089416504, "learning_rate": 3.792827475345412e-05, "loss": 0.4292, "step": 4654 }, { "epoch": 2.1645746164574615, "grad_norm": 2.813819169998169, "learning_rate": 3.786098834453786e-05, "loss": 0.4364, "step": 4656 }, { "epoch": 2.1655044165504416, "grad_norm": 3.755056142807007, "learning_rate": 3.779357500285882e-05, "loss": 0.406, "step": 4658 }, { "epoch": 2.1664342166434216, "grad_norm": 3.5387766361236572, "learning_rate": 3.772603539375946e-05, "loss": 0.4266, "step": 4660 }, { "epoch": 2.1673640167364017, "grad_norm": 2.883554697036743, "learning_rate": 3.765837018382849e-05, "loss": 0.3575, "step": 4662 }, { "epoch": 2.1682938168293817, "grad_norm": 2.762657880783081, "learning_rate": 3.75905800408942e-05, "loss": 0.3713, "step": 4664 }, { "epoch": 2.1692236169223618, "grad_norm": 3.360987663269043, "learning_rate": 3.7522665634017934e-05, "loss": 0.4475, "step": 4666 }, { "epoch": 2.170153417015342, "grad_norm": 2.9941458702087402, "learning_rate": 3.745462763348746e-05, "loss": 0.4147, "step": 4668 }, { "epoch": 2.171083217108322, "grad_norm": 3.018435478210449, "learning_rate": 3.7386466710810356e-05, "loss": 0.3281, "step": 4670 }, { "epoch": 2.172013017201302, "grad_norm": 2.885315418243408, "learning_rate": 3.7318183538707464e-05, "loss": 0.3882, "step": 4672 }, { "epoch": 2.172942817294282, "grad_norm": 3.438981771469116, "learning_rate": 3.7249778791106096e-05, "loss": 0.3897, "step": 4674 }, { "epoch": 2.1738726173872616, "grad_norm": 3.1209867000579834, "learning_rate": 3.718125314313347e-05, "loss": 0.4216, "step": 4676 }, { "epoch": 2.1748024174802416, "grad_norm": 3.6221578121185303, "learning_rate": 3.7112607271110134e-05, "loss": 0.3757, "step": 4678 }, { "epoch": 2.1757322175732217, "grad_norm": 3.9098801612854004, "learning_rate": 3.704384185254306e-05, "loss": 0.4317, "step": 4680 }, { "epoch": 2.1766620176662017, "grad_norm": 3.682194471359253, "learning_rate": 3.69749575661192e-05, "loss": 0.3174, "step": 4682 }, { "epoch": 2.1775918177591818, "grad_norm": 2.7175605297088623, "learning_rate": 3.6905955091698654e-05, "loss": 0.3244, "step": 4684 }, { "epoch": 2.178521617852162, "grad_norm": 3.173694372177124, "learning_rate": 3.6836835110307964e-05, "loss": 0.4031, "step": 4686 }, { "epoch": 2.179451417945142, "grad_norm": 3.055032968521118, "learning_rate": 3.67675983041335e-05, "loss": 0.4198, "step": 4688 }, { "epoch": 2.180381218038122, "grad_norm": 3.2539501190185547, "learning_rate": 3.6698245356514504e-05, "loss": 0.3971, "step": 4690 }, { "epoch": 2.181311018131102, "grad_norm": 3.490469217300415, "learning_rate": 3.6628776951936634e-05, "loss": 0.3903, "step": 4692 }, { "epoch": 2.182240818224082, "grad_norm": 4.090719699859619, "learning_rate": 3.655919377602498e-05, "loss": 0.5276, "step": 4694 }, { "epoch": 2.183170618317062, "grad_norm": 2.7410433292388916, "learning_rate": 3.6489496515537367e-05, "loss": 0.3781, "step": 4696 }, { "epoch": 2.1841004184100417, "grad_norm": 3.2568719387054443, "learning_rate": 3.641968585835766e-05, "loss": 0.4295, "step": 4698 }, { "epoch": 2.1850302185030217, "grad_norm": 2.9220166206359863, "learning_rate": 3.634976249348885e-05, "loss": 0.4493, "step": 4700 }, { "epoch": 2.1859600185960018, "grad_norm": 3.7855935096740723, "learning_rate": 3.62797271110463e-05, "loss": 0.5017, "step": 4702 }, { "epoch": 2.186889818688982, "grad_norm": 3.0728046894073486, "learning_rate": 3.6209580402250964e-05, "loss": 0.4268, "step": 4704 }, { "epoch": 2.187819618781962, "grad_norm": 3.523120641708374, "learning_rate": 3.6139323059422584e-05, "loss": 0.4908, "step": 4706 }, { "epoch": 2.188749418874942, "grad_norm": 4.081243991851807, "learning_rate": 3.60689557759727e-05, "loss": 0.4646, "step": 4708 }, { "epoch": 2.189679218967922, "grad_norm": 3.2554826736450195, "learning_rate": 3.5998479246398036e-05, "loss": 0.3731, "step": 4710 }, { "epoch": 2.190609019060902, "grad_norm": 3.0235683917999268, "learning_rate": 3.5927894166273485e-05, "loss": 0.4243, "step": 4712 }, { "epoch": 2.191538819153882, "grad_norm": 3.88913893699646, "learning_rate": 3.585720123224526e-05, "loss": 0.497, "step": 4714 }, { "epoch": 2.192468619246862, "grad_norm": 3.110145330429077, "learning_rate": 3.5786401142024145e-05, "loss": 0.4157, "step": 4716 }, { "epoch": 2.193398419339842, "grad_norm": 2.457838535308838, "learning_rate": 3.571549459437837e-05, "loss": 0.3357, "step": 4718 }, { "epoch": 2.1943282194328217, "grad_norm": 3.354752540588379, "learning_rate": 3.564448228912696e-05, "loss": 0.4116, "step": 4720 }, { "epoch": 2.195258019525802, "grad_norm": 3.4623775482177734, "learning_rate": 3.557336492713275e-05, "loss": 0.4874, "step": 4722 }, { "epoch": 2.196187819618782, "grad_norm": 3.513793468475342, "learning_rate": 3.550214321029531e-05, "loss": 0.4938, "step": 4724 }, { "epoch": 2.197117619711762, "grad_norm": 2.9224631786346436, "learning_rate": 3.5430817841544295e-05, "loss": 0.398, "step": 4726 }, { "epoch": 2.198047419804742, "grad_norm": 2.7186036109924316, "learning_rate": 3.535938952483227e-05, "loss": 0.3798, "step": 4728 }, { "epoch": 2.198977219897722, "grad_norm": 2.3600053787231445, "learning_rate": 3.528785896512789e-05, "loss": 0.4309, "step": 4730 }, { "epoch": 2.199907019990702, "grad_norm": 2.852210521697998, "learning_rate": 3.52162268684089e-05, "loss": 0.3466, "step": 4732 }, { "epoch": 2.200836820083682, "grad_norm": 3.1389923095703125, "learning_rate": 3.5144493941655157e-05, "loss": 0.2885, "step": 4734 }, { "epoch": 2.201766620176662, "grad_norm": 3.9322664737701416, "learning_rate": 3.507266089284173e-05, "loss": 0.4057, "step": 4736 }, { "epoch": 2.202696420269642, "grad_norm": 3.1747078895568848, "learning_rate": 3.500072843093178e-05, "loss": 0.3681, "step": 4738 }, { "epoch": 2.2036262203626222, "grad_norm": 3.8876752853393555, "learning_rate": 3.492869726586968e-05, "loss": 0.4823, "step": 4740 }, { "epoch": 2.204556020455602, "grad_norm": 3.215681552886963, "learning_rate": 3.485656810857392e-05, "loss": 0.3745, "step": 4742 }, { "epoch": 2.205485820548582, "grad_norm": 3.1267731189727783, "learning_rate": 3.478434167093022e-05, "loss": 0.4062, "step": 4744 }, { "epoch": 2.206415620641562, "grad_norm": 3.181790590286255, "learning_rate": 3.471201866578432e-05, "loss": 0.4192, "step": 4746 }, { "epoch": 2.207345420734542, "grad_norm": 3.1657087802886963, "learning_rate": 3.463959980693506e-05, "loss": 0.3528, "step": 4748 }, { "epoch": 2.208275220827522, "grad_norm": 3.8338868618011475, "learning_rate": 3.456708580912742e-05, "loss": 0.4405, "step": 4750 }, { "epoch": 2.209205020920502, "grad_norm": 4.129052639007568, "learning_rate": 3.449447738804519e-05, "loss": 0.5197, "step": 4752 }, { "epoch": 2.210134821013482, "grad_norm": 2.8206214904785156, "learning_rate": 3.442177526030425e-05, "loss": 0.3852, "step": 4754 }, { "epoch": 2.211064621106462, "grad_norm": 3.0567996501922607, "learning_rate": 3.4348980143445177e-05, "loss": 0.4405, "step": 4756 }, { "epoch": 2.2119944211994422, "grad_norm": 2.755260944366455, "learning_rate": 3.427609275592643e-05, "loss": 0.3127, "step": 4758 }, { "epoch": 2.2129242212924223, "grad_norm": 2.726912021636963, "learning_rate": 3.420311381711713e-05, "loss": 0.3874, "step": 4760 }, { "epoch": 2.2138540213854023, "grad_norm": 3.668109893798828, "learning_rate": 3.4130044047289864e-05, "loss": 0.4755, "step": 4762 }, { "epoch": 2.214783821478382, "grad_norm": 2.8139522075653076, "learning_rate": 3.405688416761381e-05, "loss": 0.358, "step": 4764 }, { "epoch": 2.215713621571362, "grad_norm": 3.027250051498413, "learning_rate": 3.398363490014744e-05, "loss": 0.3, "step": 4766 }, { "epoch": 2.216643421664342, "grad_norm": 2.930248498916626, "learning_rate": 3.391029696783143e-05, "loss": 0.3417, "step": 4768 }, { "epoch": 2.217573221757322, "grad_norm": 3.735938310623169, "learning_rate": 3.383687109448159e-05, "loss": 0.4262, "step": 4770 }, { "epoch": 2.218503021850302, "grad_norm": 2.584505558013916, "learning_rate": 3.376335800478163e-05, "loss": 0.3855, "step": 4772 }, { "epoch": 2.219432821943282, "grad_norm": 3.2462093830108643, "learning_rate": 3.368975842427608e-05, "loss": 0.4062, "step": 4774 }, { "epoch": 2.2203626220362622, "grad_norm": 3.987848997116089, "learning_rate": 3.361607307936308e-05, "loss": 0.3555, "step": 4776 }, { "epoch": 2.2212924221292423, "grad_norm": 3.6213572025299072, "learning_rate": 3.3542302697287244e-05, "loss": 0.4195, "step": 4778 }, { "epoch": 2.2222222222222223, "grad_norm": 3.4999287128448486, "learning_rate": 3.346844800613243e-05, "loss": 0.4005, "step": 4780 }, { "epoch": 2.2231520223152024, "grad_norm": 3.666363000869751, "learning_rate": 3.339450973481466e-05, "loss": 0.4276, "step": 4782 }, { "epoch": 2.2240818224081824, "grad_norm": 3.8171730041503906, "learning_rate": 3.332048861307481e-05, "loss": 0.4963, "step": 4784 }, { "epoch": 2.225011622501162, "grad_norm": 3.7435479164123535, "learning_rate": 3.3246385371471455e-05, "loss": 0.3893, "step": 4786 }, { "epoch": 2.225941422594142, "grad_norm": 2.1317644119262695, "learning_rate": 3.317220074137371e-05, "loss": 0.3377, "step": 4788 }, { "epoch": 2.226871222687122, "grad_norm": 2.90415358543396, "learning_rate": 3.3097935454953874e-05, "loss": 0.4042, "step": 4790 }, { "epoch": 2.227801022780102, "grad_norm": 3.117114782333374, "learning_rate": 3.3023590245180354e-05, "loss": 0.4253, "step": 4792 }, { "epoch": 2.228730822873082, "grad_norm": 3.164691209793091, "learning_rate": 3.2949165845810425e-05, "loss": 0.3944, "step": 4794 }, { "epoch": 2.2296606229660623, "grad_norm": 2.970520257949829, "learning_rate": 3.287466299138275e-05, "loss": 0.3176, "step": 4796 }, { "epoch": 2.2305904230590423, "grad_norm": 3.267120838165283, "learning_rate": 3.2800082417210525e-05, "loss": 0.4052, "step": 4798 }, { "epoch": 2.2315202231520224, "grad_norm": 3.9235310554504395, "learning_rate": 3.272542485937382e-05, "loss": 0.3266, "step": 4800 }, { "epoch": 2.2324500232450024, "grad_norm": 3.201227903366089, "learning_rate": 3.265069105471266e-05, "loss": 0.4139, "step": 4802 }, { "epoch": 2.2333798233379825, "grad_norm": 2.6175377368927, "learning_rate": 3.2575881740819504e-05, "loss": 0.3478, "step": 4804 }, { "epoch": 2.2343096234309625, "grad_norm": 3.325690269470215, "learning_rate": 3.2500997656032043e-05, "loss": 0.3321, "step": 4806 }, { "epoch": 2.2352394235239426, "grad_norm": 3.4481008052825928, "learning_rate": 3.242603953942602e-05, "loss": 0.3735, "step": 4808 }, { "epoch": 2.236169223616922, "grad_norm": 3.272484302520752, "learning_rate": 3.235100813080775e-05, "loss": 0.4279, "step": 4810 }, { "epoch": 2.237099023709902, "grad_norm": 3.170534610748291, "learning_rate": 3.227590417070695e-05, "loss": 0.4109, "step": 4812 }, { "epoch": 2.2380288238028823, "grad_norm": 3.064009428024292, "learning_rate": 3.220072840036937e-05, "loss": 0.2966, "step": 4814 }, { "epoch": 2.2389586238958623, "grad_norm": 3.15728497505188, "learning_rate": 3.2125481561749544e-05, "loss": 0.4073, "step": 4816 }, { "epoch": 2.2398884239888424, "grad_norm": 3.6197547912597656, "learning_rate": 3.205016439750339e-05, "loss": 0.3981, "step": 4818 }, { "epoch": 2.2408182240818224, "grad_norm": 2.808335065841675, "learning_rate": 3.197477765098089e-05, "loss": 0.321, "step": 4820 }, { "epoch": 2.2417480241748025, "grad_norm": 3.6416983604431152, "learning_rate": 3.189932206621881e-05, "loss": 0.4528, "step": 4822 }, { "epoch": 2.2426778242677825, "grad_norm": 3.6418683528900146, "learning_rate": 3.1823798387933276e-05, "loss": 0.3793, "step": 4824 }, { "epoch": 2.2436076243607626, "grad_norm": 2.747840166091919, "learning_rate": 3.174820736151258e-05, "loss": 0.3823, "step": 4826 }, { "epoch": 2.2445374244537426, "grad_norm": 3.4788942337036133, "learning_rate": 3.167254973300954e-05, "loss": 0.4748, "step": 4828 }, { "epoch": 2.2454672245467227, "grad_norm": 3.1621689796447754, "learning_rate": 3.159682624913445e-05, "loss": 0.3927, "step": 4830 }, { "epoch": 2.2463970246397027, "grad_norm": 3.0874462127685547, "learning_rate": 3.1521037657247585e-05, "loss": 0.4856, "step": 4832 }, { "epoch": 2.2473268247326823, "grad_norm": 3.7450037002563477, "learning_rate": 3.144518470535164e-05, "loss": 0.3978, "step": 4834 }, { "epoch": 2.2482566248256624, "grad_norm": 3.4451944828033447, "learning_rate": 3.1369268142084705e-05, "loss": 0.32, "step": 4836 }, { "epoch": 2.2491864249186424, "grad_norm": 3.5200023651123047, "learning_rate": 3.129328871671258e-05, "loss": 0.4439, "step": 4838 }, { "epoch": 2.2501162250116225, "grad_norm": 3.825927734375, "learning_rate": 3.1217247179121514e-05, "loss": 0.4011, "step": 4840 }, { "epoch": 2.2510460251046025, "grad_norm": 3.097801446914673, "learning_rate": 3.114114427981083e-05, "loss": 0.311, "step": 4842 }, { "epoch": 2.2519758251975825, "grad_norm": 3.7702646255493164, "learning_rate": 3.1064980769885336e-05, "loss": 0.397, "step": 4844 }, { "epoch": 2.2529056252905626, "grad_norm": 2.899056911468506, "learning_rate": 3.0988757401048206e-05, "loss": 0.4202, "step": 4846 }, { "epoch": 2.2538354253835426, "grad_norm": 2.8947277069091797, "learning_rate": 3.0912474925593276e-05, "loss": 0.366, "step": 4848 }, { "epoch": 2.2547652254765227, "grad_norm": 2.90264892578125, "learning_rate": 3.0836134096397805e-05, "loss": 0.31, "step": 4850 }, { "epoch": 2.2556950255695027, "grad_norm": 3.1452574729919434, "learning_rate": 3.075973566691492e-05, "loss": 0.4085, "step": 4852 }, { "epoch": 2.256624825662483, "grad_norm": 3.372601270675659, "learning_rate": 3.068328039116632e-05, "loss": 0.3968, "step": 4854 }, { "epoch": 2.2575546257554624, "grad_norm": 3.002793312072754, "learning_rate": 3.060676902373469e-05, "loss": 0.483, "step": 4856 }, { "epoch": 2.2584844258484424, "grad_norm": 2.889601469039917, "learning_rate": 3.053020231975632e-05, "loss": 0.3561, "step": 4858 }, { "epoch": 2.2594142259414225, "grad_norm": 2.8357739448547363, "learning_rate": 3.045358103491373e-05, "loss": 0.4387, "step": 4860 }, { "epoch": 2.2603440260344025, "grad_norm": 2.930962562561035, "learning_rate": 3.0376905925427982e-05, "loss": 0.3828, "step": 4862 }, { "epoch": 2.2612738261273826, "grad_norm": 4.000149726867676, "learning_rate": 3.0300177748051515e-05, "loss": 0.3865, "step": 4864 }, { "epoch": 2.2622036262203626, "grad_norm": 2.7851078510284424, "learning_rate": 3.0223397260060448e-05, "loss": 0.3309, "step": 4866 }, { "epoch": 2.2631334263133427, "grad_norm": 3.043165683746338, "learning_rate": 3.0146565219247175e-05, "loss": 0.3747, "step": 4868 }, { "epoch": 2.2640632264063227, "grad_norm": 3.4928159713745117, "learning_rate": 3.0069682383912976e-05, "loss": 0.3934, "step": 4870 }, { "epoch": 2.264993026499303, "grad_norm": 2.7139620780944824, "learning_rate": 2.9992749512860322e-05, "loss": 0.3832, "step": 4872 }, { "epoch": 2.265922826592283, "grad_norm": 3.410310983657837, "learning_rate": 2.9915767365385645e-05, "loss": 0.4561, "step": 4874 }, { "epoch": 2.266852626685263, "grad_norm": 2.5056729316711426, "learning_rate": 2.983873670127166e-05, "loss": 0.2709, "step": 4876 }, { "epoch": 2.2677824267782425, "grad_norm": 2.6230432987213135, "learning_rate": 2.976165828077988e-05, "loss": 0.3719, "step": 4878 }, { "epoch": 2.2687122268712225, "grad_norm": 3.328594207763672, "learning_rate": 2.9684532864643258e-05, "loss": 0.3777, "step": 4880 }, { "epoch": 2.2696420269642026, "grad_norm": 3.290447950363159, "learning_rate": 2.960736121405848e-05, "loss": 0.4179, "step": 4882 }, { "epoch": 2.2705718270571826, "grad_norm": 4.244988918304443, "learning_rate": 2.9530144090678584e-05, "loss": 0.4426, "step": 4884 }, { "epoch": 2.2715016271501627, "grad_norm": 3.554015874862671, "learning_rate": 2.945288225660539e-05, "loss": 0.3813, "step": 4886 }, { "epoch": 2.2724314272431427, "grad_norm": 2.82839298248291, "learning_rate": 2.9375576474382044e-05, "loss": 0.3669, "step": 4888 }, { "epoch": 2.273361227336123, "grad_norm": 2.547863483428955, "learning_rate": 2.9298227506985385e-05, "loss": 0.3186, "step": 4890 }, { "epoch": 2.274291027429103, "grad_norm": 2.9217751026153564, "learning_rate": 2.92208361178185e-05, "loss": 0.3516, "step": 4892 }, { "epoch": 2.275220827522083, "grad_norm": 3.3168442249298096, "learning_rate": 2.9143403070703152e-05, "loss": 0.3809, "step": 4894 }, { "epoch": 2.276150627615063, "grad_norm": 3.7803611755371094, "learning_rate": 2.9065929129872236e-05, "loss": 0.4798, "step": 4896 }, { "epoch": 2.277080427708043, "grad_norm": 2.2040297985076904, "learning_rate": 2.898841505996233e-05, "loss": 0.2901, "step": 4898 }, { "epoch": 2.2780102278010226, "grad_norm": 2.7852139472961426, "learning_rate": 2.891086162600593e-05, "loss": 0.3326, "step": 4900 }, { "epoch": 2.2789400278940026, "grad_norm": 3.0842032432556152, "learning_rate": 2.8833269593424153e-05, "loss": 0.3668, "step": 4902 }, { "epoch": 2.2798698279869827, "grad_norm": 3.155099391937256, "learning_rate": 2.8755639728019092e-05, "loss": 0.4256, "step": 4904 }, { "epoch": 2.2807996280799627, "grad_norm": 3.10101056098938, "learning_rate": 2.8677972795966082e-05, "loss": 0.3319, "step": 4906 }, { "epoch": 2.2817294281729428, "grad_norm": 2.576056718826294, "learning_rate": 2.8600269563806454e-05, "loss": 0.3869, "step": 4908 }, { "epoch": 2.282659228265923, "grad_norm": 2.688131809234619, "learning_rate": 2.8522530798439723e-05, "loss": 0.3492, "step": 4910 }, { "epoch": 2.283589028358903, "grad_norm": 2.6909687519073486, "learning_rate": 2.8444757267116087e-05, "loss": 0.2833, "step": 4912 }, { "epoch": 2.284518828451883, "grad_norm": 3.263435125350952, "learning_rate": 2.836694973742898e-05, "loss": 0.412, "step": 4914 }, { "epoch": 2.285448628544863, "grad_norm": 2.6507022380828857, "learning_rate": 2.8289108977307203e-05, "loss": 0.366, "step": 4916 }, { "epoch": 2.286378428637843, "grad_norm": 2.710958480834961, "learning_rate": 2.8211235755007718e-05, "loss": 0.3763, "step": 4918 }, { "epoch": 2.287308228730823, "grad_norm": 2.6443233489990234, "learning_rate": 2.8133330839107754e-05, "loss": 0.3213, "step": 4920 }, { "epoch": 2.2882380288238027, "grad_norm": 3.28083872795105, "learning_rate": 2.805539499849739e-05, "loss": 0.4599, "step": 4922 }, { "epoch": 2.2891678289167827, "grad_norm": 2.811690330505371, "learning_rate": 2.7977429002371886e-05, "loss": 0.3538, "step": 4924 }, { "epoch": 2.2900976290097628, "grad_norm": 2.3816046714782715, "learning_rate": 2.789943362022418e-05, "loss": 0.3033, "step": 4926 }, { "epoch": 2.291027429102743, "grad_norm": 3.128115177154541, "learning_rate": 2.7821409621837192e-05, "loss": 0.3471, "step": 4928 }, { "epoch": 2.291957229195723, "grad_norm": 2.6351373195648193, "learning_rate": 2.7743357777276262e-05, "loss": 0.366, "step": 4930 }, { "epoch": 2.292887029288703, "grad_norm": 2.5909485816955566, "learning_rate": 2.766527885688165e-05, "loss": 0.3063, "step": 4932 }, { "epoch": 2.293816829381683, "grad_norm": 2.83809232711792, "learning_rate": 2.7587173631260698e-05, "loss": 0.3153, "step": 4934 }, { "epoch": 2.294746629474663, "grad_norm": 2.7558257579803467, "learning_rate": 2.7509042871280518e-05, "loss": 0.3344, "step": 4936 }, { "epoch": 2.295676429567643, "grad_norm": 2.9928059577941895, "learning_rate": 2.7430887348060132e-05, "loss": 0.2663, "step": 4938 }, { "epoch": 2.296606229660623, "grad_norm": 2.5302348136901855, "learning_rate": 2.735270783296299e-05, "loss": 0.4071, "step": 4940 }, { "epoch": 2.297536029753603, "grad_norm": 2.727423667907715, "learning_rate": 2.727450509758941e-05, "loss": 0.2968, "step": 4942 }, { "epoch": 2.2984658298465828, "grad_norm": 3.026275396347046, "learning_rate": 2.7196279913768723e-05, "loss": 0.349, "step": 4944 }, { "epoch": 2.299395629939563, "grad_norm": 3.0753860473632812, "learning_rate": 2.711803305355198e-05, "loss": 0.347, "step": 4946 }, { "epoch": 2.300325430032543, "grad_norm": 3.480992555618286, "learning_rate": 2.703976528920409e-05, "loss": 0.4891, "step": 4948 }, { "epoch": 2.301255230125523, "grad_norm": 3.328375816345215, "learning_rate": 2.6961477393196255e-05, "loss": 0.3152, "step": 4950 }, { "epoch": 2.302185030218503, "grad_norm": 2.75244140625, "learning_rate": 2.688317013819846e-05, "loss": 0.3882, "step": 4952 }, { "epoch": 2.303114830311483, "grad_norm": 3.0123674869537354, "learning_rate": 2.680484429707166e-05, "loss": 0.4143, "step": 4954 }, { "epoch": 2.304044630404463, "grad_norm": 2.2477266788482666, "learning_rate": 2.6726500642860296e-05, "loss": 0.3014, "step": 4956 }, { "epoch": 2.304974430497443, "grad_norm": 2.9288997650146484, "learning_rate": 2.66481399487846e-05, "loss": 0.4005, "step": 4958 }, { "epoch": 2.305904230590423, "grad_norm": 3.2187435626983643, "learning_rate": 2.656976298823297e-05, "loss": 0.3311, "step": 4960 }, { "epoch": 2.306834030683403, "grad_norm": 2.964693546295166, "learning_rate": 2.649137053475441e-05, "loss": 0.3713, "step": 4962 }, { "epoch": 2.3077638307763833, "grad_norm": 3.5222795009613037, "learning_rate": 2.641296336205076e-05, "loss": 0.379, "step": 4964 }, { "epoch": 2.308693630869363, "grad_norm": 3.529493570327759, "learning_rate": 2.6334542243969167e-05, "loss": 0.3258, "step": 4966 }, { "epoch": 2.309623430962343, "grad_norm": 2.7780206203460693, "learning_rate": 2.6256107954494374e-05, "loss": 0.3802, "step": 4968 }, { "epoch": 2.310553231055323, "grad_norm": 3.5887792110443115, "learning_rate": 2.617766126774123e-05, "loss": 0.5123, "step": 4970 }, { "epoch": 2.311483031148303, "grad_norm": 3.184898614883423, "learning_rate": 2.6099202957946766e-05, "loss": 0.3917, "step": 4972 }, { "epoch": 2.312412831241283, "grad_norm": 2.908425807952881, "learning_rate": 2.6020733799462876e-05, "loss": 0.2894, "step": 4974 }, { "epoch": 2.313342631334263, "grad_norm": 3.2958133220672607, "learning_rate": 2.594225456674852e-05, "loss": 0.3729, "step": 4976 }, { "epoch": 2.314272431427243, "grad_norm": 3.413919687271118, "learning_rate": 2.5863766034361946e-05, "loss": 0.4552, "step": 4978 }, { "epoch": 2.315202231520223, "grad_norm": 2.6663718223571777, "learning_rate": 2.5785268976953372e-05, "loss": 0.3736, "step": 4980 }, { "epoch": 2.3161320316132032, "grad_norm": 2.7767720222473145, "learning_rate": 2.570676416925698e-05, "loss": 0.3729, "step": 4982 }, { "epoch": 2.3170618317061833, "grad_norm": 3.0108439922332764, "learning_rate": 2.5628252386083566e-05, "loss": 0.2999, "step": 4984 }, { "epoch": 2.3179916317991633, "grad_norm": 2.9982855319976807, "learning_rate": 2.5549734402312786e-05, "loss": 0.4101, "step": 4986 }, { "epoch": 2.318921431892143, "grad_norm": 2.623142957687378, "learning_rate": 2.5471210992885348e-05, "loss": 0.3602, "step": 4988 }, { "epoch": 2.3198512319851234, "grad_norm": 3.0562174320220947, "learning_rate": 2.5392682932795662e-05, "loss": 0.3901, "step": 4990 }, { "epoch": 2.320781032078103, "grad_norm": 3.513258934020996, "learning_rate": 2.5314150997083955e-05, "loss": 0.332, "step": 4992 }, { "epoch": 2.321710832171083, "grad_norm": 2.5072295665740967, "learning_rate": 2.5235615960828748e-05, "loss": 0.3473, "step": 4994 }, { "epoch": 2.322640632264063, "grad_norm": 3.0032408237457275, "learning_rate": 2.5157078599139102e-05, "loss": 0.3313, "step": 4996 }, { "epoch": 2.323570432357043, "grad_norm": 3.5995590686798096, "learning_rate": 2.5078539687147124e-05, "loss": 0.4156, "step": 4998 }, { "epoch": 2.3245002324500232, "grad_norm": 4.213214874267578, "learning_rate": 2.5000000000000133e-05, "loss": 0.3955, "step": 5000 }, { "epoch": 2.3245002324500232, "eval_cer": 0.28017978097107044, "eval_loss": 0.4403761923313141, "eval_runtime": 397.7253, "eval_samples_per_second": 31.916, "eval_steps_per_second": 0.998, "step": 5000 }, { "epoch": 2.3254300325430033, "grad_norm": 2.877028703689575, "learning_rate": 2.4921460312853146e-05, "loss": 0.2646, "step": 5002 }, { "epoch": 2.3263598326359833, "grad_norm": 3.377638101577759, "learning_rate": 2.4842921400861168e-05, "loss": 0.4778, "step": 5004 }, { "epoch": 2.3272896327289634, "grad_norm": 3.2326889038085938, "learning_rate": 2.4764384039171526e-05, "loss": 0.328, "step": 5006 }, { "epoch": 2.3282194328219434, "grad_norm": 2.7026686668395996, "learning_rate": 2.4685849002916318e-05, "loss": 0.3701, "step": 5008 }, { "epoch": 2.3291492329149235, "grad_norm": 2.9943745136260986, "learning_rate": 2.460731706720462e-05, "loss": 0.2926, "step": 5010 }, { "epoch": 2.3300790330079035, "grad_norm": 2.5782101154327393, "learning_rate": 2.4528789007114912e-05, "loss": 0.3375, "step": 5012 }, { "epoch": 2.331008833100883, "grad_norm": 3.003493547439575, "learning_rate": 2.4450265597687518e-05, "loss": 0.3283, "step": 5014 }, { "epoch": 2.331938633193863, "grad_norm": 3.332096815109253, "learning_rate": 2.437174761391669e-05, "loss": 0.3202, "step": 5016 }, { "epoch": 2.3328684332868432, "grad_norm": 2.8879146575927734, "learning_rate": 2.42932358307433e-05, "loss": 0.3374, "step": 5018 }, { "epoch": 2.3337982333798233, "grad_norm": 2.781337022781372, "learning_rate": 2.4214731023046928e-05, "loss": 0.3888, "step": 5020 }, { "epoch": 2.3347280334728033, "grad_norm": 2.5525567531585693, "learning_rate": 2.4136233965638314e-05, "loss": 0.3375, "step": 5022 }, { "epoch": 2.3356578335657834, "grad_norm": 2.896031141281128, "learning_rate": 2.4057745433251787e-05, "loss": 0.355, "step": 5024 }, { "epoch": 2.3365876336587634, "grad_norm": 3.6540191173553467, "learning_rate": 2.3979266200537384e-05, "loss": 0.3439, "step": 5026 }, { "epoch": 2.3375174337517435, "grad_norm": 3.389752149581909, "learning_rate": 2.3900797042053514e-05, "loss": 0.3415, "step": 5028 }, { "epoch": 2.3384472338447235, "grad_norm": 3.2161660194396973, "learning_rate": 2.3822338732259076e-05, "loss": 0.3175, "step": 5030 }, { "epoch": 2.3393770339377036, "grad_norm": 2.667556047439575, "learning_rate": 2.374389204550589e-05, "loss": 0.4171, "step": 5032 }, { "epoch": 2.3403068340306836, "grad_norm": 2.951172113418579, "learning_rate": 2.366545775603112e-05, "loss": 0.3326, "step": 5034 }, { "epoch": 2.3412366341236632, "grad_norm": 2.9096009731292725, "learning_rate": 2.358703663794952e-05, "loss": 0.3761, "step": 5036 }, { "epoch": 2.3421664342166433, "grad_norm": 2.1154420375823975, "learning_rate": 2.350862946524587e-05, "loss": 0.3145, "step": 5038 }, { "epoch": 2.3430962343096233, "grad_norm": 2.615011692047119, "learning_rate": 2.3430237011767286e-05, "loss": 0.3145, "step": 5040 }, { "epoch": 2.3440260344026034, "grad_norm": 3.3589117527008057, "learning_rate": 2.33518600512157e-05, "loss": 0.3696, "step": 5042 }, { "epoch": 2.3449558344955834, "grad_norm": 2.503469705581665, "learning_rate": 2.3273499357139984e-05, "loss": 0.308, "step": 5044 }, { "epoch": 2.3458856345885635, "grad_norm": 3.6599209308624268, "learning_rate": 2.3195155702928616e-05, "loss": 0.4487, "step": 5046 }, { "epoch": 2.3468154346815435, "grad_norm": 2.1134774684906006, "learning_rate": 2.311682986180182e-05, "loss": 0.3555, "step": 5048 }, { "epoch": 2.3477452347745236, "grad_norm": 2.93213152885437, "learning_rate": 2.3038522606803995e-05, "loss": 0.3553, "step": 5050 }, { "epoch": 2.3486750348675036, "grad_norm": 3.12560772895813, "learning_rate": 2.2960234710796206e-05, "loss": 0.3294, "step": 5052 }, { "epoch": 2.3496048349604837, "grad_norm": 3.0637145042419434, "learning_rate": 2.2881966946448292e-05, "loss": 0.3316, "step": 5054 }, { "epoch": 2.3505346350534637, "grad_norm": 2.8946621417999268, "learning_rate": 2.2803720086231533e-05, "loss": 0.2826, "step": 5056 }, { "epoch": 2.3514644351464433, "grad_norm": 1.734933614730835, "learning_rate": 2.272549490241089e-05, "loss": 0.2687, "step": 5058 }, { "epoch": 2.3523942352394234, "grad_norm": 2.4268338680267334, "learning_rate": 2.264729216703726e-05, "loss": 0.2954, "step": 5060 }, { "epoch": 2.3533240353324034, "grad_norm": 2.2105283737182617, "learning_rate": 2.2569112651940138e-05, "loss": 0.2581, "step": 5062 }, { "epoch": 2.3542538354253835, "grad_norm": 3.2103383541107178, "learning_rate": 2.2490957128719753e-05, "loss": 0.3997, "step": 5064 }, { "epoch": 2.3551836355183635, "grad_norm": 3.230971097946167, "learning_rate": 2.2412826368739566e-05, "loss": 0.4738, "step": 5066 }, { "epoch": 2.3561134356113436, "grad_norm": 2.6410696506500244, "learning_rate": 2.233472114311864e-05, "loss": 0.3315, "step": 5068 }, { "epoch": 2.3570432357043236, "grad_norm": 4.243075370788574, "learning_rate": 2.2256642222723988e-05, "loss": 0.3834, "step": 5070 }, { "epoch": 2.3579730357973037, "grad_norm": 2.6800055503845215, "learning_rate": 2.217859037816308e-05, "loss": 0.2478, "step": 5072 }, { "epoch": 2.3589028358902837, "grad_norm": 2.9371085166931152, "learning_rate": 2.2100566379776092e-05, "loss": 0.2806, "step": 5074 }, { "epoch": 2.3598326359832638, "grad_norm": 3.3723437786102295, "learning_rate": 2.2022570997628388e-05, "loss": 0.3347, "step": 5076 }, { "epoch": 2.360762436076244, "grad_norm": 2.387702226638794, "learning_rate": 2.194460500150288e-05, "loss": 0.374, "step": 5078 }, { "epoch": 2.3616922361692234, "grad_norm": 2.5427284240722656, "learning_rate": 2.1866669160892513e-05, "loss": 0.252, "step": 5080 }, { "epoch": 2.3626220362622035, "grad_norm": 3.5441133975982666, "learning_rate": 2.1788764244992556e-05, "loss": 0.3352, "step": 5082 }, { "epoch": 2.3635518363551835, "grad_norm": 2.2604563236236572, "learning_rate": 2.1710891022693047e-05, "loss": 0.2741, "step": 5084 }, { "epoch": 2.3644816364481636, "grad_norm": 3.1077609062194824, "learning_rate": 2.1633050262571318e-05, "loss": 0.4206, "step": 5086 }, { "epoch": 2.3654114365411436, "grad_norm": 3.3549749851226807, "learning_rate": 2.1555242732884166e-05, "loss": 0.2878, "step": 5088 }, { "epoch": 2.3663412366341237, "grad_norm": 2.394758701324463, "learning_rate": 2.1477469201560578e-05, "loss": 0.2713, "step": 5090 }, { "epoch": 2.3672710367271037, "grad_norm": 2.7237939834594727, "learning_rate": 2.139973043619382e-05, "loss": 0.3594, "step": 5092 }, { "epoch": 2.3682008368200838, "grad_norm": 2.6209535598754883, "learning_rate": 2.132202720403419e-05, "loss": 0.2719, "step": 5094 }, { "epoch": 2.369130636913064, "grad_norm": 2.249479293823242, "learning_rate": 2.1244360271981202e-05, "loss": 0.4033, "step": 5096 }, { "epoch": 2.370060437006044, "grad_norm": 2.6583986282348633, "learning_rate": 2.1166730406576117e-05, "loss": 0.3081, "step": 5098 }, { "epoch": 2.370990237099024, "grad_norm": 2.6606619358062744, "learning_rate": 2.108913837399432e-05, "loss": 0.3382, "step": 5100 }, { "epoch": 2.3719200371920035, "grad_norm": 2.7383100986480713, "learning_rate": 2.101158494003798e-05, "loss": 0.2441, "step": 5102 }, { "epoch": 2.3728498372849836, "grad_norm": 2.8839268684387207, "learning_rate": 2.093407087012801e-05, "loss": 0.3843, "step": 5104 }, { "epoch": 2.3737796373779636, "grad_norm": 2.9414172172546387, "learning_rate": 2.0856596929297114e-05, "loss": 0.3088, "step": 5106 }, { "epoch": 2.3747094374709437, "grad_norm": 2.80674147605896, "learning_rate": 2.0779163882181763e-05, "loss": 0.299, "step": 5108 }, { "epoch": 2.3756392375639237, "grad_norm": 2.999774932861328, "learning_rate": 2.0701772493014875e-05, "loss": 0.3393, "step": 5110 }, { "epoch": 2.3765690376569037, "grad_norm": 3.1087393760681152, "learning_rate": 2.062442352561822e-05, "loss": 0.339, "step": 5112 }, { "epoch": 2.377498837749884, "grad_norm": 3.0064170360565186, "learning_rate": 2.0547117743394873e-05, "loss": 0.3535, "step": 5114 }, { "epoch": 2.378428637842864, "grad_norm": 3.0166244506835938, "learning_rate": 2.0469855909321652e-05, "loss": 0.2931, "step": 5116 }, { "epoch": 2.379358437935844, "grad_norm": 2.492537498474121, "learning_rate": 2.03926387859418e-05, "loss": 0.3256, "step": 5118 }, { "epoch": 2.380288238028824, "grad_norm": 3.645638942718506, "learning_rate": 2.0315467135356975e-05, "loss": 0.4185, "step": 5120 }, { "epoch": 2.381218038121804, "grad_norm": 2.0779542922973633, "learning_rate": 2.0238341719220352e-05, "loss": 0.3283, "step": 5122 }, { "epoch": 2.3821478382147836, "grad_norm": 2.968794345855713, "learning_rate": 2.0161263298728597e-05, "loss": 0.3948, "step": 5124 }, { "epoch": 2.3830776383077636, "grad_norm": 2.464888334274292, "learning_rate": 2.0084232634614608e-05, "loss": 0.2282, "step": 5126 }, { "epoch": 2.3840074384007437, "grad_norm": 2.9569590091705322, "learning_rate": 2.0007250487139938e-05, "loss": 0.3062, "step": 5128 }, { "epoch": 2.3849372384937237, "grad_norm": 2.6530256271362305, "learning_rate": 1.9930317616087304e-05, "loss": 0.2807, "step": 5130 }, { "epoch": 2.385867038586704, "grad_norm": 3.113560914993286, "learning_rate": 1.985343478075309e-05, "loss": 0.3614, "step": 5132 }, { "epoch": 2.386796838679684, "grad_norm": 2.449469804763794, "learning_rate": 1.9776602739939833e-05, "loss": 0.3523, "step": 5134 }, { "epoch": 2.387726638772664, "grad_norm": 1.944501280784607, "learning_rate": 1.9699822251948765e-05, "loss": 0.2924, "step": 5136 }, { "epoch": 2.388656438865644, "grad_norm": 2.6685268878936768, "learning_rate": 1.9623094074572258e-05, "loss": 0.3166, "step": 5138 }, { "epoch": 2.389586238958624, "grad_norm": 3.4080679416656494, "learning_rate": 1.9546418965086534e-05, "loss": 0.3317, "step": 5140 }, { "epoch": 2.390516039051604, "grad_norm": 2.378126621246338, "learning_rate": 1.9469797680243923e-05, "loss": 0.3247, "step": 5142 }, { "epoch": 2.391445839144584, "grad_norm": 3.2308061122894287, "learning_rate": 1.9393230976265575e-05, "loss": 0.3155, "step": 5144 }, { "epoch": 2.3923756392375637, "grad_norm": 3.476830005645752, "learning_rate": 1.931671960883395e-05, "loss": 0.3015, "step": 5146 }, { "epoch": 2.393305439330544, "grad_norm": 2.9651808738708496, "learning_rate": 1.924026433308535e-05, "loss": 0.2799, "step": 5148 }, { "epoch": 2.394235239423524, "grad_norm": 2.6306731700897217, "learning_rate": 1.9163865903602482e-05, "loss": 0.3751, "step": 5150 }, { "epoch": 2.395165039516504, "grad_norm": 2.583646774291992, "learning_rate": 1.9087525074407e-05, "loss": 0.3419, "step": 5152 }, { "epoch": 2.396094839609484, "grad_norm": 2.804506301879883, "learning_rate": 1.9011242598952034e-05, "loss": 0.3169, "step": 5154 }, { "epoch": 2.397024639702464, "grad_norm": 2.8696541786193848, "learning_rate": 1.893501923011494e-05, "loss": 0.3469, "step": 5156 }, { "epoch": 2.397954439795444, "grad_norm": 3.4462320804595947, "learning_rate": 1.8858855720189428e-05, "loss": 0.4001, "step": 5158 }, { "epoch": 2.398884239888424, "grad_norm": 2.923581838607788, "learning_rate": 1.878275282087872e-05, "loss": 0.2902, "step": 5160 }, { "epoch": 2.399814039981404, "grad_norm": 2.3169798851013184, "learning_rate": 1.8706711283287664e-05, "loss": 0.3175, "step": 5162 }, { "epoch": 2.400743840074384, "grad_norm": 2.9857938289642334, "learning_rate": 1.8630731857915544e-05, "loss": 0.2518, "step": 5164 }, { "epoch": 2.401673640167364, "grad_norm": 3.2230000495910645, "learning_rate": 1.8554815294648605e-05, "loss": 0.4133, "step": 5166 }, { "epoch": 2.402603440260344, "grad_norm": 2.3222014904022217, "learning_rate": 1.8478962342752685e-05, "loss": 0.317, "step": 5168 }, { "epoch": 2.4035332403533243, "grad_norm": 2.762336492538452, "learning_rate": 1.840317375086579e-05, "loss": 0.2852, "step": 5170 }, { "epoch": 2.404463040446304, "grad_norm": 2.3767154216766357, "learning_rate": 1.8327450266990684e-05, "loss": 0.3337, "step": 5172 }, { "epoch": 2.405392840539284, "grad_norm": 3.0347399711608887, "learning_rate": 1.825179263848771e-05, "loss": 0.3784, "step": 5174 }, { "epoch": 2.406322640632264, "grad_norm": 2.90901255607605, "learning_rate": 1.817620161206695e-05, "loss": 0.3202, "step": 5176 }, { "epoch": 2.407252440725244, "grad_norm": 2.990335464477539, "learning_rate": 1.8100677933781445e-05, "loss": 0.3268, "step": 5178 }, { "epoch": 2.408182240818224, "grad_norm": 3.511474132537842, "learning_rate": 1.8025222349019362e-05, "loss": 0.3357, "step": 5180 }, { "epoch": 2.409112040911204, "grad_norm": 2.4905245304107666, "learning_rate": 1.794983560249686e-05, "loss": 0.2852, "step": 5182 }, { "epoch": 2.410041841004184, "grad_norm": 2.3444511890411377, "learning_rate": 1.7874518438250695e-05, "loss": 0.2481, "step": 5184 }, { "epoch": 2.410971641097164, "grad_norm": 2.9933111667633057, "learning_rate": 1.7799271599630874e-05, "loss": 0.2856, "step": 5186 }, { "epoch": 2.4119014411901443, "grad_norm": 2.6733756065368652, "learning_rate": 1.7724095829293267e-05, "loss": 0.3139, "step": 5188 }, { "epoch": 2.4128312412831243, "grad_norm": 2.7636845111846924, "learning_rate": 1.764899186919251e-05, "loss": 0.2851, "step": 5190 }, { "epoch": 2.4137610413761044, "grad_norm": 3.180759906768799, "learning_rate": 1.7573960460574198e-05, "loss": 0.3619, "step": 5192 }, { "epoch": 2.414690841469084, "grad_norm": 3.0357584953308105, "learning_rate": 1.749900234396817e-05, "loss": 0.3121, "step": 5194 }, { "epoch": 2.415620641562064, "grad_norm": 2.385322332382202, "learning_rate": 1.742411825918073e-05, "loss": 0.3214, "step": 5196 }, { "epoch": 2.416550441655044, "grad_norm": 3.400581121444702, "learning_rate": 1.734930894528756e-05, "loss": 0.3202, "step": 5198 }, { "epoch": 2.417480241748024, "grad_norm": 2.9596705436706543, "learning_rate": 1.7274575140626396e-05, "loss": 0.3513, "step": 5200 }, { "epoch": 2.418410041841004, "grad_norm": 2.920670986175537, "learning_rate": 1.7199917582789714e-05, "loss": 0.3269, "step": 5202 }, { "epoch": 2.419339841933984, "grad_norm": 3.6983442306518555, "learning_rate": 1.712533700861747e-05, "loss": 0.383, "step": 5204 }, { "epoch": 2.4202696420269643, "grad_norm": 3.0483062267303467, "learning_rate": 1.705083415418982e-05, "loss": 0.3941, "step": 5206 }, { "epoch": 2.4211994421199443, "grad_norm": 3.6441190242767334, "learning_rate": 1.6976409754819862e-05, "loss": 0.4127, "step": 5208 }, { "epoch": 2.4221292422129244, "grad_norm": 2.6786859035491943, "learning_rate": 1.6902064545046325e-05, "loss": 0.3173, "step": 5210 }, { "epoch": 2.4230590423059044, "grad_norm": 2.2916526794433594, "learning_rate": 1.68277992586265e-05, "loss": 0.2858, "step": 5212 }, { "epoch": 2.4239888423988845, "grad_norm": 2.8995182514190674, "learning_rate": 1.6753614628528747e-05, "loss": 0.4058, "step": 5214 }, { "epoch": 2.424918642491864, "grad_norm": 2.9348206520080566, "learning_rate": 1.6679511386925404e-05, "loss": 0.285, "step": 5216 }, { "epoch": 2.425848442584844, "grad_norm": 3.178560495376587, "learning_rate": 1.6605490265185556e-05, "loss": 0.3606, "step": 5218 }, { "epoch": 2.426778242677824, "grad_norm": 2.6221888065338135, "learning_rate": 1.6531551993867795e-05, "loss": 0.2619, "step": 5220 }, { "epoch": 2.427708042770804, "grad_norm": 2.2990331649780273, "learning_rate": 1.6457697302713e-05, "loss": 0.3047, "step": 5222 }, { "epoch": 2.4286378428637843, "grad_norm": 2.9978957176208496, "learning_rate": 1.638392692063712e-05, "loss": 0.2895, "step": 5224 }, { "epoch": 2.4295676429567643, "grad_norm": 3.2127275466918945, "learning_rate": 1.6310241575724165e-05, "loss": 0.3583, "step": 5226 }, { "epoch": 2.4304974430497444, "grad_norm": 2.3272147178649902, "learning_rate": 1.623664199521862e-05, "loss": 0.3042, "step": 5228 }, { "epoch": 2.4314272431427244, "grad_norm": 3.006659507751465, "learning_rate": 1.616312890551863e-05, "loss": 0.2877, "step": 5230 }, { "epoch": 2.4323570432357045, "grad_norm": 2.707934856414795, "learning_rate": 1.608970303216879e-05, "loss": 0.3088, "step": 5232 }, { "epoch": 2.4332868433286845, "grad_norm": 2.90030837059021, "learning_rate": 1.60163650998528e-05, "loss": 0.2919, "step": 5234 }, { "epoch": 2.4342166434216645, "grad_norm": 3.3991498947143555, "learning_rate": 1.5943115832386427e-05, "loss": 0.3669, "step": 5236 }, { "epoch": 2.435146443514644, "grad_norm": 2.3189730644226074, "learning_rate": 1.586995595271038e-05, "loss": 0.249, "step": 5238 }, { "epoch": 2.436076243607624, "grad_norm": 2.5629096031188965, "learning_rate": 1.579688618288313e-05, "loss": 0.2931, "step": 5240 }, { "epoch": 2.4370060437006043, "grad_norm": 3.1237428188323975, "learning_rate": 1.5723907244073804e-05, "loss": 0.4025, "step": 5242 }, { "epoch": 2.4379358437935843, "grad_norm": 2.582821846008301, "learning_rate": 1.565101985655503e-05, "loss": 0.3782, "step": 5244 }, { "epoch": 2.4388656438865643, "grad_norm": 2.431276559829712, "learning_rate": 1.557822473969598e-05, "loss": 0.2164, "step": 5246 }, { "epoch": 2.4397954439795444, "grad_norm": 2.4176185131073, "learning_rate": 1.550552261195506e-05, "loss": 0.3022, "step": 5248 }, { "epoch": 2.4407252440725244, "grad_norm": 2.805905818939209, "learning_rate": 1.5432914190872807e-05, "loss": 0.3214, "step": 5250 }, { "epoch": 2.4416550441655045, "grad_norm": 2.9299213886260986, "learning_rate": 1.536040019306514e-05, "loss": 0.3204, "step": 5252 }, { "epoch": 2.4425848442584845, "grad_norm": 2.5117738246917725, "learning_rate": 1.528798133421591e-05, "loss": 0.276, "step": 5254 }, { "epoch": 2.4435146443514646, "grad_norm": 3.060241222381592, "learning_rate": 1.5215658329070004e-05, "loss": 0.3073, "step": 5256 }, { "epoch": 2.4444444444444446, "grad_norm": 2.997753143310547, "learning_rate": 1.514343189142629e-05, "loss": 0.3415, "step": 5258 }, { "epoch": 2.4453742445374242, "grad_norm": 2.8638041019439697, "learning_rate": 1.5071302734130555e-05, "loss": 0.2455, "step": 5260 }, { "epoch": 2.4463040446304043, "grad_norm": 3.469010353088379, "learning_rate": 1.4999271569068458e-05, "loss": 0.2752, "step": 5262 }, { "epoch": 2.4472338447233843, "grad_norm": 1.9265904426574707, "learning_rate": 1.4927339107158472e-05, "loss": 0.2419, "step": 5264 }, { "epoch": 2.4481636448163644, "grad_norm": 3.034344434738159, "learning_rate": 1.4855506058345042e-05, "loss": 0.3568, "step": 5266 }, { "epoch": 2.4490934449093444, "grad_norm": 2.9017653465270996, "learning_rate": 1.478377313159132e-05, "loss": 0.3521, "step": 5268 }, { "epoch": 2.4500232450023245, "grad_norm": 2.6614370346069336, "learning_rate": 1.471214103487237e-05, "loss": 0.3323, "step": 5270 }, { "epoch": 2.4509530450953045, "grad_norm": 2.7346882820129395, "learning_rate": 1.4640610475167952e-05, "loss": 0.3044, "step": 5272 }, { "epoch": 2.4518828451882846, "grad_norm": 2.705310821533203, "learning_rate": 1.4569182158455927e-05, "loss": 0.3045, "step": 5274 }, { "epoch": 2.4528126452812646, "grad_norm": 3.7406413555145264, "learning_rate": 1.4497856789704904e-05, "loss": 0.3991, "step": 5276 }, { "epoch": 2.4537424453742447, "grad_norm": 2.5255563259124756, "learning_rate": 1.4426635072867443e-05, "loss": 0.36, "step": 5278 }, { "epoch": 2.4546722454672247, "grad_norm": 2.264953136444092, "learning_rate": 1.4355517710873251e-05, "loss": 0.3106, "step": 5280 }, { "epoch": 2.4556020455602043, "grad_norm": 2.400481700897217, "learning_rate": 1.4284505405621865e-05, "loss": 0.3137, "step": 5282 }, { "epoch": 2.4565318456531844, "grad_norm": 3.536932945251465, "learning_rate": 1.4213598857976062e-05, "loss": 0.3942, "step": 5284 }, { "epoch": 2.4574616457461644, "grad_norm": 2.2724850177764893, "learning_rate": 1.4142798767754928e-05, "loss": 0.3096, "step": 5286 }, { "epoch": 2.4583914458391445, "grad_norm": 3.1374948024749756, "learning_rate": 1.4072105833726723e-05, "loss": 0.2972, "step": 5288 }, { "epoch": 2.4593212459321245, "grad_norm": 2.2938497066497803, "learning_rate": 1.400152075360217e-05, "loss": 0.2102, "step": 5290 }, { "epoch": 2.4602510460251046, "grad_norm": 3.1050586700439453, "learning_rate": 1.3931044224027517e-05, "loss": 0.3387, "step": 5292 }, { "epoch": 2.4611808461180846, "grad_norm": 2.4782419204711914, "learning_rate": 1.3860676940577652e-05, "loss": 0.2546, "step": 5294 }, { "epoch": 2.4621106462110647, "grad_norm": 2.8739261627197266, "learning_rate": 1.3790419597749258e-05, "loss": 0.3618, "step": 5296 }, { "epoch": 2.4630404463040447, "grad_norm": 2.7046313285827637, "learning_rate": 1.3720272888953902e-05, "loss": 0.2149, "step": 5298 }, { "epoch": 2.4639702463970248, "grad_norm": 3.026378631591797, "learning_rate": 1.3650237506511365e-05, "loss": 0.2744, "step": 5300 }, { "epoch": 2.464900046490005, "grad_norm": 2.9215445518493652, "learning_rate": 1.358031414164259e-05, "loss": 0.2965, "step": 5302 }, { "epoch": 2.4658298465829844, "grad_norm": 2.8348288536071777, "learning_rate": 1.3510503484462885e-05, "loss": 0.3043, "step": 5304 }, { "epoch": 2.4667596466759645, "grad_norm": 2.685866594314575, "learning_rate": 1.344080622397525e-05, "loss": 0.3588, "step": 5306 }, { "epoch": 2.4676894467689445, "grad_norm": 2.45527720451355, "learning_rate": 1.3371223048063592e-05, "loss": 0.2629, "step": 5308 }, { "epoch": 2.4686192468619246, "grad_norm": 2.7061667442321777, "learning_rate": 1.3301754643485719e-05, "loss": 0.2822, "step": 5310 }, { "epoch": 2.4695490469549046, "grad_norm": 2.753054141998291, "learning_rate": 1.3232401695866704e-05, "loss": 0.2974, "step": 5312 }, { "epoch": 2.4704788470478847, "grad_norm": 2.3718981742858887, "learning_rate": 1.3163164889692255e-05, "loss": 0.3416, "step": 5314 }, { "epoch": 2.4714086471408647, "grad_norm": 2.624847412109375, "learning_rate": 1.3094044908301585e-05, "loss": 0.3099, "step": 5316 }, { "epoch": 2.4723384472338448, "grad_norm": 3.0346410274505615, "learning_rate": 1.3025042433881041e-05, "loss": 0.342, "step": 5318 }, { "epoch": 2.473268247326825, "grad_norm": 3.011763095855713, "learning_rate": 1.2956158147457148e-05, "loss": 0.3525, "step": 5320 }, { "epoch": 2.474198047419805, "grad_norm": 2.3350186347961426, "learning_rate": 1.2887392728890084e-05, "loss": 0.2614, "step": 5322 }, { "epoch": 2.475127847512785, "grad_norm": 3.0409698486328125, "learning_rate": 1.281874685686677e-05, "loss": 0.3043, "step": 5324 }, { "epoch": 2.4760576476057645, "grad_norm": 2.944215774536133, "learning_rate": 1.2750221208894132e-05, "loss": 0.2574, "step": 5326 }, { "epoch": 2.476987447698745, "grad_norm": 2.6613590717315674, "learning_rate": 1.2681816461292759e-05, "loss": 0.3083, "step": 5328 }, { "epoch": 2.4779172477917246, "grad_norm": 3.0180959701538086, "learning_rate": 1.2613533289189863e-05, "loss": 0.2772, "step": 5330 }, { "epoch": 2.4788470478847047, "grad_norm": 2.30562162399292, "learning_rate": 1.2545372366512747e-05, "loss": 0.3181, "step": 5332 }, { "epoch": 2.4797768479776847, "grad_norm": 2.5720157623291016, "learning_rate": 1.2477334365982272e-05, "loss": 0.214, "step": 5334 }, { "epoch": 2.4807066480706648, "grad_norm": 2.2951951026916504, "learning_rate": 1.2409419959106044e-05, "loss": 0.31, "step": 5336 }, { "epoch": 2.481636448163645, "grad_norm": 2.699254035949707, "learning_rate": 1.2341629816171756e-05, "loss": 0.2845, "step": 5338 }, { "epoch": 2.482566248256625, "grad_norm": 2.928802013397217, "learning_rate": 1.2273964606240752e-05, "loss": 0.3037, "step": 5340 }, { "epoch": 2.483496048349605, "grad_norm": 2.2580857276916504, "learning_rate": 1.2206424997141415e-05, "loss": 0.3026, "step": 5342 }, { "epoch": 2.484425848442585, "grad_norm": 3.28802490234375, "learning_rate": 1.213901165546238e-05, "loss": 0.3316, "step": 5344 }, { "epoch": 2.485355648535565, "grad_norm": 2.623776435852051, "learning_rate": 1.2071725246546117e-05, "loss": 0.3181, "step": 5346 }, { "epoch": 2.4862854486285446, "grad_norm": 1.8095697164535522, "learning_rate": 1.2004566434482313e-05, "loss": 0.2253, "step": 5348 }, { "epoch": 2.487215248721525, "grad_norm": 2.5772838592529297, "learning_rate": 1.1937535882101332e-05, "loss": 0.2978, "step": 5350 }, { "epoch": 2.4881450488145047, "grad_norm": 2.2179439067840576, "learning_rate": 1.1870634250967666e-05, "loss": 0.2524, "step": 5352 }, { "epoch": 2.4890748489074848, "grad_norm": 2.5413076877593994, "learning_rate": 1.1803862201373366e-05, "loss": 0.2527, "step": 5354 }, { "epoch": 2.490004649000465, "grad_norm": 2.88993239402771, "learning_rate": 1.1737220392331678e-05, "loss": 0.319, "step": 5356 }, { "epoch": 2.490934449093445, "grad_norm": 3.296889543533325, "learning_rate": 1.1670709481570355e-05, "loss": 0.2969, "step": 5358 }, { "epoch": 2.491864249186425, "grad_norm": 3.2007758617401123, "learning_rate": 1.160433012552512e-05, "loss": 0.3552, "step": 5360 }, { "epoch": 2.492794049279405, "grad_norm": 2.288856267929077, "learning_rate": 1.1538082979333535e-05, "loss": 0.2968, "step": 5362 }, { "epoch": 2.493723849372385, "grad_norm": 2.535599946975708, "learning_rate": 1.1471968696828146e-05, "loss": 0.2193, "step": 5364 }, { "epoch": 2.494653649465365, "grad_norm": 2.4244468212127686, "learning_rate": 1.1405987930530235e-05, "loss": 0.2796, "step": 5366 }, { "epoch": 2.495583449558345, "grad_norm": 2.246622085571289, "learning_rate": 1.1340141331643323e-05, "loss": 0.3156, "step": 5368 }, { "epoch": 2.496513249651325, "grad_norm": 2.3487071990966797, "learning_rate": 1.1274429550046763e-05, "loss": 0.3052, "step": 5370 }, { "epoch": 2.497443049744305, "grad_norm": 1.8854631185531616, "learning_rate": 1.1208853234289301e-05, "loss": 0.1879, "step": 5372 }, { "epoch": 2.498372849837285, "grad_norm": 2.471529960632324, "learning_rate": 1.1143413031582673e-05, "loss": 0.2816, "step": 5374 }, { "epoch": 2.499302649930265, "grad_norm": 2.5361831188201904, "learning_rate": 1.107810958779533e-05, "loss": 0.3428, "step": 5376 }, { "epoch": 2.500232450023245, "grad_norm": 2.690376043319702, "learning_rate": 1.101294354744586e-05, "loss": 0.3006, "step": 5378 }, { "epoch": 2.501162250116225, "grad_norm": 2.8967525959014893, "learning_rate": 1.0947915553696772e-05, "loss": 0.2903, "step": 5380 }, { "epoch": 2.502092050209205, "grad_norm": 2.1973893642425537, "learning_rate": 1.0883026248348114e-05, "loss": 0.2741, "step": 5382 }, { "epoch": 2.503021850302185, "grad_norm": 2.785287618637085, "learning_rate": 1.0818276271831132e-05, "loss": 0.2998, "step": 5384 }, { "epoch": 2.503951650395165, "grad_norm": 2.1207351684570312, "learning_rate": 1.0753666263201955e-05, "loss": 0.2379, "step": 5386 }, { "epoch": 2.504881450488145, "grad_norm": 2.3837146759033203, "learning_rate": 1.0689196860135244e-05, "loss": 0.2464, "step": 5388 }, { "epoch": 2.505811250581125, "grad_norm": 2.651937484741211, "learning_rate": 1.0624868698918089e-05, "loss": 0.237, "step": 5390 }, { "epoch": 2.506741050674105, "grad_norm": 3.5430822372436523, "learning_rate": 1.0560682414443368e-05, "loss": 0.3426, "step": 5392 }, { "epoch": 2.5076708507670853, "grad_norm": 2.184375047683716, "learning_rate": 1.049663864020379e-05, "loss": 0.263, "step": 5394 }, { "epoch": 2.508600650860065, "grad_norm": 1.9629511833190918, "learning_rate": 1.0432738008285627e-05, "loss": 0.1962, "step": 5396 }, { "epoch": 2.509530450953045, "grad_norm": 2.7801711559295654, "learning_rate": 1.036898114936228e-05, "loss": 0.3238, "step": 5398 }, { "epoch": 2.510460251046025, "grad_norm": 3.296351432800293, "learning_rate": 1.0305368692688207e-05, "loss": 0.2955, "step": 5400 }, { "epoch": 2.511390051139005, "grad_norm": 2.6011953353881836, "learning_rate": 1.0241901266092673e-05, "loss": 0.2833, "step": 5402 }, { "epoch": 2.512319851231985, "grad_norm": 2.5382776260375977, "learning_rate": 1.0178579495973561e-05, "loss": 0.3326, "step": 5404 }, { "epoch": 2.513249651324965, "grad_norm": 2.417444944381714, "learning_rate": 1.0115404007291174e-05, "loss": 0.2058, "step": 5406 }, { "epoch": 2.514179451417945, "grad_norm": 2.9613149166107178, "learning_rate": 1.0052375423562044e-05, "loss": 0.2792, "step": 5408 }, { "epoch": 2.5151092515109252, "grad_norm": 2.490175724029541, "learning_rate": 9.989494366852917e-06, "loss": 0.244, "step": 5410 }, { "epoch": 2.5160390516039053, "grad_norm": 3.1078848838806152, "learning_rate": 9.926761457774441e-06, "loss": 0.3006, "step": 5412 }, { "epoch": 2.5169688516968853, "grad_norm": 2.001051902770996, "learning_rate": 9.864177315475024e-06, "loss": 0.2533, "step": 5414 }, { "epoch": 2.5178986517898654, "grad_norm": 2.5898361206054688, "learning_rate": 9.801742557634896e-06, "loss": 0.2769, "step": 5416 }, { "epoch": 2.518828451882845, "grad_norm": 3.1585769653320312, "learning_rate": 9.739457800459972e-06, "loss": 0.3075, "step": 5418 }, { "epoch": 2.519758251975825, "grad_norm": 2.534066677093506, "learning_rate": 9.677323658675628e-06, "loss": 0.2986, "step": 5420 }, { "epoch": 2.520688052068805, "grad_norm": 2.9148213863372803, "learning_rate": 9.615340745520721e-06, "loss": 0.2851, "step": 5422 }, { "epoch": 2.521617852161785, "grad_norm": 2.5295403003692627, "learning_rate": 9.553509672741687e-06, "loss": 0.3389, "step": 5424 }, { "epoch": 2.522547652254765, "grad_norm": 3.3628814220428467, "learning_rate": 9.49183105058615e-06, "loss": 0.3726, "step": 5426 }, { "epoch": 2.5234774523477452, "grad_norm": 2.176935911178589, "learning_rate": 9.430305487797206e-06, "loss": 0.2472, "step": 5428 }, { "epoch": 2.5244072524407253, "grad_norm": 2.973721742630005, "learning_rate": 9.368933591607394e-06, "loss": 0.2872, "step": 5430 }, { "epoch": 2.5253370525337053, "grad_norm": 3.080075740814209, "learning_rate": 9.307715967732515e-06, "loss": 0.2955, "step": 5432 }, { "epoch": 2.5262668526266854, "grad_norm": 2.941044330596924, "learning_rate": 9.246653220365837e-06, "loss": 0.2831, "step": 5434 }, { "epoch": 2.5271966527196654, "grad_norm": 2.608295440673828, "learning_rate": 9.185745952171924e-06, "loss": 0.3231, "step": 5436 }, { "epoch": 2.5281264528126455, "grad_norm": 2.9607107639312744, "learning_rate": 9.124994764281023e-06, "loss": 0.285, "step": 5438 }, { "epoch": 2.529056252905625, "grad_norm": 2.463712692260742, "learning_rate": 9.064400256282799e-06, "loss": 0.3244, "step": 5440 }, { "epoch": 2.5299860529986056, "grad_norm": 1.7961950302124023, "learning_rate": 9.00396302622055e-06, "loss": 0.2134, "step": 5442 }, { "epoch": 2.530915853091585, "grad_norm": 2.3293919563293457, "learning_rate": 8.943683670585407e-06, "loss": 0.3047, "step": 5444 }, { "epoch": 2.5318456531845652, "grad_norm": 2.114621877670288, "learning_rate": 8.883562784310268e-06, "loss": 0.2449, "step": 5446 }, { "epoch": 2.5327754532775453, "grad_norm": 2.530632257461548, "learning_rate": 8.823600960763947e-06, "loss": 0.3049, "step": 5448 }, { "epoch": 2.5337052533705253, "grad_norm": 2.6851425170898438, "learning_rate": 8.763798791745433e-06, "loss": 0.309, "step": 5450 }, { "epoch": 2.5346350534635054, "grad_norm": 1.9306267499923706, "learning_rate": 8.70415686747806e-06, "loss": 0.2307, "step": 5452 }, { "epoch": 2.5355648535564854, "grad_norm": 2.3314528465270996, "learning_rate": 8.644675776603505e-06, "loss": 0.247, "step": 5454 }, { "epoch": 2.5364946536494655, "grad_norm": 2.5432262420654297, "learning_rate": 8.585356106176126e-06, "loss": 0.2751, "step": 5456 }, { "epoch": 2.5374244537424455, "grad_norm": 2.457660436630249, "learning_rate": 8.526198441657118e-06, "loss": 0.2297, "step": 5458 }, { "epoch": 2.5383542538354256, "grad_norm": 2.5028276443481445, "learning_rate": 8.467203366908746e-06, "loss": 0.2543, "step": 5460 }, { "epoch": 2.539284053928405, "grad_norm": 2.8545799255371094, "learning_rate": 8.408371464188586e-06, "loss": 0.3073, "step": 5462 }, { "epoch": 2.5402138540213857, "grad_norm": 2.965803861618042, "learning_rate": 8.349703314143728e-06, "loss": 0.3539, "step": 5464 }, { "epoch": 2.5411436541143653, "grad_norm": 2.5843420028686523, "learning_rate": 8.291199495805196e-06, "loss": 0.2414, "step": 5466 }, { "epoch": 2.5420734542073453, "grad_norm": 2.4603209495544434, "learning_rate": 8.232860586582059e-06, "loss": 0.2702, "step": 5468 }, { "epoch": 2.5430032543003254, "grad_norm": 2.1121883392333984, "learning_rate": 8.174687162255694e-06, "loss": 0.2044, "step": 5470 }, { "epoch": 2.5439330543933054, "grad_norm": 2.757425308227539, "learning_rate": 8.116679796974422e-06, "loss": 0.2968, "step": 5472 }, { "epoch": 2.5448628544862855, "grad_norm": 2.2397444248199463, "learning_rate": 8.05883906324748e-06, "loss": 0.238, "step": 5474 }, { "epoch": 2.5457926545792655, "grad_norm": 2.6868205070495605, "learning_rate": 8.001165531939528e-06, "loss": 0.2802, "step": 5476 }, { "epoch": 2.5467224546722456, "grad_norm": 3.422095775604248, "learning_rate": 7.943659772265133e-06, "loss": 0.3025, "step": 5478 }, { "epoch": 2.5476522547652256, "grad_norm": 2.7831714153289795, "learning_rate": 7.886322351782828e-06, "loss": 0.3391, "step": 5480 }, { "epoch": 2.5485820548582057, "grad_norm": 3.243180751800537, "learning_rate": 7.829153836389844e-06, "loss": 0.326, "step": 5482 }, { "epoch": 2.5495118549511853, "grad_norm": 2.3928492069244385, "learning_rate": 7.77215479031631e-06, "loss": 0.253, "step": 5484 }, { "epoch": 2.5504416550441658, "grad_norm": 2.167447566986084, "learning_rate": 7.715325776119864e-06, "loss": 0.2243, "step": 5486 }, { "epoch": 2.5513714551371454, "grad_norm": 2.696125030517578, "learning_rate": 7.658667354679903e-06, "loss": 0.2882, "step": 5488 }, { "epoch": 2.5523012552301254, "grad_norm": 2.5759360790252686, "learning_rate": 7.602180085192172e-06, "loss": 0.2869, "step": 5490 }, { "epoch": 2.5532310553231055, "grad_norm": 2.1934051513671875, "learning_rate": 7.5458645251632165e-06, "loss": 0.3068, "step": 5492 }, { "epoch": 2.5541608554160855, "grad_norm": 2.9338862895965576, "learning_rate": 7.489721230404879e-06, "loss": 0.2866, "step": 5494 }, { "epoch": 2.5550906555090656, "grad_norm": 2.6495003700256348, "learning_rate": 7.43375075502881e-06, "loss": 0.321, "step": 5496 }, { "epoch": 2.5560204556020456, "grad_norm": 2.383559465408325, "learning_rate": 7.377953651440979e-06, "loss": 0.2728, "step": 5498 }, { "epoch": 2.5569502556950257, "grad_norm": 2.9671459197998047, "learning_rate": 7.322330470336359e-06, "loss": 0.3363, "step": 5500 }, { "epoch": 2.5578800557880057, "grad_norm": 3.068509101867676, "learning_rate": 7.266881760693204e-06, "loss": 0.3188, "step": 5502 }, { "epoch": 2.5588098558809858, "grad_norm": 3.0766944885253906, "learning_rate": 7.211608069767889e-06, "loss": 0.4218, "step": 5504 }, { "epoch": 2.5597396559739654, "grad_norm": 2.42033052444458, "learning_rate": 7.156509943089493e-06, "loss": 0.3161, "step": 5506 }, { "epoch": 2.560669456066946, "grad_norm": 2.878898859024048, "learning_rate": 7.10158792445425e-06, "loss": 0.2315, "step": 5508 }, { "epoch": 2.5615992561599255, "grad_norm": 2.1140518188476562, "learning_rate": 7.046842555920313e-06, "loss": 0.2092, "step": 5510 }, { "epoch": 2.5625290562529055, "grad_norm": 2.642272710800171, "learning_rate": 6.992274377802362e-06, "loss": 0.292, "step": 5512 }, { "epoch": 2.5634588563458856, "grad_norm": 2.5869898796081543, "learning_rate": 6.9378839286662925e-06, "loss": 0.3527, "step": 5514 }, { "epoch": 2.5643886564388656, "grad_norm": 2.04522442817688, "learning_rate": 6.883671745323876e-06, "loss": 0.2942, "step": 5516 }, { "epoch": 2.5653184565318456, "grad_norm": 2.8969616889953613, "learning_rate": 6.8296383628274434e-06, "loss": 0.3798, "step": 5518 }, { "epoch": 2.5662482566248257, "grad_norm": 2.0284643173217773, "learning_rate": 6.775784314464729e-06, "loss": 0.2267, "step": 5520 }, { "epoch": 2.5671780567178057, "grad_norm": 3.0340323448181152, "learning_rate": 6.7221101317534485e-06, "loss": 0.3578, "step": 5522 }, { "epoch": 2.568107856810786, "grad_norm": 2.3671815395355225, "learning_rate": 6.668616344436021e-06, "loss": 0.3048, "step": 5524 }, { "epoch": 2.569037656903766, "grad_norm": 3.076282024383545, "learning_rate": 6.615303480474629e-06, "loss": 0.3226, "step": 5526 }, { "epoch": 2.5699674569967454, "grad_norm": 2.377346992492676, "learning_rate": 6.562172066045682e-06, "loss": 0.2543, "step": 5528 }, { "epoch": 2.570897257089726, "grad_norm": 2.6357383728027344, "learning_rate": 6.509222625534791e-06, "loss": 0.2428, "step": 5530 }, { "epoch": 2.5718270571827055, "grad_norm": 2.271533966064453, "learning_rate": 6.456455681531525e-06, "loss": 0.2676, "step": 5532 }, { "epoch": 2.5727568572756856, "grad_norm": 2.159168243408203, "learning_rate": 6.403871754824409e-06, "loss": 0.3035, "step": 5534 }, { "epoch": 2.5736866573686656, "grad_norm": 1.7627044916152954, "learning_rate": 6.351471364395487e-06, "loss": 0.2319, "step": 5536 }, { "epoch": 2.5746164574616457, "grad_norm": 2.461913585662842, "learning_rate": 6.299255027415452e-06, "loss": 0.3039, "step": 5538 }, { "epoch": 2.5755462575546257, "grad_norm": 2.431652307510376, "learning_rate": 6.247223259238528e-06, "loss": 0.3084, "step": 5540 }, { "epoch": 2.576476057647606, "grad_norm": 2.5355308055877686, "learning_rate": 6.195376573397236e-06, "loss": 0.2279, "step": 5542 }, { "epoch": 2.577405857740586, "grad_norm": 2.3598501682281494, "learning_rate": 6.143715481597457e-06, "loss": 0.2221, "step": 5544 }, { "epoch": 2.578335657833566, "grad_norm": 2.3482067584991455, "learning_rate": 6.092240493713228e-06, "loss": 0.3008, "step": 5546 }, { "epoch": 2.579265457926546, "grad_norm": 2.8339357376098633, "learning_rate": 6.040952117781984e-06, "loss": 0.2569, "step": 5548 }, { "epoch": 2.5801952580195255, "grad_norm": 2.7812998294830322, "learning_rate": 5.989850859999261e-06, "loss": 0.3085, "step": 5550 }, { "epoch": 2.581125058112506, "grad_norm": 2.076362371444702, "learning_rate": 5.938937224713811e-06, "loss": 0.1776, "step": 5552 }, { "epoch": 2.5820548582054856, "grad_norm": 2.347146987915039, "learning_rate": 5.888211714422719e-06, "loss": 0.3034, "step": 5554 }, { "epoch": 2.5829846582984657, "grad_norm": 2.9521098136901855, "learning_rate": 5.837674829766295e-06, "loss": 0.2222, "step": 5556 }, { "epoch": 2.5839144583914457, "grad_norm": 2.3992810249328613, "learning_rate": 5.787327069523128e-06, "loss": 0.2865, "step": 5558 }, { "epoch": 2.584844258484426, "grad_norm": 2.550109386444092, "learning_rate": 5.737168930605286e-06, "loss": 0.2644, "step": 5560 }, { "epoch": 2.585774058577406, "grad_norm": 2.5244874954223633, "learning_rate": 5.687200908053409e-06, "loss": 0.2474, "step": 5562 }, { "epoch": 2.586703858670386, "grad_norm": 2.610778570175171, "learning_rate": 5.637423495031676e-06, "loss": 0.2785, "step": 5564 }, { "epoch": 2.587633658763366, "grad_norm": 3.1628713607788086, "learning_rate": 5.58783718282306e-06, "loss": 0.316, "step": 5566 }, { "epoch": 2.588563458856346, "grad_norm": 2.7362167835235596, "learning_rate": 5.538442460824444e-06, "loss": 0.3081, "step": 5568 }, { "epoch": 2.589493258949326, "grad_norm": 2.562082529067993, "learning_rate": 5.489239816541787e-06, "loss": 0.3043, "step": 5570 }, { "epoch": 2.5904230590423056, "grad_norm": 3.0887398719787598, "learning_rate": 5.4402297355853015e-06, "loss": 0.3769, "step": 5572 }, { "epoch": 2.591352859135286, "grad_norm": 2.701564311981201, "learning_rate": 5.391412701664756e-06, "loss": 0.2814, "step": 5574 }, { "epoch": 2.5922826592282657, "grad_norm": 2.006162643432617, "learning_rate": 5.342789196584538e-06, "loss": 0.3174, "step": 5576 }, { "epoch": 2.5932124593212458, "grad_norm": 1.7908318042755127, "learning_rate": 5.294359700239041e-06, "loss": 0.1886, "step": 5578 }, { "epoch": 2.594142259414226, "grad_norm": 2.1938560009002686, "learning_rate": 5.246124690607757e-06, "loss": 0.2709, "step": 5580 }, { "epoch": 2.595072059507206, "grad_norm": 2.81311297416687, "learning_rate": 5.198084643750845e-06, "loss": 0.3107, "step": 5582 }, { "epoch": 2.596001859600186, "grad_norm": 2.4679553508758545, "learning_rate": 5.150240033804141e-06, "loss": 0.2708, "step": 5584 }, { "epoch": 2.596931659693166, "grad_norm": 2.509796142578125, "learning_rate": 5.1025913329746024e-06, "loss": 0.267, "step": 5586 }, { "epoch": 2.597861459786146, "grad_norm": 2.994870662689209, "learning_rate": 5.055139011535754e-06, "loss": 0.2898, "step": 5588 }, { "epoch": 2.598791259879126, "grad_norm": 2.609898805618286, "learning_rate": 5.0078835378227695e-06, "loss": 0.3434, "step": 5590 }, { "epoch": 2.599721059972106, "grad_norm": 1.8201557397842407, "learning_rate": 4.960825378228121e-06, "loss": 0.1916, "step": 5592 }, { "epoch": 2.600650860065086, "grad_norm": 2.5303585529327393, "learning_rate": 4.913964997196821e-06, "loss": 0.2664, "step": 5594 }, { "epoch": 2.601580660158066, "grad_norm": 2.1021828651428223, "learning_rate": 4.867302857221964e-06, "loss": 0.3129, "step": 5596 }, { "epoch": 2.602510460251046, "grad_norm": 2.6587226390838623, "learning_rate": 4.8208394188400095e-06, "loss": 0.2874, "step": 5598 }, { "epoch": 2.603440260344026, "grad_norm": 2.153458595275879, "learning_rate": 4.7745751406263335e-06, "loss": 0.2044, "step": 5600 }, { "epoch": 2.604370060437006, "grad_norm": 2.6934525966644287, "learning_rate": 4.728510479190684e-06, "loss": 0.3591, "step": 5602 }, { "epoch": 2.605299860529986, "grad_norm": 1.9737348556518555, "learning_rate": 4.682645889172677e-06, "loss": 0.2572, "step": 5604 }, { "epoch": 2.606229660622966, "grad_norm": 2.5174999237060547, "learning_rate": 4.636981823237293e-06, "loss": 0.2666, "step": 5606 }, { "epoch": 2.607159460715946, "grad_norm": 2.4568920135498047, "learning_rate": 4.591518732070404e-06, "loss": 0.2855, "step": 5608 }, { "epoch": 2.608089260808926, "grad_norm": 2.4146740436553955, "learning_rate": 4.546257064374448e-06, "loss": 0.1995, "step": 5610 }, { "epoch": 2.609019060901906, "grad_norm": 2.264836311340332, "learning_rate": 4.5011972668637265e-06, "loss": 0.2102, "step": 5612 }, { "epoch": 2.609948860994886, "grad_norm": 1.48586106300354, "learning_rate": 4.456339784260257e-06, "loss": 0.1194, "step": 5614 }, { "epoch": 2.6108786610878663, "grad_norm": 2.412338972091675, "learning_rate": 4.411685059289329e-06, "loss": 0.225, "step": 5616 }, { "epoch": 2.6118084611808463, "grad_norm": 2.2265913486480713, "learning_rate": 4.367233532675029e-06, "loss": 0.2598, "step": 5618 }, { "epoch": 2.612738261273826, "grad_norm": 2.3660473823547363, "learning_rate": 4.32298564313595e-06, "loss": 0.251, "step": 5620 }, { "epoch": 2.6136680613668064, "grad_norm": 2.6990222930908203, "learning_rate": 4.278941827380973e-06, "loss": 0.2893, "step": 5622 }, { "epoch": 2.614597861459786, "grad_norm": 2.094717025756836, "learning_rate": 4.2351025201047095e-06, "loss": 0.311, "step": 5624 }, { "epoch": 2.615527661552766, "grad_norm": 2.27211856842041, "learning_rate": 4.191468153983442e-06, "loss": 0.271, "step": 5626 }, { "epoch": 2.616457461645746, "grad_norm": 2.208285331726074, "learning_rate": 4.148039159670726e-06, "loss": 0.2443, "step": 5628 }, { "epoch": 2.617387261738726, "grad_norm": 2.0556435585021973, "learning_rate": 4.1048159657932526e-06, "loss": 0.2225, "step": 5630 }, { "epoch": 2.618317061831706, "grad_norm": 2.296304702758789, "learning_rate": 4.061798998946493e-06, "loss": 0.1696, "step": 5632 }, { "epoch": 2.6192468619246863, "grad_norm": 2.5036704540252686, "learning_rate": 4.01898868369047e-06, "loss": 0.2959, "step": 5634 }, { "epoch": 2.6201766620176663, "grad_norm": 1.8679554462432861, "learning_rate": 3.976385442545788e-06, "loss": 0.2828, "step": 5636 }, { "epoch": 2.6211064621106464, "grad_norm": 2.420706272125244, "learning_rate": 3.933989695989213e-06, "loss": 0.3041, "step": 5638 }, { "epoch": 2.6220362622036264, "grad_norm": 2.992461681365967, "learning_rate": 3.891801862449643e-06, "loss": 0.2842, "step": 5640 }, { "epoch": 2.622966062296606, "grad_norm": 1.9685544967651367, "learning_rate": 3.849822358303946e-06, "loss": 0.3064, "step": 5642 }, { "epoch": 2.6238958623895865, "grad_norm": 2.1616241931915283, "learning_rate": 3.8080515978729446e-06, "loss": 0.275, "step": 5644 }, { "epoch": 2.624825662482566, "grad_norm": 2.7108852863311768, "learning_rate": 3.7664899934171136e-06, "loss": 0.2956, "step": 5646 }, { "epoch": 2.625755462575546, "grad_norm": 2.4758353233337402, "learning_rate": 3.72513795513271e-06, "loss": 0.2873, "step": 5648 }, { "epoch": 2.626685262668526, "grad_norm": 2.503990650177002, "learning_rate": 3.683995891147704e-06, "loss": 0.2788, "step": 5650 }, { "epoch": 2.6276150627615062, "grad_norm": 2.545443296432495, "learning_rate": 3.643064207517632e-06, "loss": 0.2861, "step": 5652 }, { "epoch": 2.6285448628544863, "grad_norm": 3.0745058059692383, "learning_rate": 3.6023433082216895e-06, "loss": 0.2289, "step": 5654 }, { "epoch": 2.6294746629474663, "grad_norm": 2.0309906005859375, "learning_rate": 3.5618335951587115e-06, "loss": 0.2687, "step": 5656 }, { "epoch": 2.6304044630404464, "grad_norm": 2.8540866374969482, "learning_rate": 3.5215354681432104e-06, "loss": 0.3379, "step": 5658 }, { "epoch": 2.6313342631334264, "grad_norm": 2.166804075241089, "learning_rate": 3.48144932490143e-06, "loss": 0.2679, "step": 5660 }, { "epoch": 2.6322640632264065, "grad_norm": 1.6345795392990112, "learning_rate": 3.441575561067402e-06, "loss": 0.2053, "step": 5662 }, { "epoch": 2.633193863319386, "grad_norm": 1.8359907865524292, "learning_rate": 3.40191457017912e-06, "loss": 0.2711, "step": 5664 }, { "epoch": 2.6341236634123666, "grad_norm": 2.3982651233673096, "learning_rate": 3.362466743674554e-06, "loss": 0.3487, "step": 5666 }, { "epoch": 2.635053463505346, "grad_norm": 2.586247444152832, "learning_rate": 3.3232324708877562e-06, "loss": 0.3173, "step": 5668 }, { "epoch": 2.6359832635983262, "grad_norm": 2.648834466934204, "learning_rate": 3.2842121390452298e-06, "loss": 0.3226, "step": 5670 }, { "epoch": 2.6369130636913063, "grad_norm": 2.6600165367126465, "learning_rate": 3.24540613326187e-06, "loss": 0.295, "step": 5672 }, { "epoch": 2.6378428637842863, "grad_norm": 2.316488742828369, "learning_rate": 3.2068148365372925e-06, "loss": 0.2455, "step": 5674 }, { "epoch": 2.6387726638772664, "grad_norm": 2.943544626235962, "learning_rate": 3.168438629751991e-06, "loss": 0.2421, "step": 5676 }, { "epoch": 2.6397024639702464, "grad_norm": 1.974583387374878, "learning_rate": 3.1302778916636997e-06, "loss": 0.2161, "step": 5678 }, { "epoch": 2.6406322640632265, "grad_norm": 2.040907859802246, "learning_rate": 3.092332998903433e-06, "loss": 0.2208, "step": 5680 }, { "epoch": 2.6415620641562065, "grad_norm": 1.824689507484436, "learning_rate": 3.054604325971961e-06, "loss": 0.2367, "step": 5682 }, { "epoch": 2.6424918642491866, "grad_norm": 2.200338125228882, "learning_rate": 3.017092245236098e-06, "loss": 0.2584, "step": 5684 }, { "epoch": 2.643421664342166, "grad_norm": 2.0021538734436035, "learning_rate": 2.9797971269249166e-06, "loss": 0.2425, "step": 5686 }, { "epoch": 2.6443514644351467, "grad_norm": 3.021247625350952, "learning_rate": 2.9427193391261996e-06, "loss": 0.3248, "step": 5688 }, { "epoch": 2.6452812645281263, "grad_norm": 2.24408221244812, "learning_rate": 2.905859247782676e-06, "loss": 0.2178, "step": 5690 }, { "epoch": 2.6462110646211063, "grad_norm": 2.744420051574707, "learning_rate": 2.8692172166886337e-06, "loss": 0.2812, "step": 5692 }, { "epoch": 2.6471408647140864, "grad_norm": 2.161972999572754, "learning_rate": 2.8327936074861046e-06, "loss": 0.2727, "step": 5694 }, { "epoch": 2.6480706648070664, "grad_norm": 1.6251959800720215, "learning_rate": 2.7965887796613863e-06, "loss": 0.2301, "step": 5696 }, { "epoch": 2.6490004649000465, "grad_norm": 2.274137020111084, "learning_rate": 2.7606030905415504e-06, "loss": 0.2689, "step": 5698 }, { "epoch": 2.6499302649930265, "grad_norm": 2.8428592681884766, "learning_rate": 2.7248368952908256e-06, "loss": 0.2782, "step": 5700 }, { "epoch": 2.6508600650860066, "grad_norm": 1.7751359939575195, "learning_rate": 2.6892905469070787e-06, "loss": 0.2183, "step": 5702 }, { "epoch": 2.6517898651789866, "grad_norm": 2.8165125846862793, "learning_rate": 2.6539643962184125e-06, "loss": 0.3136, "step": 5704 }, { "epoch": 2.6527196652719667, "grad_norm": 2.5845589637756348, "learning_rate": 2.618858791879717e-06, "loss": 0.2561, "step": 5706 }, { "epoch": 2.6536494653649463, "grad_norm": 1.891725778579712, "learning_rate": 2.583974080369115e-06, "loss": 0.2801, "step": 5708 }, { "epoch": 2.6545792654579268, "grad_norm": 2.138580322265625, "learning_rate": 2.5493106059846234e-06, "loss": 0.2407, "step": 5710 }, { "epoch": 2.6555090655509064, "grad_norm": 3.195814371109009, "learning_rate": 2.5148687108407404e-06, "loss": 0.3075, "step": 5712 }, { "epoch": 2.6564388656438864, "grad_norm": 1.9648475646972656, "learning_rate": 2.4806487348650654e-06, "loss": 0.2385, "step": 5714 }, { "epoch": 2.6573686657368665, "grad_norm": 2.467996120452881, "learning_rate": 2.446651015794929e-06, "loss": 0.245, "step": 5716 }, { "epoch": 2.6582984658298465, "grad_norm": 2.5853805541992188, "learning_rate": 2.4128758891741317e-06, "loss": 0.3089, "step": 5718 }, { "epoch": 2.6592282659228266, "grad_norm": 2.4281556606292725, "learning_rate": 2.3793236883495156e-06, "loss": 0.2371, "step": 5720 }, { "epoch": 2.6601580660158066, "grad_norm": 2.461988925933838, "learning_rate": 2.34599474446778e-06, "loss": 0.2464, "step": 5722 }, { "epoch": 2.6610878661087867, "grad_norm": 1.839308500289917, "learning_rate": 2.312889386472086e-06, "loss": 0.2649, "step": 5724 }, { "epoch": 2.6620176662017667, "grad_norm": 2.486229658126831, "learning_rate": 2.2800079410990072e-06, "loss": 0.2702, "step": 5726 }, { "epoch": 2.6629474662947468, "grad_norm": 2.5522592067718506, "learning_rate": 2.247350732875119e-06, "loss": 0.2556, "step": 5728 }, { "epoch": 2.6638772663877264, "grad_norm": 2.5657968521118164, "learning_rate": 2.2149180841138642e-06, "loss": 0.2704, "step": 5730 }, { "epoch": 2.664807066480707, "grad_norm": 2.1532881259918213, "learning_rate": 2.182710314912447e-06, "loss": 0.2655, "step": 5732 }, { "epoch": 2.6657368665736865, "grad_norm": 2.5707664489746094, "learning_rate": 2.1507277431484882e-06, "loss": 0.2626, "step": 5734 }, { "epoch": 2.6666666666666665, "grad_norm": 2.8557145595550537, "learning_rate": 2.1189706844770798e-06, "loss": 0.2528, "step": 5736 }, { "epoch": 2.6675964667596466, "grad_norm": 2.4022176265716553, "learning_rate": 2.0874394523275537e-06, "loss": 0.2393, "step": 5738 }, { "epoch": 2.6685262668526266, "grad_norm": 2.605020046234131, "learning_rate": 2.0561343579004753e-06, "loss": 0.2411, "step": 5740 }, { "epoch": 2.6694560669456067, "grad_norm": 2.7672784328460693, "learning_rate": 2.025055710164473e-06, "loss": 0.3405, "step": 5742 }, { "epoch": 2.6703858670385867, "grad_norm": 2.773200511932373, "learning_rate": 1.994203815853249e-06, "loss": 0.3183, "step": 5744 }, { "epoch": 2.6713156671315668, "grad_norm": 1.9785062074661255, "learning_rate": 1.9635789794625492e-06, "loss": 0.2345, "step": 5746 }, { "epoch": 2.672245467224547, "grad_norm": 2.1168649196624756, "learning_rate": 1.933181503247142e-06, "loss": 0.2053, "step": 5748 }, { "epoch": 2.673175267317527, "grad_norm": 1.835533857345581, "learning_rate": 1.903011687217846e-06, "loss": 0.2453, "step": 5750 }, { "epoch": 2.6741050674105065, "grad_norm": 2.3920836448669434, "learning_rate": 1.8730698291385467e-06, "loss": 0.384, "step": 5752 }, { "epoch": 2.675034867503487, "grad_norm": 2.3001835346221924, "learning_rate": 1.843356224523352e-06, "loss": 0.2649, "step": 5754 }, { "epoch": 2.6759646675964666, "grad_norm": 2.7392873764038086, "learning_rate": 1.8138711666334857e-06, "loss": 0.2784, "step": 5756 }, { "epoch": 2.6768944676894466, "grad_norm": 2.259678602218628, "learning_rate": 1.7846149464745725e-06, "loss": 0.3029, "step": 5758 }, { "epoch": 2.6778242677824267, "grad_norm": 3.046842098236084, "learning_rate": 1.7555878527937196e-06, "loss": 0.2762, "step": 5760 }, { "epoch": 2.6787540678754067, "grad_norm": 2.4253828525543213, "learning_rate": 1.7267901720766146e-06, "loss": 0.1794, "step": 5762 }, { "epoch": 2.6796838679683868, "grad_norm": 2.562119960784912, "learning_rate": 1.6982221885447181e-06, "loss": 0.2983, "step": 5764 }, { "epoch": 2.680613668061367, "grad_norm": 2.5204615592956543, "learning_rate": 1.6698841841525455e-06, "loss": 0.2767, "step": 5766 }, { "epoch": 2.681543468154347, "grad_norm": 3.1123056411743164, "learning_rate": 1.6417764385847106e-06, "loss": 0.3063, "step": 5768 }, { "epoch": 2.682473268247327, "grad_norm": 2.463193893432617, "learning_rate": 1.6138992292533323e-06, "loss": 0.2571, "step": 5770 }, { "epoch": 2.683403068340307, "grad_norm": 2.3664746284484863, "learning_rate": 1.5862528312951927e-06, "loss": 0.2677, "step": 5772 }, { "epoch": 2.684332868433287, "grad_norm": 2.525906562805176, "learning_rate": 1.5588375175691113e-06, "loss": 0.2379, "step": 5774 }, { "epoch": 2.685262668526267, "grad_norm": 3.082557201385498, "learning_rate": 1.5316535586531674e-06, "loss": 0.2677, "step": 5776 }, { "epoch": 2.6861924686192467, "grad_norm": 2.1303412914276123, "learning_rate": 1.504701222842014e-06, "loss": 0.2791, "step": 5778 }, { "epoch": 2.687122268712227, "grad_norm": 2.5601460933685303, "learning_rate": 1.477980776144369e-06, "loss": 0.2815, "step": 5780 }, { "epoch": 2.6880520688052068, "grad_norm": 2.722132682800293, "learning_rate": 1.451492482280242e-06, "loss": 0.2714, "step": 5782 }, { "epoch": 2.688981868898187, "grad_norm": 2.3074138164520264, "learning_rate": 1.4252366026783977e-06, "loss": 0.2093, "step": 5784 }, { "epoch": 2.689911668991167, "grad_norm": 2.1408069133758545, "learning_rate": 1.3992133964737525e-06, "loss": 0.2305, "step": 5786 }, { "epoch": 2.690841469084147, "grad_norm": 2.3266732692718506, "learning_rate": 1.373423120504893e-06, "loss": 0.2507, "step": 5788 }, { "epoch": 2.691771269177127, "grad_norm": 3.3181345462799072, "learning_rate": 1.3478660293113807e-06, "loss": 0.3148, "step": 5790 }, { "epoch": 2.692701069270107, "grad_norm": 2.660740375518799, "learning_rate": 1.3225423751313934e-06, "loss": 0.2438, "step": 5792 }, { "epoch": 2.693630869363087, "grad_norm": 2.5704398155212402, "learning_rate": 1.2974524078992014e-06, "loss": 0.2132, "step": 5794 }, { "epoch": 2.694560669456067, "grad_norm": 2.573281764984131, "learning_rate": 1.2725963752426398e-06, "loss": 0.2647, "step": 5796 }, { "epoch": 2.695490469549047, "grad_norm": 3.2780816555023193, "learning_rate": 1.2479745224807262e-06, "loss": 0.3357, "step": 5798 }, { "epoch": 2.6964202696420267, "grad_norm": 2.3947157859802246, "learning_rate": 1.223587092621169e-06, "loss": 0.2117, "step": 5800 }, { "epoch": 2.6973500697350072, "grad_norm": 2.0122246742248535, "learning_rate": 1.1994343263580943e-06, "loss": 0.2029, "step": 5802 }, { "epoch": 2.698279869827987, "grad_norm": 2.8697314262390137, "learning_rate": 1.1755164620695387e-06, "loss": 0.3463, "step": 5804 }, { "epoch": 2.699209669920967, "grad_norm": 2.2591471672058105, "learning_rate": 1.1518337358151597e-06, "loss": 0.2466, "step": 5806 }, { "epoch": 2.700139470013947, "grad_norm": 2.183201313018799, "learning_rate": 1.1283863813339249e-06, "loss": 0.2445, "step": 5808 }, { "epoch": 2.701069270106927, "grad_norm": 2.7525737285614014, "learning_rate": 1.1051746300417595e-06, "loss": 0.2693, "step": 5810 }, { "epoch": 2.701999070199907, "grad_norm": 1.9480472803115845, "learning_rate": 1.0821987110292405e-06, "loss": 0.2577, "step": 5812 }, { "epoch": 2.702928870292887, "grad_norm": 2.313754081726074, "learning_rate": 1.0594588510594485e-06, "loss": 0.2263, "step": 5814 }, { "epoch": 2.703858670385867, "grad_norm": 2.220707654953003, "learning_rate": 1.036955274565608e-06, "loss": 0.2439, "step": 5816 }, { "epoch": 2.704788470478847, "grad_norm": 2.757838249206543, "learning_rate": 1.0146882036489345e-06, "loss": 0.2043, "step": 5818 }, { "epoch": 2.7057182705718272, "grad_norm": 2.6037518978118896, "learning_rate": 9.926578580764297e-07, "loss": 0.2569, "step": 5820 }, { "epoch": 2.706648070664807, "grad_norm": 2.5094051361083984, "learning_rate": 9.708644552787118e-07, "loss": 0.2859, "step": 5822 }, { "epoch": 2.7075778707577873, "grad_norm": 2.166354179382324, "learning_rate": 9.493082103478635e-07, "loss": 0.2413, "step": 5824 }, { "epoch": 2.708507670850767, "grad_norm": 2.5567753314971924, "learning_rate": 9.279893360353097e-07, "loss": 0.247, "step": 5826 }, { "epoch": 2.709437470943747, "grad_norm": 2.3945343494415283, "learning_rate": 9.069080427497578e-07, "loss": 0.2871, "step": 5828 }, { "epoch": 2.710367271036727, "grad_norm": 2.004552125930786, "learning_rate": 8.860645385550486e-07, "loss": 0.2242, "step": 5830 }, { "epoch": 2.711297071129707, "grad_norm": 2.7234294414520264, "learning_rate": 8.654590291681673e-07, "loss": 0.3514, "step": 5832 }, { "epoch": 2.712226871222687, "grad_norm": 2.335017204284668, "learning_rate": 8.450917179571363e-07, "loss": 0.2861, "step": 5834 }, { "epoch": 2.713156671315667, "grad_norm": 3.2143702507019043, "learning_rate": 8.249628059391281e-07, "loss": 0.2982, "step": 5836 }, { "epoch": 2.7140864714086472, "grad_norm": 2.176330804824829, "learning_rate": 8.050724917783693e-07, "loss": 0.2233, "step": 5838 }, { "epoch": 2.7150162715016273, "grad_norm": 2.525729179382324, "learning_rate": 7.854209717842177e-07, "loss": 0.2762, "step": 5840 }, { "epoch": 2.7159460715946073, "grad_norm": 1.6723090410232544, "learning_rate": 7.660084399092771e-07, "loss": 0.2519, "step": 5842 }, { "epoch": 2.716875871687587, "grad_norm": 2.8618810176849365, "learning_rate": 7.468350877473634e-07, "loss": 0.2191, "step": 5844 }, { "epoch": 2.7178056717805674, "grad_norm": 2.6042068004608154, "learning_rate": 7.279011045317363e-07, "loss": 0.2442, "step": 5846 }, { "epoch": 2.718735471873547, "grad_norm": 2.2480666637420654, "learning_rate": 7.092066771331534e-07, "loss": 0.2724, "step": 5848 }, { "epoch": 2.719665271966527, "grad_norm": 2.2185983657836914, "learning_rate": 6.907519900580858e-07, "loss": 0.229, "step": 5850 }, { "epoch": 2.720595072059507, "grad_norm": 3.202702760696411, "learning_rate": 6.725372254468396e-07, "loss": 0.347, "step": 5852 }, { "epoch": 2.721524872152487, "grad_norm": 1.853745460510254, "learning_rate": 6.545625630717807e-07, "loss": 0.168, "step": 5854 }, { "epoch": 2.7224546722454672, "grad_norm": 1.7197445631027222, "learning_rate": 6.36828180335577e-07, "loss": 0.1566, "step": 5856 }, { "epoch": 2.7233844723384473, "grad_norm": 2.013608932495117, "learning_rate": 6.193342522694207e-07, "loss": 0.2016, "step": 5858 }, { "epoch": 2.7243142724314273, "grad_norm": 2.1562423706054688, "learning_rate": 6.020809515313137e-07, "loss": 0.231, "step": 5860 }, { "epoch": 2.7252440725244074, "grad_norm": 2.693873643875122, "learning_rate": 5.850684484043851e-07, "loss": 0.3065, "step": 5862 }, { "epoch": 2.7261738726173874, "grad_norm": 1.942315936088562, "learning_rate": 5.682969107951736e-07, "loss": 0.2572, "step": 5864 }, { "epoch": 2.727103672710367, "grad_norm": 2.6825053691864014, "learning_rate": 5.517665042319647e-07, "loss": 0.2516, "step": 5866 }, { "epoch": 2.7280334728033475, "grad_norm": 1.9874346256256104, "learning_rate": 5.354773918632005e-07, "loss": 0.2566, "step": 5868 }, { "epoch": 2.728963272896327, "grad_norm": 1.722001075744629, "learning_rate": 5.194297344558584e-07, "loss": 0.2366, "step": 5870 }, { "epoch": 2.729893072989307, "grad_norm": 2.5765628814697266, "learning_rate": 5.036236903938306e-07, "loss": 0.2262, "step": 5872 }, { "epoch": 2.730822873082287, "grad_norm": 1.8732450008392334, "learning_rate": 4.880594156763861e-07, "loss": 0.2565, "step": 5874 }, { "epoch": 2.7317526731752673, "grad_norm": 2.6180989742279053, "learning_rate": 4.7273706391665257e-07, "loss": 0.323, "step": 5876 }, { "epoch": 2.7326824732682473, "grad_norm": 2.208806276321411, "learning_rate": 4.576567863400346e-07, "loss": 0.2553, "step": 5878 }, { "epoch": 2.7336122733612274, "grad_norm": 2.043833017349243, "learning_rate": 4.428187317827894e-07, "loss": 0.2193, "step": 5880 }, { "epoch": 2.7345420734542074, "grad_norm": 2.447105884552002, "learning_rate": 4.2822304669051967e-07, "loss": 0.3011, "step": 5882 }, { "epoch": 2.7354718735471875, "grad_norm": 2.7307798862457275, "learning_rate": 4.138698751167586e-07, "loss": 0.2721, "step": 5884 }, { "epoch": 2.7364016736401675, "grad_norm": 2.5852560997009277, "learning_rate": 3.997593587215148e-07, "loss": 0.299, "step": 5886 }, { "epoch": 2.737331473733147, "grad_norm": 2.8663344383239746, "learning_rate": 3.858916367698684e-07, "loss": 0.3212, "step": 5888 }, { "epoch": 2.7382612738261276, "grad_norm": 2.000790596008301, "learning_rate": 3.722668461306549e-07, "loss": 0.2312, "step": 5890 }, { "epoch": 2.739191073919107, "grad_norm": 2.583371639251709, "learning_rate": 3.5888512127505313e-07, "loss": 0.3011, "step": 5892 }, { "epoch": 2.7401208740120873, "grad_norm": 2.314758062362671, "learning_rate": 3.457465942752828e-07, "loss": 0.2457, "step": 5894 }, { "epoch": 2.7410506741050673, "grad_norm": 2.7891530990600586, "learning_rate": 3.3285139480329497e-07, "loss": 0.2539, "step": 5896 }, { "epoch": 2.7419804741980474, "grad_norm": 2.187875747680664, "learning_rate": 3.2019965012952544e-07, "loss": 0.2891, "step": 5898 }, { "epoch": 2.7429102742910274, "grad_norm": 2.415600061416626, "learning_rate": 3.0779148512156264e-07, "loss": 0.3204, "step": 5900 }, { "epoch": 2.7438400743840075, "grad_norm": 2.339444398880005, "learning_rate": 2.956270222429876e-07, "loss": 0.2335, "step": 5902 }, { "epoch": 2.7447698744769875, "grad_norm": 2.976139545440674, "learning_rate": 2.837063815521497e-07, "loss": 0.264, "step": 5904 }, { "epoch": 2.7456996745699676, "grad_norm": 2.3639776706695557, "learning_rate": 2.7202968070095654e-07, "loss": 0.2471, "step": 5906 }, { "epoch": 2.7466294746629476, "grad_norm": 1.928092360496521, "learning_rate": 2.6059703493373337e-07, "loss": 0.1749, "step": 5908 }, { "epoch": 2.747559274755927, "grad_norm": 2.5446362495422363, "learning_rate": 2.494085570860627e-07, "loss": 0.3047, "step": 5910 }, { "epoch": 2.7484890748489077, "grad_norm": 2.7620794773101807, "learning_rate": 2.3846435758372413e-07, "loss": 0.334, "step": 5912 }, { "epoch": 2.7494188749418873, "grad_norm": 2.1332836151123047, "learning_rate": 2.2776454444153704e-07, "loss": 0.2092, "step": 5914 }, { "epoch": 2.7503486750348674, "grad_norm": 2.5818490982055664, "learning_rate": 2.1730922326233343e-07, "loss": 0.2298, "step": 5916 }, { "epoch": 2.7512784751278474, "grad_norm": 2.689347505569458, "learning_rate": 2.0709849723592832e-07, "loss": 0.2747, "step": 5918 }, { "epoch": 2.7522082752208274, "grad_norm": 2.721565008163452, "learning_rate": 1.9713246713805948e-07, "loss": 0.294, "step": 5920 }, { "epoch": 2.7531380753138075, "grad_norm": 2.3525829315185547, "learning_rate": 1.8741123132940765e-07, "loss": 0.2618, "step": 5922 }, { "epoch": 2.7540678754067875, "grad_norm": 2.824775218963623, "learning_rate": 1.779348857546583e-07, "loss": 0.3073, "step": 5924 }, { "epoch": 2.7549976754997676, "grad_norm": 2.6478946208953857, "learning_rate": 1.687035239415165e-07, "loss": 0.2654, "step": 5926 }, { "epoch": 2.7559274755927476, "grad_norm": 3.131631374359131, "learning_rate": 1.597172369997908e-07, "loss": 0.3565, "step": 5928 }, { "epoch": 2.7568572756857277, "grad_norm": 2.0935070514678955, "learning_rate": 1.5097611362051077e-07, "loss": 0.2997, "step": 5930 }, { "epoch": 2.7577870757787077, "grad_norm": 2.8828423023223877, "learning_rate": 1.4248024007503032e-07, "loss": 0.3162, "step": 5932 }, { "epoch": 2.758716875871688, "grad_norm": 2.815387487411499, "learning_rate": 1.3422970021419515e-07, "loss": 0.2938, "step": 5934 }, { "epoch": 2.7596466759646674, "grad_norm": 2.5559678077697754, "learning_rate": 1.2622457546749622e-07, "loss": 0.2705, "step": 5936 }, { "epoch": 2.7605764760576474, "grad_norm": 2.0676045417785645, "learning_rate": 1.1846494484229252e-07, "loss": 0.2341, "step": 5938 }, { "epoch": 2.7615062761506275, "grad_norm": 1.782360315322876, "learning_rate": 1.109508849230006e-07, "loss": 0.1837, "step": 5940 }, { "epoch": 2.7624360762436075, "grad_norm": 3.1904067993164062, "learning_rate": 1.0368246987036192e-07, "loss": 0.2646, "step": 5942 }, { "epoch": 2.7633658763365876, "grad_norm": 2.1982874870300293, "learning_rate": 9.66597714206906e-08, "loss": 0.2025, "step": 5944 }, { "epoch": 2.7642956764295676, "grad_norm": 2.8510584831237793, "learning_rate": 8.988285888519062e-08, "loss": 0.2241, "step": 5946 }, { "epoch": 2.7652254765225477, "grad_norm": 2.1245362758636475, "learning_rate": 8.335179914925367e-08, "loss": 0.1805, "step": 5948 }, { "epoch": 2.7661552766155277, "grad_norm": 2.450500011444092, "learning_rate": 7.706665667179849e-08, "loss": 0.2609, "step": 5950 }, { "epoch": 2.767085076708508, "grad_norm": 2.5859549045562744, "learning_rate": 7.102749348465476e-08, "loss": 0.2783, "step": 5952 }, { "epoch": 2.768014876801488, "grad_norm": 2.557736873626709, "learning_rate": 6.523436919191081e-08, "loss": 0.2877, "step": 5954 }, { "epoch": 2.768944676894468, "grad_norm": 2.2622692584991455, "learning_rate": 5.968734096936962e-08, "loss": 0.2943, "step": 5956 }, { "epoch": 2.7698744769874475, "grad_norm": 1.8883389234542847, "learning_rate": 5.438646356396319e-08, "loss": 0.3016, "step": 5958 }, { "epoch": 2.770804277080428, "grad_norm": 2.137006998062134, "learning_rate": 4.9331789293211264e-08, "loss": 0.1674, "step": 5960 }, { "epoch": 2.7717340771734076, "grad_norm": 2.0656561851501465, "learning_rate": 4.452336804470513e-08, "loss": 0.2599, "step": 5962 }, { "epoch": 2.7726638772663876, "grad_norm": 2.5421957969665527, "learning_rate": 3.996124727562464e-08, "loss": 0.2308, "step": 5964 }, { "epoch": 2.7735936773593677, "grad_norm": 2.9036285877227783, "learning_rate": 3.5645472012258047e-08, "loss": 0.351, "step": 5966 }, { "epoch": 2.7745234774523477, "grad_norm": 1.9908108711242676, "learning_rate": 3.157608484956347e-08, "loss": 0.264, "step": 5968 }, { "epoch": 2.775453277545328, "grad_norm": 2.134727716445923, "learning_rate": 2.7753125950749773e-08, "loss": 0.2483, "step": 5970 }, { "epoch": 2.776383077638308, "grad_norm": 1.9661192893981934, "learning_rate": 2.417663304688245e-08, "loss": 0.2563, "step": 5972 }, { "epoch": 2.777312877731288, "grad_norm": 2.6185855865478516, "learning_rate": 2.0846641436497825e-08, "loss": 0.3119, "step": 5974 }, { "epoch": 2.778242677824268, "grad_norm": 2.5611560344696045, "learning_rate": 1.7763183985269965e-08, "loss": 0.3137, "step": 5976 }, { "epoch": 2.779172477917248, "grad_norm": 3.0630550384521484, "learning_rate": 1.492629112567763e-08, "loss": 0.3281, "step": 5978 }, { "epoch": 2.7801022780102276, "grad_norm": 1.9918605089187622, "learning_rate": 1.233599085671006e-08, "loss": 0.2306, "step": 5980 }, { "epoch": 2.781032078103208, "grad_norm": 3.0767102241516113, "learning_rate": 9.992308743586635e-09, "loss": 0.256, "step": 5982 }, { "epoch": 2.7819618781961877, "grad_norm": 2.264526844024658, "learning_rate": 7.895267917498764e-09, "loss": 0.2342, "step": 5984 }, { "epoch": 2.7828916782891677, "grad_norm": 2.881218433380127, "learning_rate": 6.0448890753989355e-09, "loss": 0.258, "step": 5986 }, { "epoch": 2.7838214783821478, "grad_norm": 3.21379017829895, "learning_rate": 4.441190479775891e-09, "loss": 0.2551, "step": 5988 }, { "epoch": 2.784751278475128, "grad_norm": 2.6587181091308594, "learning_rate": 3.0841879584853218e-09, "loss": 0.2419, "step": 5990 }, { "epoch": 2.785681078568108, "grad_norm": 1.7976186275482178, "learning_rate": 1.973894904597216e-09, "loss": 0.2059, "step": 5992 }, { "epoch": 2.786610878661088, "grad_norm": 2.2649178504943848, "learning_rate": 1.1103222762542993e-09, "loss": 0.2372, "step": 5994 }, { "epoch": 2.787540678754068, "grad_norm": 2.4511754512786865, "learning_rate": 4.93478596572119e-10, "loss": 0.2997, "step": 5996 }, { "epoch": 2.788470478847048, "grad_norm": 2.506096124649048, "learning_rate": 1.2336995354467253e-10, "loss": 0.2399, "step": 5998 }, { "epoch": 2.789400278940028, "grad_norm": 2.624849796295166, "learning_rate": 0.0, "loss": 0.2185, "step": 6000 }, { "epoch": 2.789400278940028, "eval_cer": 0.2352008724764127, "eval_loss": 0.3561025559902191, "eval_runtime": 396.8107, "eval_samples_per_second": 31.99, "eval_steps_per_second": 1.0, "step": 6000 }, { "epoch": 2.7903300790330077, "grad_norm": 2.5567548274993896, "learning_rate": 1.2336995354467197e-10, "loss": 0.1794, "step": 6002 }, { "epoch": 2.791259879125988, "grad_norm": 2.4823946952819824, "learning_rate": 4.934785965721167e-10, "loss": 0.2747, "step": 6004 }, { "epoch": 2.7921896792189678, "grad_norm": 2.448347568511963, "learning_rate": 1.1103222762542941e-09, "loss": 0.2345, "step": 6006 }, { "epoch": 2.793119479311948, "grad_norm": 2.3113925457000732, "learning_rate": 1.973894904597207e-09, "loss": 0.3012, "step": 6008 }, { "epoch": 2.794049279404928, "grad_norm": 2.466228723526001, "learning_rate": 3.0841879584853073e-09, "loss": 0.2654, "step": 6010 }, { "epoch": 2.794979079497908, "grad_norm": 3.04380202293396, "learning_rate": 4.441190479775869e-09, "loss": 0.3221, "step": 6012 }, { "epoch": 2.795908879590888, "grad_norm": 2.7768571376800537, "learning_rate": 6.0448890753989065e-09, "loss": 0.2513, "step": 6014 }, { "epoch": 2.796838679683868, "grad_norm": 2.4154765605926514, "learning_rate": 7.895267917501502e-09, "loss": 0.3139, "step": 6016 }, { "epoch": 2.797768479776848, "grad_norm": 2.285459041595459, "learning_rate": 9.992308743586585e-09, "loss": 0.352, "step": 6018 }, { "epoch": 2.798698279869828, "grad_norm": 2.319125175476074, "learning_rate": 1.2335990856709996e-08, "loss": 0.2476, "step": 6020 }, { "epoch": 2.799628079962808, "grad_norm": 2.5459704399108887, "learning_rate": 1.4926291125677555e-08, "loss": 0.2915, "step": 6022 }, { "epoch": 2.8005578800557878, "grad_norm": 2.0710291862487793, "learning_rate": 1.7763183985267097e-08, "loss": 0.2451, "step": 6024 }, { "epoch": 2.8014876801487683, "grad_norm": 2.8444535732269287, "learning_rate": 2.084664143649772e-08, "loss": 0.3328, "step": 6026 }, { "epoch": 2.802417480241748, "grad_norm": 2.374697685241699, "learning_rate": 2.4176633046882325e-08, "loss": 0.2274, "step": 6028 }, { "epoch": 2.803347280334728, "grad_norm": 2.4119269847869873, "learning_rate": 2.775312595074963e-08, "loss": 0.2333, "step": 6030 }, { "epoch": 2.804277080427708, "grad_norm": 2.247133493423462, "learning_rate": 3.157608484956331e-08, "loss": 0.3224, "step": 6032 }, { "epoch": 2.805206880520688, "grad_norm": 2.7475342750549316, "learning_rate": 3.5645472012255095e-08, "loss": 0.2824, "step": 6034 }, { "epoch": 2.806136680613668, "grad_norm": 2.1219069957733154, "learning_rate": 3.996124727562445e-08, "loss": 0.2666, "step": 6036 }, { "epoch": 2.807066480706648, "grad_norm": 2.4888384342193604, "learning_rate": 4.4523368044704916e-08, "loss": 0.1897, "step": 6038 }, { "epoch": 2.807996280799628, "grad_norm": 2.6586015224456787, "learning_rate": 4.933178929321103e-08, "loss": 0.2425, "step": 6040 }, { "epoch": 2.808926080892608, "grad_norm": 2.172358512878418, "learning_rate": 5.4386463563962934e-08, "loss": 0.2373, "step": 6042 }, { "epoch": 2.8098558809855883, "grad_norm": 2.541267156600952, "learning_rate": 5.968734096936658e-08, "loss": 0.2189, "step": 6044 }, { "epoch": 2.810785681078568, "grad_norm": 3.4163122177124023, "learning_rate": 6.523436919191051e-08, "loss": 0.3743, "step": 6046 }, { "epoch": 2.8117154811715483, "grad_norm": 2.0647010803222656, "learning_rate": 7.102749348465167e-08, "loss": 0.2509, "step": 6048 }, { "epoch": 2.812645281264528, "grad_norm": 2.6896021366119385, "learning_rate": 7.706665667180092e-08, "loss": 0.301, "step": 6050 }, { "epoch": 2.813575081357508, "grad_norm": 3.175452947616577, "learning_rate": 8.335179914924775e-08, "loss": 0.2918, "step": 6052 }, { "epoch": 2.814504881450488, "grad_norm": 2.4100139141082764, "learning_rate": 8.988285888519022e-08, "loss": 0.252, "step": 6054 }, { "epoch": 2.815434681543468, "grad_norm": 2.3901336193084717, "learning_rate": 9.665977142068738e-08, "loss": 0.2187, "step": 6056 }, { "epoch": 2.816364481636448, "grad_norm": 2.645719051361084, "learning_rate": 1.0368246987035589e-07, "loss": 0.2641, "step": 6058 }, { "epoch": 2.817294281729428, "grad_norm": 2.0789480209350586, "learning_rate": 1.1095088492300287e-07, "loss": 0.2546, "step": 6060 }, { "epoch": 2.8182240818224082, "grad_norm": 2.601468324661255, "learning_rate": 1.184649448422892e-07, "loss": 0.2944, "step": 6062 }, { "epoch": 2.8191538819153883, "grad_norm": 2.0951900482177734, "learning_rate": 1.262245754674929e-07, "loss": 0.1779, "step": 6064 }, { "epoch": 2.8200836820083683, "grad_norm": 2.4619717597961426, "learning_rate": 1.342297002141918e-07, "loss": 0.273, "step": 6066 }, { "epoch": 2.821013482101348, "grad_norm": 2.926683187484741, "learning_rate": 1.4248024007502694e-07, "loss": 0.2634, "step": 6068 }, { "epoch": 2.8219432821943284, "grad_norm": 2.429720163345337, "learning_rate": 1.5097611362050738e-07, "loss": 0.2099, "step": 6070 }, { "epoch": 2.822873082287308, "grad_norm": 2.1771304607391357, "learning_rate": 1.597172369997902e-07, "loss": 0.2155, "step": 6072 }, { "epoch": 2.823802882380288, "grad_norm": 3.37185001373291, "learning_rate": 1.6870352394151308e-07, "loss": 0.3093, "step": 6074 }, { "epoch": 2.824732682473268, "grad_norm": 2.2367725372314453, "learning_rate": 1.7793488575465764e-07, "loss": 0.2861, "step": 6076 }, { "epoch": 2.825662482566248, "grad_norm": 2.5550003051757812, "learning_rate": 1.874112313294042e-07, "loss": 0.2124, "step": 6078 }, { "epoch": 2.8265922826592282, "grad_norm": 2.7120437622070312, "learning_rate": 1.9713246713805604e-07, "loss": 0.2276, "step": 6080 }, { "epoch": 2.8275220827522083, "grad_norm": 1.9350709915161133, "learning_rate": 2.070984972359304e-07, "loss": 0.1907, "step": 6082 }, { "epoch": 2.8284518828451883, "grad_norm": 2.8723201751708984, "learning_rate": 2.1730922326232993e-07, "loss": 0.3224, "step": 6084 }, { "epoch": 2.8293816829381684, "grad_norm": 1.9876766204833984, "learning_rate": 2.2776454444152793e-07, "loss": 0.2217, "step": 6086 }, { "epoch": 2.8303114830311484, "grad_norm": 1.6045869588851929, "learning_rate": 2.3846435758372053e-07, "loss": 0.2348, "step": 6088 }, { "epoch": 2.831241283124128, "grad_norm": 2.8099205493927, "learning_rate": 2.4940855708605904e-07, "loss": 0.3237, "step": 6090 }, { "epoch": 2.8321710832171085, "grad_norm": 2.0407848358154297, "learning_rate": 2.6059703493372415e-07, "loss": 0.2063, "step": 6092 }, { "epoch": 2.833100883310088, "grad_norm": 2.5670576095581055, "learning_rate": 2.7202968070095845e-07, "loss": 0.2461, "step": 6094 }, { "epoch": 2.834030683403068, "grad_norm": 2.1407699584960938, "learning_rate": 2.83706381552146e-07, "loss": 0.2938, "step": 6096 }, { "epoch": 2.8349604834960482, "grad_norm": 3.3097147941589355, "learning_rate": 2.956270222429838e-07, "loss": 0.2359, "step": 6098 }, { "epoch": 2.8358902835890283, "grad_norm": 1.937959909439087, "learning_rate": 3.077914851215587e-07, "loss": 0.18, "step": 6100 }, { "epoch": 2.8368200836820083, "grad_norm": 2.4911603927612305, "learning_rate": 3.201996501295215e-07, "loss": 0.25, "step": 6102 }, { "epoch": 2.8377498837749884, "grad_norm": 2.5415139198303223, "learning_rate": 3.3285139480329936e-07, "loss": 0.3, "step": 6104 }, { "epoch": 2.8386796838679684, "grad_norm": 2.1749463081359863, "learning_rate": 3.4574659427527053e-07, "loss": 0.2392, "step": 6106 }, { "epoch": 2.8396094839609485, "grad_norm": 2.3261797428131104, "learning_rate": 3.588851212750491e-07, "loss": 0.291, "step": 6108 }, { "epoch": 2.8405392840539285, "grad_norm": 1.9706990718841553, "learning_rate": 3.722668461306509e-07, "loss": 0.2316, "step": 6110 }, { "epoch": 2.8414690841469086, "grad_norm": 2.282243013381958, "learning_rate": 3.8589163676986425e-07, "loss": 0.2341, "step": 6112 }, { "epoch": 2.8423988842398886, "grad_norm": 2.46293044090271, "learning_rate": 3.997593587215107e-07, "loss": 0.2432, "step": 6114 }, { "epoch": 2.8433286843328682, "grad_norm": 1.9051731824874878, "learning_rate": 4.138698751167628e-07, "loss": 0.1878, "step": 6116 }, { "epoch": 2.8442584844258483, "grad_norm": 2.230358362197876, "learning_rate": 4.282230466905155e-07, "loss": 0.1977, "step": 6118 }, { "epoch": 2.8451882845188283, "grad_norm": 2.243295669555664, "learning_rate": 4.428187317827768e-07, "loss": 0.2856, "step": 6120 }, { "epoch": 2.8461180846118084, "grad_norm": 2.38871169090271, "learning_rate": 4.576567863400303e-07, "loss": 0.2776, "step": 6122 }, { "epoch": 2.8470478847047884, "grad_norm": 2.5764153003692627, "learning_rate": 4.727370639166481e-07, "loss": 0.2497, "step": 6124 }, { "epoch": 2.8479776847977685, "grad_norm": 2.730886697769165, "learning_rate": 4.8805941567639e-07, "loss": 0.2902, "step": 6126 }, { "epoch": 2.8489074848907485, "grad_norm": 2.3094592094421387, "learning_rate": 5.036236903938262e-07, "loss": 0.3462, "step": 6128 }, { "epoch": 2.8498372849837286, "grad_norm": 1.4017462730407715, "learning_rate": 5.19429734455854e-07, "loss": 0.1919, "step": 6130 }, { "epoch": 2.8507670850767086, "grad_norm": 2.7176129817962646, "learning_rate": 5.354773918631933e-07, "loss": 0.3064, "step": 6132 }, { "epoch": 2.8516968851696887, "grad_norm": 2.358203411102295, "learning_rate": 5.517665042319491e-07, "loss": 0.2995, "step": 6134 }, { "epoch": 2.8526266852626687, "grad_norm": 2.3600680828094482, "learning_rate": 5.682969107951662e-07, "loss": 0.2618, "step": 6136 }, { "epoch": 2.8535564853556483, "grad_norm": 3.3320369720458984, "learning_rate": 5.850684484043888e-07, "loss": 0.2421, "step": 6138 }, { "epoch": 2.854486285448629, "grad_norm": 2.250293731689453, "learning_rate": 6.020809515313064e-07, "loss": 0.2407, "step": 6140 }, { "epoch": 2.8554160855416084, "grad_norm": 2.1021084785461426, "learning_rate": 6.193342522694162e-07, "loss": 0.1776, "step": 6142 }, { "epoch": 2.8563458856345885, "grad_norm": 2.0238537788391113, "learning_rate": 6.368281803355696e-07, "loss": 0.3217, "step": 6144 }, { "epoch": 2.8572756857275685, "grad_norm": 1.8029626607894897, "learning_rate": 6.54562563071776e-07, "loss": 0.2647, "step": 6146 }, { "epoch": 2.8582054858205486, "grad_norm": 2.741246461868286, "learning_rate": 6.725372254468431e-07, "loss": 0.2154, "step": 6148 }, { "epoch": 2.8591352859135286, "grad_norm": 1.9377201795578003, "learning_rate": 6.907519900580812e-07, "loss": 0.2333, "step": 6150 }, { "epoch": 2.8600650860065087, "grad_norm": 2.228529453277588, "learning_rate": 7.092066771331457e-07, "loss": 0.2293, "step": 6152 }, { "epoch": 2.8609948860994887, "grad_norm": 2.590019702911377, "learning_rate": 7.279011045317174e-07, "loss": 0.23, "step": 6154 }, { "epoch": 2.8619246861924688, "grad_norm": 1.9443414211273193, "learning_rate": 7.468350877473557e-07, "loss": 0.1915, "step": 6156 }, { "epoch": 2.862854486285449, "grad_norm": 2.006225109100342, "learning_rate": 7.660084399092695e-07, "loss": 0.1536, "step": 6158 }, { "epoch": 2.8637842863784284, "grad_norm": 2.2362451553344727, "learning_rate": 7.85420971784224e-07, "loss": 0.3061, "step": 6160 }, { "epoch": 2.864714086471409, "grad_norm": 2.3099842071533203, "learning_rate": 8.050724917783617e-07, "loss": 0.2529, "step": 6162 }, { "epoch": 2.8656438865643885, "grad_norm": 2.5542967319488525, "learning_rate": 8.249628059391232e-07, "loss": 0.2543, "step": 6164 }, { "epoch": 2.8665736866573686, "grad_norm": 2.3077824115753174, "learning_rate": 8.450917179571287e-07, "loss": 0.2228, "step": 6166 }, { "epoch": 2.8675034867503486, "grad_norm": 3.2804768085479736, "learning_rate": 8.654590291681485e-07, "loss": 0.3396, "step": 6168 }, { "epoch": 2.8684332868433287, "grad_norm": 2.2010343074798584, "learning_rate": 8.860645385550547e-07, "loss": 0.3168, "step": 6170 }, { "epoch": 2.8693630869363087, "grad_norm": 2.137640953063965, "learning_rate": 9.069080427497499e-07, "loss": 0.2043, "step": 6172 }, { "epoch": 2.8702928870292888, "grad_norm": 2.97548770904541, "learning_rate": 9.27989336035302e-07, "loss": 0.2668, "step": 6174 }, { "epoch": 2.871222687122269, "grad_norm": 2.3159265518188477, "learning_rate": 9.493082103478557e-07, "loss": 0.3084, "step": 6176 }, { "epoch": 2.872152487215249, "grad_norm": 2.1833555698394775, "learning_rate": 9.70864455278704e-07, "loss": 0.2239, "step": 6178 }, { "epoch": 2.873082287308229, "grad_norm": 3.0379202365875244, "learning_rate": 9.92657858076422e-07, "loss": 0.2454, "step": 6180 }, { "epoch": 2.8740120874012085, "grad_norm": 2.2621288299560547, "learning_rate": 1.0146882036489264e-06, "loss": 0.2558, "step": 6182 }, { "epoch": 2.874941887494189, "grad_norm": 1.7559374570846558, "learning_rate": 1.0369552745655998e-06, "loss": 0.1393, "step": 6184 }, { "epoch": 2.8758716875871686, "grad_norm": 2.442074775695801, "learning_rate": 1.0594588510594402e-06, "loss": 0.2527, "step": 6186 }, { "epoch": 2.8768014876801487, "grad_norm": 2.078319549560547, "learning_rate": 1.0821987110292325e-06, "loss": 0.2075, "step": 6188 }, { "epoch": 2.8777312877731287, "grad_norm": 2.0907082557678223, "learning_rate": 1.1051746300417542e-06, "loss": 0.2414, "step": 6190 }, { "epoch": 2.8786610878661087, "grad_norm": 2.696437358856201, "learning_rate": 1.1283863813339306e-06, "loss": 0.2534, "step": 6192 }, { "epoch": 2.879590887959089, "grad_norm": 2.6453042030334473, "learning_rate": 1.1518337358151513e-06, "loss": 0.3793, "step": 6194 }, { "epoch": 2.880520688052069, "grad_norm": 2.322021961212158, "learning_rate": 1.1755164620695192e-06, "loss": 0.2618, "step": 6196 }, { "epoch": 2.881450488145049, "grad_norm": 2.2429325580596924, "learning_rate": 1.199434326358086e-06, "loss": 0.2904, "step": 6198 }, { "epoch": 2.882380288238029, "grad_norm": 2.281306505203247, "learning_rate": 1.223587092621161e-06, "loss": 0.2183, "step": 6200 }, { "epoch": 2.883310088331009, "grad_norm": 2.6301770210266113, "learning_rate": 1.2479745224807044e-06, "loss": 0.2337, "step": 6202 }, { "epoch": 2.8842398884239886, "grad_norm": 2.7186827659606934, "learning_rate": 1.2725963752426457e-06, "loss": 0.272, "step": 6204 }, { "epoch": 2.885169688516969, "grad_norm": 2.488546371459961, "learning_rate": 1.2974524078991936e-06, "loss": 0.2685, "step": 6206 }, { "epoch": 2.8860994886099487, "grad_norm": 2.4017257690429688, "learning_rate": 1.3225423751313855e-06, "loss": 0.3008, "step": 6208 }, { "epoch": 2.8870292887029287, "grad_norm": 2.3125481605529785, "learning_rate": 1.3478660293113727e-06, "loss": 0.2091, "step": 6210 }, { "epoch": 2.887959088795909, "grad_norm": 2.096195697784424, "learning_rate": 1.3734231205048823e-06, "loss": 0.2205, "step": 6212 }, { "epoch": 2.888888888888889, "grad_norm": 2.7029924392700195, "learning_rate": 1.3992133964737586e-06, "loss": 0.2641, "step": 6214 }, { "epoch": 2.889818688981869, "grad_norm": 2.8447341918945312, "learning_rate": 1.425236602678376e-06, "loss": 0.3301, "step": 6216 }, { "epoch": 2.890748489074849, "grad_norm": 3.1220152378082275, "learning_rate": 1.4514924822802341e-06, "loss": 0.197, "step": 6218 }, { "epoch": 2.891678289167829, "grad_norm": 1.9901995658874512, "learning_rate": 1.477980776144361e-06, "loss": 0.2006, "step": 6220 }, { "epoch": 2.892608089260809, "grad_norm": 2.5768136978149414, "learning_rate": 1.5047012228420032e-06, "loss": 0.2233, "step": 6222 }, { "epoch": 2.893537889353789, "grad_norm": 2.2311246395111084, "learning_rate": 1.5316535586531566e-06, "loss": 0.2949, "step": 6224 }, { "epoch": 2.8944676894467687, "grad_norm": 1.7166619300842285, "learning_rate": 1.558837517569117e-06, "loss": 0.21, "step": 6226 }, { "epoch": 2.895397489539749, "grad_norm": 2.904482126235962, "learning_rate": 1.586252831295182e-06, "loss": 0.2465, "step": 6228 }, { "epoch": 2.896327289632729, "grad_norm": 3.4093449115753174, "learning_rate": 1.6138992292533075e-06, "loss": 0.3004, "step": 6230 }, { "epoch": 2.897257089725709, "grad_norm": 2.2844748497009277, "learning_rate": 1.6417764385847024e-06, "loss": 0.2776, "step": 6232 }, { "epoch": 2.898186889818689, "grad_norm": 2.8449339866638184, "learning_rate": 1.669884184152537e-06, "loss": 0.2178, "step": 6234 }, { "epoch": 2.899116689911669, "grad_norm": 2.8499138355255127, "learning_rate": 1.6982221885447264e-06, "loss": 0.3266, "step": 6236 }, { "epoch": 2.900046490004649, "grad_norm": 1.9145959615707397, "learning_rate": 1.726790172076606e-06, "loss": 0.2948, "step": 6238 }, { "epoch": 2.900976290097629, "grad_norm": 2.594271421432495, "learning_rate": 1.7555878527937105e-06, "loss": 0.2346, "step": 6240 }, { "epoch": 2.901906090190609, "grad_norm": 2.219540596008301, "learning_rate": 1.784614946474561e-06, "loss": 0.3143, "step": 6242 }, { "epoch": 2.902835890283589, "grad_norm": 2.5139358043670654, "learning_rate": 1.813871166633477e-06, "loss": 0.2756, "step": 6244 }, { "epoch": 2.903765690376569, "grad_norm": 2.6197688579559326, "learning_rate": 1.8433562245233408e-06, "loss": 0.2159, "step": 6246 }, { "epoch": 2.904695490469549, "grad_norm": 2.474531650543213, "learning_rate": 1.8730698291385552e-06, "loss": 0.2438, "step": 6248 }, { "epoch": 2.9056252905625293, "grad_norm": 2.4666078090667725, "learning_rate": 1.9030116872178183e-06, "loss": 0.2188, "step": 6250 }, { "epoch": 2.906555090655509, "grad_norm": 2.747718334197998, "learning_rate": 1.9331815032471315e-06, "loss": 0.2205, "step": 6252 }, { "epoch": 2.907484890748489, "grad_norm": 2.9293301105499268, "learning_rate": 1.963578979462539e-06, "loss": 0.3674, "step": 6254 }, { "epoch": 2.908414690841469, "grad_norm": 2.0261175632476807, "learning_rate": 1.994203815853242e-06, "loss": 0.1948, "step": 6256 }, { "epoch": 2.909344490934449, "grad_norm": 2.6732428073883057, "learning_rate": 2.0250557101644823e-06, "loss": 0.2309, "step": 6258 }, { "epoch": 2.910274291027429, "grad_norm": 2.8115603923797607, "learning_rate": 2.056134357900468e-06, "loss": 0.2473, "step": 6260 }, { "epoch": 2.911204091120409, "grad_norm": 2.5912272930145264, "learning_rate": 2.0874394523275435e-06, "loss": 0.2615, "step": 6262 }, { "epoch": 2.912133891213389, "grad_norm": 1.8872019052505493, "learning_rate": 2.118970684477053e-06, "loss": 0.3047, "step": 6264 }, { "epoch": 2.913063691306369, "grad_norm": 2.9026174545288086, "learning_rate": 2.150727743148478e-06, "loss": 0.2267, "step": 6266 }, { "epoch": 2.9139934913993493, "grad_norm": 2.472886323928833, "learning_rate": 2.1827103149124364e-06, "loss": 0.2217, "step": 6268 }, { "epoch": 2.914923291492329, "grad_norm": 2.4166858196258545, "learning_rate": 2.214918084113873e-06, "loss": 0.2548, "step": 6270 }, { "epoch": 2.9158530915853094, "grad_norm": 2.6034111976623535, "learning_rate": 2.2473507328751107e-06, "loss": 0.327, "step": 6272 }, { "epoch": 2.916782891678289, "grad_norm": 2.7687501907348633, "learning_rate": 2.2800079410989962e-06, "loss": 0.2797, "step": 6274 }, { "epoch": 2.917712691771269, "grad_norm": 2.93973970413208, "learning_rate": 2.312889386472075e-06, "loss": 0.2632, "step": 6276 }, { "epoch": 2.918642491864249, "grad_norm": 2.9362919330596924, "learning_rate": 2.34599474446775e-06, "loss": 0.2436, "step": 6278 }, { "epoch": 2.919572291957229, "grad_norm": 2.42242693901062, "learning_rate": 2.3793236883495245e-06, "loss": 0.2702, "step": 6280 }, { "epoch": 2.920502092050209, "grad_norm": 2.1305806636810303, "learning_rate": 2.4128758891741207e-06, "loss": 0.2187, "step": 6282 }, { "epoch": 2.921431892143189, "grad_norm": 2.2349202632904053, "learning_rate": 2.446651015794918e-06, "loss": 0.1797, "step": 6284 }, { "epoch": 2.9223616922361693, "grad_norm": 1.8784295320510864, "learning_rate": 2.480648734865054e-06, "loss": 0.2137, "step": 6286 }, { "epoch": 2.9232914923291493, "grad_norm": 2.5853073596954346, "learning_rate": 2.514868710840728e-06, "loss": 0.2305, "step": 6288 }, { "epoch": 2.9242212924221294, "grad_norm": 2.0571699142456055, "learning_rate": 2.549310605984611e-06, "loss": 0.2629, "step": 6290 }, { "epoch": 2.9251510925151094, "grad_norm": 3.401061773300171, "learning_rate": 2.5839740803691223e-06, "loss": 0.3395, "step": 6292 }, { "epoch": 2.9260808926080895, "grad_norm": 3.3026394844055176, "learning_rate": 2.618858791879705e-06, "loss": 0.3123, "step": 6294 }, { "epoch": 2.927010692701069, "grad_norm": 2.4751200675964355, "learning_rate": 2.6539643962184e-06, "loss": 0.2468, "step": 6296 }, { "epoch": 2.9279404927940496, "grad_norm": 1.5190640687942505, "learning_rate": 2.6892905469070444e-06, "loss": 0.2002, "step": 6298 }, { "epoch": 2.928870292887029, "grad_norm": 2.8737733364105225, "learning_rate": 2.7248368952908138e-06, "loss": 0.3247, "step": 6300 }, { "epoch": 2.929800092980009, "grad_norm": 2.9597744941711426, "learning_rate": 2.760603090541561e-06, "loss": 0.3095, "step": 6302 }, { "epoch": 2.9307298930729893, "grad_norm": 2.576777696609497, "learning_rate": 2.7965887796613744e-06, "loss": 0.265, "step": 6304 }, { "epoch": 2.9316596931659693, "grad_norm": 1.7560986280441284, "learning_rate": 2.8327936074860923e-06, "loss": 0.1813, "step": 6306 }, { "epoch": 2.9325894932589494, "grad_norm": 1.9921371936798096, "learning_rate": 2.8692172166886215e-06, "loss": 0.1849, "step": 6308 }, { "epoch": 2.9335192933519294, "grad_norm": 2.094646692276001, "learning_rate": 2.905859247782663e-06, "loss": 0.2505, "step": 6310 }, { "epoch": 2.9344490934449095, "grad_norm": 2.129343032836914, "learning_rate": 2.9427193391261653e-06, "loss": 0.2038, "step": 6312 }, { "epoch": 2.9353788935378895, "grad_norm": 2.599111318588257, "learning_rate": 2.979797126924927e-06, "loss": 0.2252, "step": 6314 }, { "epoch": 2.9363086936308695, "grad_norm": 2.513546943664551, "learning_rate": 3.0170922452360867e-06, "loss": 0.2893, "step": 6316 }, { "epoch": 2.937238493723849, "grad_norm": 2.663705825805664, "learning_rate": 3.0546043259719503e-06, "loss": 0.2787, "step": 6318 }, { "epoch": 2.9381682938168296, "grad_norm": 2.177443027496338, "learning_rate": 3.09233299890342e-06, "loss": 0.2031, "step": 6320 }, { "epoch": 2.9390980939098093, "grad_norm": 2.8545148372650146, "learning_rate": 3.13027789166369e-06, "loss": 0.3194, "step": 6322 }, { "epoch": 2.9400278940027893, "grad_norm": 2.276538848876953, "learning_rate": 3.1684386297520033e-06, "loss": 0.2378, "step": 6324 }, { "epoch": 2.9409576940957693, "grad_norm": 2.372652292251587, "learning_rate": 3.2068148365372603e-06, "loss": 0.2491, "step": 6326 }, { "epoch": 2.9418874941887494, "grad_norm": 2.1297507286071777, "learning_rate": 3.2454061332618602e-06, "loss": 0.2392, "step": 6328 }, { "epoch": 2.9428172942817294, "grad_norm": 2.4598474502563477, "learning_rate": 3.28421213904522e-06, "loss": 0.2264, "step": 6330 }, { "epoch": 2.9437470943747095, "grad_norm": 2.147122621536255, "learning_rate": 3.323232470887746e-06, "loss": 0.2714, "step": 6332 }, { "epoch": 2.9446768944676895, "grad_norm": 1.6620055437088013, "learning_rate": 3.362466743674544e-06, "loss": 0.2227, "step": 6334 }, { "epoch": 2.9456066945606696, "grad_norm": 1.9054003953933716, "learning_rate": 3.4019145701791323e-06, "loss": 0.2327, "step": 6336 }, { "epoch": 2.9465364946536496, "grad_norm": 2.035144567489624, "learning_rate": 3.4415755610673925e-06, "loss": 0.2543, "step": 6338 }, { "epoch": 2.9474662947466292, "grad_norm": 2.2954864501953125, "learning_rate": 3.4814493249013985e-06, "loss": 0.318, "step": 6340 }, { "epoch": 2.9483960948396097, "grad_norm": 2.229248285293579, "learning_rate": 3.521535468143198e-06, "loss": 0.1537, "step": 6342 }, { "epoch": 2.9493258949325893, "grad_norm": 2.5113325119018555, "learning_rate": 3.5618335951586988e-06, "loss": 0.3087, "step": 6344 }, { "epoch": 2.9502556950255694, "grad_norm": 2.1582376956939697, "learning_rate": 3.60234330822168e-06, "loss": 0.3028, "step": 6346 }, { "epoch": 2.9511854951185494, "grad_norm": 2.611562967300415, "learning_rate": 3.6430642075176233e-06, "loss": 0.3243, "step": 6348 }, { "epoch": 2.9521152952115295, "grad_norm": 2.527963161468506, "learning_rate": 3.683995891147695e-06, "loss": 0.2283, "step": 6350 }, { "epoch": 2.9530450953045095, "grad_norm": 3.150827169418335, "learning_rate": 3.7251379551327007e-06, "loss": 0.3199, "step": 6352 }, { "epoch": 2.9539748953974896, "grad_norm": 2.3946900367736816, "learning_rate": 3.7664899934171047e-06, "loss": 0.3284, "step": 6354 }, { "epoch": 2.9549046954904696, "grad_norm": 2.5040907859802246, "learning_rate": 3.8080515978729357e-06, "loss": 0.2951, "step": 6356 }, { "epoch": 2.9558344955834497, "grad_norm": 2.3917696475982666, "learning_rate": 3.849822358303959e-06, "loss": 0.2284, "step": 6358 }, { "epoch": 2.9567642956764297, "grad_norm": 2.593635082244873, "learning_rate": 3.891801862449609e-06, "loss": 0.2634, "step": 6360 }, { "epoch": 2.9576940957694093, "grad_norm": 2.1934666633605957, "learning_rate": 3.933989695989201e-06, "loss": 0.2227, "step": 6362 }, { "epoch": 2.95862389586239, "grad_norm": 2.715129852294922, "learning_rate": 3.97638544254578e-06, "loss": 0.3062, "step": 6364 }, { "epoch": 2.9595536959553694, "grad_norm": 2.630502700805664, "learning_rate": 4.018988683690462e-06, "loss": 0.3383, "step": 6366 }, { "epoch": 2.9604834960483495, "grad_norm": 2.2403783798217773, "learning_rate": 4.061798998946483e-06, "loss": 0.2917, "step": 6368 }, { "epoch": 2.9614132961413295, "grad_norm": 2.400205373764038, "learning_rate": 4.1048159657932416e-06, "loss": 0.2941, "step": 6370 }, { "epoch": 2.9623430962343096, "grad_norm": 2.6285858154296875, "learning_rate": 4.148039159670717e-06, "loss": 0.2672, "step": 6372 }, { "epoch": 2.9632728963272896, "grad_norm": 2.5492680072784424, "learning_rate": 4.191468153983406e-06, "loss": 0.3032, "step": 6374 }, { "epoch": 2.9642026964202697, "grad_norm": 2.1233949661254883, "learning_rate": 4.2351025201046985e-06, "loss": 0.2633, "step": 6376 }, { "epoch": 2.9651324965132497, "grad_norm": 3.417511224746704, "learning_rate": 4.278941827380964e-06, "loss": 0.2189, "step": 6378 }, { "epoch": 2.9660622966062298, "grad_norm": 2.200267791748047, "learning_rate": 4.322985643135963e-06, "loss": 0.2654, "step": 6380 }, { "epoch": 2.96699209669921, "grad_norm": 2.924786329269409, "learning_rate": 4.367233532675016e-06, "loss": 0.3229, "step": 6382 }, { "epoch": 2.9679218967921894, "grad_norm": 2.3849923610687256, "learning_rate": 4.4116850592893165e-06, "loss": 0.2664, "step": 6384 }, { "epoch": 2.96885169688517, "grad_norm": 2.1958627700805664, "learning_rate": 4.4563397842602444e-06, "loss": 0.1895, "step": 6386 }, { "epoch": 2.9697814969781495, "grad_norm": 2.963195323944092, "learning_rate": 4.501197266863715e-06, "loss": 0.2946, "step": 6388 }, { "epoch": 2.9707112970711296, "grad_norm": 2.0160739421844482, "learning_rate": 4.546257064374435e-06, "loss": 0.1973, "step": 6390 }, { "epoch": 2.9716410971641096, "grad_norm": 2.603403329849243, "learning_rate": 4.591518732070395e-06, "loss": 0.2434, "step": 6392 }, { "epoch": 2.9725708972570897, "grad_norm": 2.9543464183807373, "learning_rate": 4.636981823237256e-06, "loss": 0.3036, "step": 6394 }, { "epoch": 2.9735006973500697, "grad_norm": 2.420292377471924, "learning_rate": 4.682645889172665e-06, "loss": 0.3093, "step": 6396 }, { "epoch": 2.9744304974430498, "grad_norm": 2.361285448074341, "learning_rate": 4.7285104791906766e-06, "loss": 0.2798, "step": 6398 }, { "epoch": 2.97536029753603, "grad_norm": 2.0513370037078857, "learning_rate": 4.774575140626323e-06, "loss": 0.2376, "step": 6400 }, { "epoch": 2.97629009762901, "grad_norm": 2.1844732761383057, "learning_rate": 4.820839418840027e-06, "loss": 0.2952, "step": 6402 }, { "epoch": 2.97721989772199, "grad_norm": 2.306060552597046, "learning_rate": 4.867302857221955e-06, "loss": 0.2561, "step": 6404 }, { "epoch": 2.9781496978149695, "grad_norm": 2.425590753555298, "learning_rate": 4.913964997196812e-06, "loss": 0.2602, "step": 6406 }, { "epoch": 2.97907949790795, "grad_norm": 2.374727249145508, "learning_rate": 4.9608253782280835e-06, "loss": 0.2584, "step": 6408 }, { "epoch": 2.9800092980009296, "grad_norm": 2.300011396408081, "learning_rate": 5.00788353782276e-06, "loss": 0.2329, "step": 6410 }, { "epoch": 2.9809390980939097, "grad_norm": 2.7227087020874023, "learning_rate": 5.055139011535744e-06, "loss": 0.2528, "step": 6412 }, { "epoch": 2.9818688981868897, "grad_norm": 2.5672411918640137, "learning_rate": 5.102591332974593e-06, "loss": 0.3327, "step": 6414 }, { "epoch": 2.9827986982798698, "grad_norm": 1.9883947372436523, "learning_rate": 5.150240033804129e-06, "loss": 0.1915, "step": 6416 }, { "epoch": 2.98372849837285, "grad_norm": 2.0916595458984375, "learning_rate": 5.198084643750835e-06, "loss": 0.2141, "step": 6418 }, { "epoch": 2.98465829846583, "grad_norm": 2.605623245239258, "learning_rate": 5.246124690607746e-06, "loss": 0.2625, "step": 6420 }, { "epoch": 2.98558809855881, "grad_norm": 2.267484664916992, "learning_rate": 5.294359700239002e-06, "loss": 0.2406, "step": 6422 }, { "epoch": 2.98651789865179, "grad_norm": 2.6729469299316406, "learning_rate": 5.342789196584555e-06, "loss": 0.2884, "step": 6424 }, { "epoch": 2.98744769874477, "grad_norm": 2.1673758029937744, "learning_rate": 5.391412701664745e-06, "loss": 0.1975, "step": 6426 }, { "epoch": 2.9883774988377496, "grad_norm": 2.379544734954834, "learning_rate": 5.44022973558529e-06, "loss": 0.2473, "step": 6428 }, { "epoch": 2.98930729893073, "grad_norm": 2.7460875511169434, "learning_rate": 5.489239816541775e-06, "loss": 0.2612, "step": 6430 }, { "epoch": 2.9902370990237097, "grad_norm": 2.234384059906006, "learning_rate": 5.538442460824429e-06, "loss": 0.2388, "step": 6432 }, { "epoch": 2.9911668991166898, "grad_norm": 1.9823139905929565, "learning_rate": 5.587837182823047e-06, "loss": 0.3026, "step": 6434 }, { "epoch": 2.99209669920967, "grad_norm": 2.4297292232513428, "learning_rate": 5.6374234950316365e-06, "loss": 0.2225, "step": 6436 }, { "epoch": 2.99302649930265, "grad_norm": 2.3964240550994873, "learning_rate": 5.687200908053396e-06, "loss": 0.3192, "step": 6438 }, { "epoch": 2.99395629939563, "grad_norm": 2.6157922744750977, "learning_rate": 5.737168930605274e-06, "loss": 0.2239, "step": 6440 }, { "epoch": 2.99488609948861, "grad_norm": 2.0936214923858643, "learning_rate": 5.787327069523085e-06, "loss": 0.243, "step": 6442 }, { "epoch": 2.99581589958159, "grad_norm": 2.732815742492676, "learning_rate": 5.837674829766281e-06, "loss": 0.2032, "step": 6444 }, { "epoch": 2.99674569967457, "grad_norm": 2.6105024814605713, "learning_rate": 5.888211714422733e-06, "loss": 0.2591, "step": 6446 }, { "epoch": 2.99767549976755, "grad_norm": 2.0533926486968994, "learning_rate": 5.938937224713793e-06, "loss": 0.2633, "step": 6448 }, { "epoch": 2.99860529986053, "grad_norm": 2.3846466541290283, "learning_rate": 5.989850859999242e-06, "loss": 0.2477, "step": 6450 }, { "epoch": 2.99953509995351, "grad_norm": 2.729599952697754, "learning_rate": 6.040952117781966e-06, "loss": 0.2467, "step": 6452 }, { "epoch": 3.00046490004649, "grad_norm": 1.3765323162078857, "learning_rate": 6.092240493713212e-06, "loss": 0.1628, "step": 6454 }, { "epoch": 3.00139470013947, "grad_norm": 2.5784945487976074, "learning_rate": 6.14371548159741e-06, "loss": 0.2835, "step": 6456 }, { "epoch": 3.00232450023245, "grad_norm": 2.303962469100952, "learning_rate": 6.1953765733972166e-06, "loss": 0.2579, "step": 6458 }, { "epoch": 3.00325430032543, "grad_norm": 2.1456029415130615, "learning_rate": 6.247223259238511e-06, "loss": 0.2313, "step": 6460 }, { "epoch": 3.00418410041841, "grad_norm": 2.163388252258301, "learning_rate": 6.299255027415433e-06, "loss": 0.2613, "step": 6462 }, { "epoch": 3.00511390051139, "grad_norm": 2.0616250038146973, "learning_rate": 6.351471364395467e-06, "loss": 0.1785, "step": 6464 }, { "epoch": 3.00604370060437, "grad_norm": 2.1231327056884766, "learning_rate": 6.403871754824391e-06, "loss": 0.2434, "step": 6466 }, { "epoch": 3.00697350069735, "grad_norm": 1.799204707145691, "learning_rate": 6.456455681531535e-06, "loss": 0.1471, "step": 6468 }, { "epoch": 3.00790330079033, "grad_norm": 2.0530436038970947, "learning_rate": 6.509222625534739e-06, "loss": 0.19, "step": 6470 }, { "epoch": 3.0088331008833102, "grad_norm": 2.2940824031829834, "learning_rate": 6.56217206604566e-06, "loss": 0.187, "step": 6472 }, { "epoch": 3.0097629009762903, "grad_norm": 2.1983139514923096, "learning_rate": 6.615303480474607e-06, "loss": 0.2455, "step": 6474 }, { "epoch": 3.01069270106927, "grad_norm": 1.7474411725997925, "learning_rate": 6.668616344436001e-06, "loss": 0.1898, "step": 6476 }, { "epoch": 3.01162250116225, "grad_norm": 1.8046715259552002, "learning_rate": 6.722110131753424e-06, "loss": 0.2152, "step": 6478 }, { "epoch": 3.01255230125523, "grad_norm": 1.6866062879562378, "learning_rate": 6.775784314464705e-06, "loss": 0.2116, "step": 6480 }, { "epoch": 3.01348210134821, "grad_norm": 1.9108837842941284, "learning_rate": 6.829638362827419e-06, "loss": 0.208, "step": 6482 }, { "epoch": 3.01441190144119, "grad_norm": 1.9414533376693726, "learning_rate": 6.883671745323822e-06, "loss": 0.19, "step": 6484 }, { "epoch": 3.01534170153417, "grad_norm": 2.5888683795928955, "learning_rate": 6.937883928666269e-06, "loss": 0.2548, "step": 6486 }, { "epoch": 3.01627150162715, "grad_norm": 2.510429859161377, "learning_rate": 6.992274377802341e-06, "loss": 0.2132, "step": 6488 }, { "epoch": 3.0172013017201302, "grad_norm": 2.012803077697754, "learning_rate": 7.046842555920288e-06, "loss": 0.2338, "step": 6490 }, { "epoch": 3.0181311018131103, "grad_norm": 2.247788906097412, "learning_rate": 7.101587924454225e-06, "loss": 0.2788, "step": 6492 }, { "epoch": 3.0190609019060903, "grad_norm": 2.212332248687744, "learning_rate": 7.156509943089468e-06, "loss": 0.214, "step": 6494 }, { "epoch": 3.0199907019990704, "grad_norm": 2.039198637008667, "learning_rate": 7.211608069767864e-06, "loss": 0.2505, "step": 6496 }, { "epoch": 3.02092050209205, "grad_norm": 1.9693794250488281, "learning_rate": 7.26688176069318e-06, "loss": 0.2269, "step": 6498 }, { "epoch": 3.02185030218503, "grad_norm": 2.0583434104919434, "learning_rate": 7.322330470336337e-06, "loss": 0.2246, "step": 6500 }, { "epoch": 3.02278010227801, "grad_norm": 1.9773844480514526, "learning_rate": 7.3779536514409535e-06, "loss": 0.2413, "step": 6502 }, { "epoch": 3.02370990237099, "grad_norm": 1.9807883501052856, "learning_rate": 7.433750755028755e-06, "loss": 0.1858, "step": 6504 }, { "epoch": 3.02463970246397, "grad_norm": 2.5255181789398193, "learning_rate": 7.489721230404854e-06, "loss": 0.2279, "step": 6506 }, { "epoch": 3.0255695025569502, "grad_norm": 2.175166130065918, "learning_rate": 7.545864525163195e-06, "loss": 0.2272, "step": 6508 }, { "epoch": 3.0264993026499303, "grad_norm": 1.876808524131775, "learning_rate": 7.6021800851921514e-06, "loss": 0.2054, "step": 6510 }, { "epoch": 3.0274291027429103, "grad_norm": 2.75274658203125, "learning_rate": 7.658667354679913e-06, "loss": 0.2371, "step": 6512 }, { "epoch": 3.0283589028358904, "grad_norm": 1.3303682804107666, "learning_rate": 7.715325776119842e-06, "loss": 0.1776, "step": 6514 }, { "epoch": 3.0292887029288704, "grad_norm": 1.7509398460388184, "learning_rate": 7.772154790316286e-06, "loss": 0.1734, "step": 6516 }, { "epoch": 3.0302185030218505, "grad_norm": 1.7225544452667236, "learning_rate": 7.82915383638979e-06, "loss": 0.2077, "step": 6518 }, { "epoch": 3.0311483031148305, "grad_norm": 2.619272470474243, "learning_rate": 7.886322351782808e-06, "loss": 0.2336, "step": 6520 }, { "epoch": 3.03207810320781, "grad_norm": 2.185534954071045, "learning_rate": 7.943659772265108e-06, "loss": 0.1694, "step": 6522 }, { "epoch": 3.03300790330079, "grad_norm": 2.650801658630371, "learning_rate": 8.001165531939505e-06, "loss": 0.2357, "step": 6524 }, { "epoch": 3.0339377033937702, "grad_norm": 1.8964749574661255, "learning_rate": 8.058839063247455e-06, "loss": 0.2119, "step": 6526 }, { "epoch": 3.0348675034867503, "grad_norm": 2.129931688308716, "learning_rate": 8.116679796974397e-06, "loss": 0.1978, "step": 6528 }, { "epoch": 3.0357973035797303, "grad_norm": 1.9581621885299683, "learning_rate": 8.174687162255672e-06, "loss": 0.2203, "step": 6530 }, { "epoch": 3.0367271036727104, "grad_norm": 2.988502264022827, "learning_rate": 8.232860586582037e-06, "loss": 0.2555, "step": 6532 }, { "epoch": 3.0376569037656904, "grad_norm": 2.2046091556549072, "learning_rate": 8.291199495805206e-06, "loss": 0.2007, "step": 6534 }, { "epoch": 3.0385867038586705, "grad_norm": 1.1790144443511963, "learning_rate": 8.349703314143704e-06, "loss": 0.1199, "step": 6536 }, { "epoch": 3.0395165039516505, "grad_norm": 1.8670533895492554, "learning_rate": 8.408371464188529e-06, "loss": 0.208, "step": 6538 }, { "epoch": 3.0404463040446306, "grad_norm": 2.192110538482666, "learning_rate": 8.46720336690872e-06, "loss": 0.2238, "step": 6540 }, { "epoch": 3.0413761041376106, "grad_norm": 2.11015248298645, "learning_rate": 8.526198441657093e-06, "loss": 0.2056, "step": 6542 }, { "epoch": 3.04230590423059, "grad_norm": 1.967313289642334, "learning_rate": 8.5853561061761e-06, "loss": 0.2094, "step": 6544 }, { "epoch": 3.0432357043235703, "grad_norm": 2.140281915664673, "learning_rate": 8.64467577660348e-06, "loss": 0.19, "step": 6546 }, { "epoch": 3.0441655044165503, "grad_norm": 1.968183994293213, "learning_rate": 8.704156867478036e-06, "loss": 0.1825, "step": 6548 }, { "epoch": 3.0450953045095304, "grad_norm": 1.4066648483276367, "learning_rate": 8.763798791745408e-06, "loss": 0.1836, "step": 6550 }, { "epoch": 3.0460251046025104, "grad_norm": 2.182344913482666, "learning_rate": 8.823600960763888e-06, "loss": 0.2102, "step": 6552 }, { "epoch": 3.0469549046954905, "grad_norm": 2.0130646228790283, "learning_rate": 8.88356278431024e-06, "loss": 0.2006, "step": 6554 }, { "epoch": 3.0478847047884705, "grad_norm": 2.461865186691284, "learning_rate": 8.943683670585414e-06, "loss": 0.2308, "step": 6556 }, { "epoch": 3.0488145048814506, "grad_norm": 2.4673855304718018, "learning_rate": 9.003963026220523e-06, "loss": 0.3095, "step": 6558 }, { "epoch": 3.0497443049744306, "grad_norm": 1.941782832145691, "learning_rate": 9.064400256282774e-06, "loss": 0.2501, "step": 6560 }, { "epoch": 3.0506741050674107, "grad_norm": 2.3019919395446777, "learning_rate": 9.124994764280996e-06, "loss": 0.1775, "step": 6562 }, { "epoch": 3.0516039051603907, "grad_norm": 2.104947805404663, "learning_rate": 9.185745952171898e-06, "loss": 0.2213, "step": 6564 }, { "epoch": 3.0525337052533703, "grad_norm": 2.9205639362335205, "learning_rate": 9.246653220365778e-06, "loss": 0.347, "step": 6566 }, { "epoch": 3.0534635053463504, "grad_norm": 1.9943691492080688, "learning_rate": 9.307715967732488e-06, "loss": 0.276, "step": 6568 }, { "epoch": 3.0543933054393304, "grad_norm": 2.1635756492614746, "learning_rate": 9.368933591607366e-06, "loss": 0.2025, "step": 6570 }, { "epoch": 3.0553231055323105, "grad_norm": 2.139051914215088, "learning_rate": 9.430305487797178e-06, "loss": 0.2142, "step": 6572 }, { "epoch": 3.0562529056252905, "grad_norm": 2.207473039627075, "learning_rate": 9.491831050586118e-06, "loss": 0.2258, "step": 6574 }, { "epoch": 3.0571827057182706, "grad_norm": 2.268986940383911, "learning_rate": 9.553509672741657e-06, "loss": 0.2413, "step": 6576 }, { "epoch": 3.0581125058112506, "grad_norm": 2.472681999206543, "learning_rate": 9.615340745520723e-06, "loss": 0.2964, "step": 6578 }, { "epoch": 3.0590423059042307, "grad_norm": 2.3891165256500244, "learning_rate": 9.67732365867556e-06, "loss": 0.2544, "step": 6580 }, { "epoch": 3.0599721059972107, "grad_norm": 2.18684720993042, "learning_rate": 9.739457800459938e-06, "loss": 0.182, "step": 6582 }, { "epoch": 3.0609019060901907, "grad_norm": 2.529411554336548, "learning_rate": 9.80174255763486e-06, "loss": 0.3056, "step": 6584 }, { "epoch": 3.061831706183171, "grad_norm": 2.292738437652588, "learning_rate": 9.864177315474956e-06, "loss": 0.2018, "step": 6586 }, { "epoch": 3.062761506276151, "grad_norm": 2.1287927627563477, "learning_rate": 9.926761457774406e-06, "loss": 0.1964, "step": 6588 }, { "epoch": 3.0636913063691305, "grad_norm": 2.0499322414398193, "learning_rate": 9.989494366852883e-06, "loss": 0.2098, "step": 6590 }, { "epoch": 3.0646211064621105, "grad_norm": 2.443011999130249, "learning_rate": 1.0052375423562007e-05, "loss": 0.2406, "step": 6592 }, { "epoch": 3.0655509065550905, "grad_norm": 2.5912625789642334, "learning_rate": 1.0115404007291138e-05, "loss": 0.2281, "step": 6594 }, { "epoch": 3.0664807066480706, "grad_norm": 2.3962478637695312, "learning_rate": 1.0178579495973527e-05, "loss": 0.2145, "step": 6596 }, { "epoch": 3.0674105067410506, "grad_norm": 1.8663135766983032, "learning_rate": 1.0241901266092638e-05, "loss": 0.1835, "step": 6598 }, { "epoch": 3.0683403068340307, "grad_norm": 2.9428505897521973, "learning_rate": 1.0305368692688168e-05, "loss": 0.2938, "step": 6600 }, { "epoch": 3.0692701069270107, "grad_norm": 2.54949951171875, "learning_rate": 1.036898114936224e-05, "loss": 0.2158, "step": 6602 }, { "epoch": 3.070199907019991, "grad_norm": 2.576172113418579, "learning_rate": 1.043273800828559e-05, "loss": 0.1896, "step": 6604 }, { "epoch": 3.071129707112971, "grad_norm": 1.7475072145462036, "learning_rate": 1.0496638640203752e-05, "loss": 0.2237, "step": 6606 }, { "epoch": 3.072059507205951, "grad_norm": 2.021280527114868, "learning_rate": 1.056068241444333e-05, "loss": 0.25, "step": 6608 }, { "epoch": 3.072989307298931, "grad_norm": 2.2958555221557617, "learning_rate": 1.0624868698918052e-05, "loss": 0.1489, "step": 6610 }, { "epoch": 3.0739191073919105, "grad_norm": 2.349440813064575, "learning_rate": 1.0689196860135204e-05, "loss": 0.2433, "step": 6612 }, { "epoch": 3.0748489074848906, "grad_norm": 2.5073578357696533, "learning_rate": 1.075366626320188e-05, "loss": 0.2903, "step": 6614 }, { "epoch": 3.0757787075778706, "grad_norm": 2.265897035598755, "learning_rate": 1.0818276271831096e-05, "loss": 0.1984, "step": 6616 }, { "epoch": 3.0767085076708507, "grad_norm": 2.0772931575775146, "learning_rate": 1.0883026248348078e-05, "loss": 0.2355, "step": 6618 }, { "epoch": 3.0776383077638307, "grad_norm": 2.497431993484497, "learning_rate": 1.0947915553696737e-05, "loss": 0.2024, "step": 6620 }, { "epoch": 3.078568107856811, "grad_norm": 1.756562352180481, "learning_rate": 1.1012943547445858e-05, "loss": 0.2026, "step": 6622 }, { "epoch": 3.079497907949791, "grad_norm": 1.843161702156067, "learning_rate": 1.107810958779529e-05, "loss": 0.1903, "step": 6624 }, { "epoch": 3.080427708042771, "grad_norm": 2.552539825439453, "learning_rate": 1.1143413031582636e-05, "loss": 0.226, "step": 6626 }, { "epoch": 3.081357508135751, "grad_norm": 2.345541000366211, "learning_rate": 1.1208853234289225e-05, "loss": 0.2614, "step": 6628 }, { "epoch": 3.082287308228731, "grad_norm": 2.2899911403656006, "learning_rate": 1.127442955004672e-05, "loss": 0.1593, "step": 6630 }, { "epoch": 3.083217108321711, "grad_norm": 2.46100115776062, "learning_rate": 1.1340141331643281e-05, "loss": 0.2726, "step": 6632 }, { "epoch": 3.0841469084146906, "grad_norm": 2.0303351879119873, "learning_rate": 1.1405987930530157e-05, "loss": 0.1524, "step": 6634 }, { "epoch": 3.0850767085076707, "grad_norm": 1.8471870422363281, "learning_rate": 1.1471968696828106e-05, "loss": 0.2211, "step": 6636 }, { "epoch": 3.0860065086006507, "grad_norm": 2.140768051147461, "learning_rate": 1.1538082979333491e-05, "loss": 0.1873, "step": 6638 }, { "epoch": 3.086936308693631, "grad_norm": 2.3662455081939697, "learning_rate": 1.1604330125525077e-05, "loss": 0.2029, "step": 6640 }, { "epoch": 3.087866108786611, "grad_norm": 2.072021245956421, "learning_rate": 1.1670709481570312e-05, "loss": 0.1521, "step": 6642 }, { "epoch": 3.088795908879591, "grad_norm": 2.5428946018218994, "learning_rate": 1.1737220392331675e-05, "loss": 0.2065, "step": 6644 }, { "epoch": 3.089725708972571, "grad_norm": 2.2688605785369873, "learning_rate": 1.1803862201373323e-05, "loss": 0.2001, "step": 6646 }, { "epoch": 3.090655509065551, "grad_norm": 2.1576590538024902, "learning_rate": 1.1870634250967586e-05, "loss": 0.2332, "step": 6648 }, { "epoch": 3.091585309158531, "grad_norm": 2.749423027038574, "learning_rate": 1.1937535882101292e-05, "loss": 0.1793, "step": 6650 }, { "epoch": 3.092515109251511, "grad_norm": 1.8298230171203613, "learning_rate": 1.2004566434482273e-05, "loss": 0.1586, "step": 6652 }, { "epoch": 3.093444909344491, "grad_norm": 1.7945060729980469, "learning_rate": 1.2071725246546076e-05, "loss": 0.1696, "step": 6654 }, { "epoch": 3.0943747094374707, "grad_norm": 2.2937660217285156, "learning_rate": 1.2139011655462377e-05, "loss": 0.1429, "step": 6656 }, { "epoch": 3.0953045095304508, "grad_norm": 2.1520750522613525, "learning_rate": 1.2206424997141376e-05, "loss": 0.2127, "step": 6658 }, { "epoch": 3.096234309623431, "grad_norm": 2.328232765197754, "learning_rate": 1.2273964606240714e-05, "loss": 0.1947, "step": 6660 }, { "epoch": 3.097164109716411, "grad_norm": 1.581163763999939, "learning_rate": 1.2341629816171681e-05, "loss": 0.1192, "step": 6662 }, { "epoch": 3.098093909809391, "grad_norm": 2.1732823848724365, "learning_rate": 1.2409419959106006e-05, "loss": 0.2103, "step": 6664 }, { "epoch": 3.099023709902371, "grad_norm": 2.2190935611724854, "learning_rate": 1.2477334365982275e-05, "loss": 0.277, "step": 6666 }, { "epoch": 3.099953509995351, "grad_norm": 2.0351552963256836, "learning_rate": 1.2545372366512712e-05, "loss": 0.2001, "step": 6668 }, { "epoch": 3.100883310088331, "grad_norm": 2.094451904296875, "learning_rate": 1.2613533289189829e-05, "loss": 0.2073, "step": 6670 }, { "epoch": 3.101813110181311, "grad_norm": 2.582503318786621, "learning_rate": 1.2681816461292725e-05, "loss": 0.2266, "step": 6672 }, { "epoch": 3.102742910274291, "grad_norm": 1.7954695224761963, "learning_rate": 1.2750221208894093e-05, "loss": 0.2147, "step": 6674 }, { "epoch": 3.103672710367271, "grad_norm": 2.1713736057281494, "learning_rate": 1.2818746856866731e-05, "loss": 0.2172, "step": 6676 }, { "epoch": 3.104602510460251, "grad_norm": 1.8957992792129517, "learning_rate": 1.2887392728890084e-05, "loss": 0.2368, "step": 6678 }, { "epoch": 3.105532310553231, "grad_norm": 2.452910900115967, "learning_rate": 1.295615814745711e-05, "loss": 0.2479, "step": 6680 }, { "epoch": 3.106462110646211, "grad_norm": 2.3206419944763184, "learning_rate": 1.3025042433880965e-05, "loss": 0.1977, "step": 6682 }, { "epoch": 3.107391910739191, "grad_norm": 1.9646177291870117, "learning_rate": 1.3094044908301546e-05, "loss": 0.2114, "step": 6684 }, { "epoch": 3.108321710832171, "grad_norm": 1.8915983438491821, "learning_rate": 1.3163164889692218e-05, "loss": 0.2477, "step": 6686 }, { "epoch": 3.109251510925151, "grad_norm": 1.9396580457687378, "learning_rate": 1.3232401695866706e-05, "loss": 0.1788, "step": 6688 }, { "epoch": 3.110181311018131, "grad_norm": 3.0161783695220947, "learning_rate": 1.3301754643485681e-05, "loss": 0.2549, "step": 6690 }, { "epoch": 3.111111111111111, "grad_norm": 1.8116044998168945, "learning_rate": 1.3371223048063553e-05, "loss": 0.1641, "step": 6692 }, { "epoch": 3.112040911204091, "grad_norm": 1.7759792804718018, "learning_rate": 1.344080622397521e-05, "loss": 0.2431, "step": 6694 }, { "epoch": 3.1129707112970713, "grad_norm": 1.8037848472595215, "learning_rate": 1.3510503484462804e-05, "loss": 0.2602, "step": 6696 }, { "epoch": 3.1139005113900513, "grad_norm": 2.054781913757324, "learning_rate": 1.358031414164255e-05, "loss": 0.218, "step": 6698 }, { "epoch": 3.1148303114830314, "grad_norm": 1.6984471082687378, "learning_rate": 1.3650237506511364e-05, "loss": 0.1633, "step": 6700 }, { "epoch": 3.115760111576011, "grad_norm": 2.4839870929718018, "learning_rate": 1.3720272888953861e-05, "loss": 0.2474, "step": 6702 }, { "epoch": 3.116689911668991, "grad_norm": 2.17707896232605, "learning_rate": 1.3790419597749216e-05, "loss": 0.2039, "step": 6704 }, { "epoch": 3.117619711761971, "grad_norm": 2.1957826614379883, "learning_rate": 1.3860676940577613e-05, "loss": 0.2451, "step": 6706 }, { "epoch": 3.118549511854951, "grad_norm": 2.2222259044647217, "learning_rate": 1.3931044224027478e-05, "loss": 0.194, "step": 6708 }, { "epoch": 3.119479311947931, "grad_norm": 3.4144060611724854, "learning_rate": 1.4001520753602131e-05, "loss": 0.3466, "step": 6710 }, { "epoch": 3.120409112040911, "grad_norm": 2.377511501312256, "learning_rate": 1.4072105833726685e-05, "loss": 0.1916, "step": 6712 }, { "epoch": 3.1213389121338913, "grad_norm": 2.647871732711792, "learning_rate": 1.4142798767754894e-05, "loss": 0.2571, "step": 6714 }, { "epoch": 3.1222687122268713, "grad_norm": 2.297321081161499, "learning_rate": 1.4213598857976029e-05, "loss": 0.1593, "step": 6716 }, { "epoch": 3.1231985123198513, "grad_norm": 1.7660033702850342, "learning_rate": 1.4284505405621829e-05, "loss": 0.2283, "step": 6718 }, { "epoch": 3.1241283124128314, "grad_norm": 1.6722559928894043, "learning_rate": 1.4355517710873214e-05, "loss": 0.1393, "step": 6720 }, { "epoch": 3.1250581125058114, "grad_norm": 1.678761601448059, "learning_rate": 1.442663507286745e-05, "loss": 0.185, "step": 6722 }, { "epoch": 3.125987912598791, "grad_norm": 2.635885238647461, "learning_rate": 1.4497856789704828e-05, "loss": 0.2205, "step": 6724 }, { "epoch": 3.126917712691771, "grad_norm": 2.2447872161865234, "learning_rate": 1.4569182158455888e-05, "loss": 0.2044, "step": 6726 }, { "epoch": 3.127847512784751, "grad_norm": 2.7276358604431152, "learning_rate": 1.464061047516791e-05, "loss": 0.2408, "step": 6728 }, { "epoch": 3.128777312877731, "grad_norm": 2.2091281414031982, "learning_rate": 1.4712141034872292e-05, "loss": 0.2351, "step": 6730 }, { "epoch": 3.1297071129707112, "grad_norm": 1.6019959449768066, "learning_rate": 1.478377313159132e-05, "loss": 0.1969, "step": 6732 }, { "epoch": 3.1306369130636913, "grad_norm": 2.3195979595184326, "learning_rate": 1.4855506058345002e-05, "loss": 0.1805, "step": 6734 }, { "epoch": 3.1315667131566713, "grad_norm": 2.012441873550415, "learning_rate": 1.4927339107158431e-05, "loss": 0.2091, "step": 6736 }, { "epoch": 3.1324965132496514, "grad_norm": 2.2154321670532227, "learning_rate": 1.4999271569068418e-05, "loss": 0.1988, "step": 6738 }, { "epoch": 3.1334263133426314, "grad_norm": 2.6196651458740234, "learning_rate": 1.5071302734130514e-05, "loss": 0.2158, "step": 6740 }, { "epoch": 3.1343561134356115, "grad_norm": 2.1645727157592773, "learning_rate": 1.5143431891426248e-05, "loss": 0.2461, "step": 6742 }, { "epoch": 3.1352859135285915, "grad_norm": 1.9737489223480225, "learning_rate": 1.5215658329069958e-05, "loss": 0.2101, "step": 6744 }, { "epoch": 3.1362157136215716, "grad_norm": 1.7690625190734863, "learning_rate": 1.528798133421586e-05, "loss": 0.1531, "step": 6746 }, { "epoch": 3.137145513714551, "grad_norm": 1.8580467700958252, "learning_rate": 1.5360400193065093e-05, "loss": 0.1974, "step": 6748 }, { "epoch": 3.1380753138075312, "grad_norm": 2.6489346027374268, "learning_rate": 1.543291419087276e-05, "loss": 0.2377, "step": 6750 }, { "epoch": 3.1390051139005113, "grad_norm": 2.6639769077301025, "learning_rate": 1.5505522611955012e-05, "loss": 0.2598, "step": 6752 }, { "epoch": 3.1399349139934913, "grad_norm": 2.665173053741455, "learning_rate": 1.5578224739695968e-05, "loss": 0.2868, "step": 6754 }, { "epoch": 3.1408647140864714, "grad_norm": 2.413398504257202, "learning_rate": 1.5651019856554975e-05, "loss": 0.2418, "step": 6756 }, { "epoch": 3.1417945141794514, "grad_norm": 2.0207443237304688, "learning_rate": 1.5723907244073705e-05, "loss": 0.2117, "step": 6758 }, { "epoch": 3.1427243142724315, "grad_norm": 2.5173850059509277, "learning_rate": 1.5796886182883066e-05, "loss": 0.3105, "step": 6760 }, { "epoch": 3.1436541143654115, "grad_norm": 2.0744690895080566, "learning_rate": 1.586995595271031e-05, "loss": 0.2602, "step": 6762 }, { "epoch": 3.1445839144583916, "grad_norm": 1.898734211921692, "learning_rate": 1.594311583238636e-05, "loss": 0.2036, "step": 6764 }, { "epoch": 3.1455137145513716, "grad_norm": 2.0864098072052, "learning_rate": 1.6016365099852773e-05, "loss": 0.2558, "step": 6766 }, { "epoch": 3.1464435146443517, "grad_norm": 2.5313735008239746, "learning_rate": 1.6089703032168723e-05, "loss": 0.2357, "step": 6768 }, { "epoch": 3.1473733147373313, "grad_norm": 2.5156610012054443, "learning_rate": 1.616312890551856e-05, "loss": 0.2401, "step": 6770 }, { "epoch": 3.1483031148303113, "grad_norm": 1.9705352783203125, "learning_rate": 1.623664199521851e-05, "loss": 0.2325, "step": 6772 }, { "epoch": 3.1492329149232914, "grad_norm": 2.2661309242248535, "learning_rate": 1.63102415757241e-05, "loss": 0.2088, "step": 6774 }, { "epoch": 3.1501627150162714, "grad_norm": 2.2919225692749023, "learning_rate": 1.6383926920637093e-05, "loss": 0.2661, "step": 6776 }, { "epoch": 3.1510925151092515, "grad_norm": 2.7653539180755615, "learning_rate": 1.645769730271289e-05, "loss": 0.2201, "step": 6778 }, { "epoch": 3.1520223152022315, "grad_norm": 2.4429850578308105, "learning_rate": 1.6531551993867727e-05, "loss": 0.1935, "step": 6780 }, { "epoch": 3.1529521152952116, "grad_norm": 1.9646873474121094, "learning_rate": 1.6605490265185492e-05, "loss": 0.2578, "step": 6782 }, { "epoch": 3.1538819153881916, "grad_norm": 1.989832878112793, "learning_rate": 1.6679511386925337e-05, "loss": 0.1635, "step": 6784 }, { "epoch": 3.1548117154811717, "grad_norm": 2.5037693977355957, "learning_rate": 1.6753614628528723e-05, "loss": 0.2967, "step": 6786 }, { "epoch": 3.1557415155741517, "grad_norm": 1.9229291677474976, "learning_rate": 1.682779925862648e-05, "loss": 0.1912, "step": 6788 }, { "epoch": 3.1566713156671318, "grad_norm": 1.7440598011016846, "learning_rate": 1.6902064545046257e-05, "loss": 0.1957, "step": 6790 }, { "epoch": 3.1576011157601114, "grad_norm": 2.280630111694336, "learning_rate": 1.697640975481975e-05, "loss": 0.2388, "step": 6792 }, { "epoch": 3.1585309158530914, "grad_norm": 2.888559341430664, "learning_rate": 1.7050834154189753e-05, "loss": 0.2288, "step": 6794 }, { "epoch": 3.1594607159460715, "grad_norm": 1.773139476776123, "learning_rate": 1.7125337008617406e-05, "loss": 0.1991, "step": 6796 }, { "epoch": 3.1603905160390515, "grad_norm": 2.1284890174865723, "learning_rate": 1.7199917582789646e-05, "loss": 0.2504, "step": 6798 }, { "epoch": 3.1613203161320316, "grad_norm": 1.805159330368042, "learning_rate": 1.7274575140626328e-05, "loss": 0.2015, "step": 6800 }, { "epoch": 3.1622501162250116, "grad_norm": 2.468823194503784, "learning_rate": 1.7349308945287494e-05, "loss": 0.2851, "step": 6802 }, { "epoch": 3.1631799163179917, "grad_norm": 1.8520352840423584, "learning_rate": 1.7424118259180658e-05, "loss": 0.1654, "step": 6804 }, { "epoch": 3.1641097164109717, "grad_norm": 2.187278985977173, "learning_rate": 1.7499002343968095e-05, "loss": 0.2506, "step": 6806 }, { "epoch": 3.1650395165039518, "grad_norm": 1.993016242980957, "learning_rate": 1.757396046057416e-05, "loss": 0.1859, "step": 6808 }, { "epoch": 3.165969316596932, "grad_norm": 2.516200065612793, "learning_rate": 1.7648991869192432e-05, "loss": 0.2461, "step": 6810 }, { "epoch": 3.166899116689912, "grad_norm": 2.9970760345458984, "learning_rate": 1.772409582929319e-05, "loss": 0.2858, "step": 6812 }, { "epoch": 3.1678289167828915, "grad_norm": 1.6939176321029663, "learning_rate": 1.7799271599630793e-05, "loss": 0.2364, "step": 6814 }, { "epoch": 3.1687587168758715, "grad_norm": 1.8053348064422607, "learning_rate": 1.7874518438250617e-05, "loss": 0.159, "step": 6816 }, { "epoch": 3.1696885169688516, "grad_norm": 2.3525912761688232, "learning_rate": 1.7949835602496776e-05, "loss": 0.267, "step": 6818 }, { "epoch": 3.1706183170618316, "grad_norm": 2.1252684593200684, "learning_rate": 1.802522234901932e-05, "loss": 0.1983, "step": 6820 }, { "epoch": 3.1715481171548117, "grad_norm": 2.559988498687744, "learning_rate": 1.8100677933781364e-05, "loss": 0.2099, "step": 6822 }, { "epoch": 3.1724779172477917, "grad_norm": 2.026474714279175, "learning_rate": 1.8176201612066868e-05, "loss": 0.1971, "step": 6824 }, { "epoch": 3.1734077173407718, "grad_norm": 3.032831907272339, "learning_rate": 1.8251792638487586e-05, "loss": 0.2259, "step": 6826 }, { "epoch": 3.174337517433752, "grad_norm": 1.910120964050293, "learning_rate": 1.8327450266990643e-05, "loss": 0.2033, "step": 6828 }, { "epoch": 3.175267317526732, "grad_norm": 2.743335247039795, "learning_rate": 1.84031737508657e-05, "loss": 0.2201, "step": 6830 }, { "epoch": 3.176197117619712, "grad_norm": 1.9322588443756104, "learning_rate": 1.84789623427526e-05, "loss": 0.1548, "step": 6832 }, { "epoch": 3.177126917712692, "grad_norm": 2.048898220062256, "learning_rate": 1.8554815294648524e-05, "loss": 0.2296, "step": 6834 }, { "epoch": 3.1780567178056716, "grad_norm": 1.6356642246246338, "learning_rate": 1.8630731857915456e-05, "loss": 0.2017, "step": 6836 }, { "epoch": 3.1789865178986516, "grad_norm": 2.182549238204956, "learning_rate": 1.870671128328758e-05, "loss": 0.1956, "step": 6838 }, { "epoch": 3.1799163179916317, "grad_norm": 3.011644124984741, "learning_rate": 1.878275282087863e-05, "loss": 0.2121, "step": 6840 }, { "epoch": 3.1808461180846117, "grad_norm": 2.2765743732452393, "learning_rate": 1.885885572018938e-05, "loss": 0.2044, "step": 6842 }, { "epoch": 3.1817759181775918, "grad_norm": 2.3209619522094727, "learning_rate": 1.893501923011481e-05, "loss": 0.2192, "step": 6844 }, { "epoch": 3.182705718270572, "grad_norm": 2.423557996749878, "learning_rate": 1.9011242598951946e-05, "loss": 0.2024, "step": 6846 }, { "epoch": 3.183635518363552, "grad_norm": 3.107394218444824, "learning_rate": 1.908752507440691e-05, "loss": 0.3264, "step": 6848 }, { "epoch": 3.184565318456532, "grad_norm": 2.124770164489746, "learning_rate": 1.9163865903602384e-05, "loss": 0.2294, "step": 6850 }, { "epoch": 3.185495118549512, "grad_norm": 2.400387763977051, "learning_rate": 1.9240264333085245e-05, "loss": 0.2714, "step": 6852 }, { "epoch": 3.186424918642492, "grad_norm": 1.3787965774536133, "learning_rate": 1.9316719608833845e-05, "loss": 0.156, "step": 6854 }, { "epoch": 3.187354718735472, "grad_norm": 2.818955898284912, "learning_rate": 1.939323097626547e-05, "loss": 0.2192, "step": 6856 }, { "epoch": 3.1882845188284517, "grad_norm": 2.3748562335968018, "learning_rate": 1.946979768024381e-05, "loss": 0.2611, "step": 6858 }, { "epoch": 3.1892143189214317, "grad_norm": 2.2491698265075684, "learning_rate": 1.9546418965086422e-05, "loss": 0.2358, "step": 6860 }, { "epoch": 3.1901441190144117, "grad_norm": 2.2822375297546387, "learning_rate": 1.962309407457219e-05, "loss": 0.2755, "step": 6862 }, { "epoch": 3.191073919107392, "grad_norm": 2.241096258163452, "learning_rate": 1.9699822251948657e-05, "loss": 0.216, "step": 6864 }, { "epoch": 3.192003719200372, "grad_norm": 2.112574577331543, "learning_rate": 1.9776602739939677e-05, "loss": 0.2537, "step": 6866 }, { "epoch": 3.192933519293352, "grad_norm": 2.645629644393921, "learning_rate": 1.9853434780752933e-05, "loss": 0.2428, "step": 6868 }, { "epoch": 3.193863319386332, "grad_norm": 1.7694332599639893, "learning_rate": 1.9930317616087193e-05, "loss": 0.1804, "step": 6870 }, { "epoch": 3.194793119479312, "grad_norm": 2.496375322341919, "learning_rate": 2.0007250487139823e-05, "loss": 0.2219, "step": 6872 }, { "epoch": 3.195722919572292, "grad_norm": 2.383777379989624, "learning_rate": 2.0084232634614493e-05, "loss": 0.2155, "step": 6874 }, { "epoch": 3.196652719665272, "grad_norm": 2.80560040473938, "learning_rate": 2.0161263298728525e-05, "loss": 0.2104, "step": 6876 }, { "epoch": 3.197582519758252, "grad_norm": 2.0460216999053955, "learning_rate": 2.0238341719220237e-05, "loss": 0.1628, "step": 6878 }, { "epoch": 3.198512319851232, "grad_norm": 2.4828569889068604, "learning_rate": 2.0315467135356863e-05, "loss": 0.1575, "step": 6880 }, { "epoch": 3.199442119944212, "grad_norm": 2.0274274349212646, "learning_rate": 2.039263878594168e-05, "loss": 0.1488, "step": 6882 }, { "epoch": 3.200371920037192, "grad_norm": 2.7051095962524414, "learning_rate": 2.0469855909321574e-05, "loss": 0.2113, "step": 6884 }, { "epoch": 3.201301720130172, "grad_norm": 3.9396214485168457, "learning_rate": 2.054711774339475e-05, "loss": 0.2209, "step": 6886 }, { "epoch": 3.202231520223152, "grad_norm": 2.5114779472351074, "learning_rate": 2.0624423525618054e-05, "loss": 0.2884, "step": 6888 }, { "epoch": 3.203161320316132, "grad_norm": 1.995715618133545, "learning_rate": 2.0701772493014753e-05, "loss": 0.1953, "step": 6890 }, { "epoch": 3.204091120409112, "grad_norm": 2.269357442855835, "learning_rate": 2.0779163882181638e-05, "loss": 0.2083, "step": 6892 }, { "epoch": 3.205020920502092, "grad_norm": 2.184603452682495, "learning_rate": 2.085659692929699e-05, "loss": 0.1955, "step": 6894 }, { "epoch": 3.205950720595072, "grad_norm": 3.5896048545837402, "learning_rate": 2.093407087012793e-05, "loss": 0.2842, "step": 6896 }, { "epoch": 3.206880520688052, "grad_norm": 1.867485523223877, "learning_rate": 2.101158494003786e-05, "loss": 0.2253, "step": 6898 }, { "epoch": 3.2078103207810322, "grad_norm": 2.3199408054351807, "learning_rate": 2.1089138373994196e-05, "loss": 0.1743, "step": 6900 }, { "epoch": 3.2087401208740123, "grad_norm": 2.105114459991455, "learning_rate": 2.1166730406575947e-05, "loss": 0.1755, "step": 6902 }, { "epoch": 3.2096699209669923, "grad_norm": 2.0009098052978516, "learning_rate": 2.1244360271981077e-05, "loss": 0.1636, "step": 6904 }, { "epoch": 3.210599721059972, "grad_norm": 2.6536614894866943, "learning_rate": 2.1322027204034063e-05, "loss": 0.2112, "step": 6906 }, { "epoch": 3.211529521152952, "grad_norm": 2.6943092346191406, "learning_rate": 2.139973043619369e-05, "loss": 0.2742, "step": 6908 }, { "epoch": 3.212459321245932, "grad_norm": 2.274153232574463, "learning_rate": 2.1477469201560422e-05, "loss": 0.2678, "step": 6910 }, { "epoch": 3.213389121338912, "grad_norm": 3.393389940261841, "learning_rate": 2.1555242732884034e-05, "loss": 0.3176, "step": 6912 }, { "epoch": 3.214318921431892, "grad_norm": 2.2305307388305664, "learning_rate": 2.163305026257117e-05, "loss": 0.2272, "step": 6914 }, { "epoch": 3.215248721524872, "grad_norm": 2.4690747261047363, "learning_rate": 2.1710891022692915e-05, "loss": 0.2097, "step": 6916 }, { "epoch": 3.2161785216178522, "grad_norm": 1.9967762231826782, "learning_rate": 2.1788764244992447e-05, "loss": 0.2136, "step": 6918 }, { "epoch": 3.2171083217108323, "grad_norm": 2.404855966567993, "learning_rate": 2.186666916089241e-05, "loss": 0.1806, "step": 6920 }, { "epoch": 3.2180381218038123, "grad_norm": 2.6674113273620605, "learning_rate": 2.1944605001502735e-05, "loss": 0.3292, "step": 6922 }, { "epoch": 3.2189679218967924, "grad_norm": 2.561439037322998, "learning_rate": 2.2022570997628263e-05, "loss": 0.29, "step": 6924 }, { "epoch": 3.2198977219897724, "grad_norm": 2.9270918369293213, "learning_rate": 2.2100566379775967e-05, "loss": 0.2304, "step": 6926 }, { "epoch": 3.220827522082752, "grad_norm": 2.3871514797210693, "learning_rate": 2.217859037816296e-05, "loss": 0.169, "step": 6928 }, { "epoch": 3.221757322175732, "grad_norm": 1.9491078853607178, "learning_rate": 2.2256642222723913e-05, "loss": 0.1604, "step": 6930 }, { "epoch": 3.222687122268712, "grad_norm": 2.956843376159668, "learning_rate": 2.23347211431185e-05, "loss": 0.2131, "step": 6932 }, { "epoch": 3.223616922361692, "grad_norm": 2.7098565101623535, "learning_rate": 2.2412826368739437e-05, "loss": 0.2582, "step": 6934 }, { "epoch": 3.224546722454672, "grad_norm": 2.184826374053955, "learning_rate": 2.2490957128719624e-05, "loss": 0.1795, "step": 6936 }, { "epoch": 3.2254765225476523, "grad_norm": 3.0739214420318604, "learning_rate": 2.2569112651940053e-05, "loss": 0.2897, "step": 6938 }, { "epoch": 3.2264063226406323, "grad_norm": 2.4494807720184326, "learning_rate": 2.2647292167037175e-05, "loss": 0.2494, "step": 6940 }, { "epoch": 3.2273361227336124, "grad_norm": 1.8986506462097168, "learning_rate": 2.272549490241078e-05, "loss": 0.2397, "step": 6942 }, { "epoch": 3.2282659228265924, "grad_norm": 2.7775259017944336, "learning_rate": 2.2803720086231442e-05, "loss": 0.1936, "step": 6944 }, { "epoch": 3.2291957229195725, "grad_norm": 2.471625328063965, "learning_rate": 2.288196694644818e-05, "loss": 0.2339, "step": 6946 }, { "epoch": 3.2301255230125525, "grad_norm": 2.9339821338653564, "learning_rate": 2.2960234710796077e-05, "loss": 0.2928, "step": 6948 }, { "epoch": 3.231055323105532, "grad_norm": 2.2107038497924805, "learning_rate": 2.303852260680389e-05, "loss": 0.2163, "step": 6950 }, { "epoch": 3.231985123198512, "grad_norm": 2.254758596420288, "learning_rate": 2.3116829861801737e-05, "loss": 0.2365, "step": 6952 }, { "epoch": 3.232914923291492, "grad_norm": 2.9028732776641846, "learning_rate": 2.319515570292849e-05, "loss": 0.2885, "step": 6954 }, { "epoch": 3.2338447233844723, "grad_norm": 2.2561752796173096, "learning_rate": 2.3273499357139852e-05, "loss": 0.222, "step": 6956 }, { "epoch": 3.2347745234774523, "grad_norm": 1.9567325115203857, "learning_rate": 2.3351860051215593e-05, "loss": 0.155, "step": 6958 }, { "epoch": 3.2357043235704324, "grad_norm": 2.953479528427124, "learning_rate": 2.3430237011767194e-05, "loss": 0.2707, "step": 6960 }, { "epoch": 3.2366341236634124, "grad_norm": 2.162062406539917, "learning_rate": 2.3508629465245758e-05, "loss": 0.1858, "step": 6962 }, { "epoch": 3.2375639237563925, "grad_norm": 1.995719313621521, "learning_rate": 2.3587036637949452e-05, "loss": 0.1792, "step": 6964 }, { "epoch": 3.2384937238493725, "grad_norm": 2.8183436393737793, "learning_rate": 2.366545775603101e-05, "loss": 0.2453, "step": 6966 }, { "epoch": 3.2394235239423526, "grad_norm": 2.416067123413086, "learning_rate": 2.374389204550578e-05, "loss": 0.2238, "step": 6968 }, { "epoch": 3.2403533240353326, "grad_norm": 2.214883804321289, "learning_rate": 2.3822338732258944e-05, "loss": 0.2747, "step": 6970 }, { "epoch": 3.241283124128312, "grad_norm": 3.901935577392578, "learning_rate": 2.390079704205343e-05, "loss": 0.336, "step": 6972 }, { "epoch": 3.2422129242212923, "grad_norm": 2.8714559078216553, "learning_rate": 2.3979266200537296e-05, "loss": 0.2221, "step": 6974 }, { "epoch": 3.2431427243142723, "grad_norm": 2.355537176132202, "learning_rate": 2.405774543325163e-05, "loss": 0.2781, "step": 6976 }, { "epoch": 3.2440725244072524, "grad_norm": 2.61814284324646, "learning_rate": 2.4136233965638225e-05, "loss": 0.2212, "step": 6978 }, { "epoch": 3.2450023245002324, "grad_norm": 1.8765755891799927, "learning_rate": 2.4214731023046823e-05, "loss": 0.2081, "step": 6980 }, { "epoch": 3.2459321245932125, "grad_norm": 2.1030330657958984, "learning_rate": 2.4293235830743195e-05, "loss": 0.2053, "step": 6982 }, { "epoch": 3.2468619246861925, "grad_norm": 2.4744415283203125, "learning_rate": 2.4371747613916583e-05, "loss": 0.2575, "step": 6984 }, { "epoch": 3.2477917247791726, "grad_norm": 2.4267163276672363, "learning_rate": 2.445026559768743e-05, "loss": 0.2188, "step": 6986 }, { "epoch": 3.2487215248721526, "grad_norm": 2.6385257244110107, "learning_rate": 2.45287890071148e-05, "loss": 0.2741, "step": 6988 }, { "epoch": 3.2496513249651326, "grad_norm": 3.2177248001098633, "learning_rate": 2.4607317067204482e-05, "loss": 0.3014, "step": 6990 }, { "epoch": 3.2505811250581127, "grad_norm": 1.885506510734558, "learning_rate": 2.4685849002916223e-05, "loss": 0.2128, "step": 6992 }, { "epoch": 3.2515109251510923, "grad_norm": 3.012969493865967, "learning_rate": 2.476438403917143e-05, "loss": 0.2986, "step": 6994 }, { "epoch": 3.2524407252440724, "grad_norm": 2.0069780349731445, "learning_rate": 2.484292140086105e-05, "loss": 0.2038, "step": 6996 }, { "epoch": 3.2533705253370524, "grad_norm": 2.5208170413970947, "learning_rate": 2.492146031285298e-05, "loss": 0.247, "step": 6998 }, { "epoch": 3.2543003254300324, "grad_norm": 2.3664653301239014, "learning_rate": 2.5000000000000008e-05, "loss": 0.2535, "step": 7000 }, { "epoch": 3.2543003254300324, "eval_cer": 0.2398834698388766, "eval_loss": 0.36875680088996887, "eval_runtime": 394.841, "eval_samples_per_second": 32.15, "eval_steps_per_second": 1.005, "step": 7000 }, { "epoch": 3.2552301255230125, "grad_norm": 2.112523078918457, "learning_rate": 2.5078539687146992e-05, "loss": 0.2152, "step": 7002 }, { "epoch": 3.2561599256159925, "grad_norm": 3.462662935256958, "learning_rate": 2.5157078599138974e-05, "loss": 0.2161, "step": 7004 }, { "epoch": 3.2570897257089726, "grad_norm": 2.2364706993103027, "learning_rate": 2.5235615960828633e-05, "loss": 0.2727, "step": 7006 }, { "epoch": 3.2580195258019526, "grad_norm": 2.4228878021240234, "learning_rate": 2.5314150997083847e-05, "loss": 0.2326, "step": 7008 }, { "epoch": 3.2589493258949327, "grad_norm": 2.5349996089935303, "learning_rate": 2.53926829327955e-05, "loss": 0.2365, "step": 7010 }, { "epoch": 3.2598791259879127, "grad_norm": 2.285834550857544, "learning_rate": 2.547121099288519e-05, "loss": 0.2555, "step": 7012 }, { "epoch": 3.260808926080893, "grad_norm": 2.914050817489624, "learning_rate": 2.5549734402312654e-05, "loss": 0.2267, "step": 7014 }, { "epoch": 3.2617387261738724, "grad_norm": 2.2424418926239014, "learning_rate": 2.5628252386083457e-05, "loss": 0.2153, "step": 7016 }, { "epoch": 3.2626685262668524, "grad_norm": 2.788679599761963, "learning_rate": 2.570676416925685e-05, "loss": 0.265, "step": 7018 }, { "epoch": 3.2635983263598325, "grad_norm": 2.83636474609375, "learning_rate": 2.5785268976953217e-05, "loss": 0.3336, "step": 7020 }, { "epoch": 3.2645281264528125, "grad_norm": 1.8348052501678467, "learning_rate": 2.5863766034361814e-05, "loss": 0.1946, "step": 7022 }, { "epoch": 3.2654579265457926, "grad_norm": 2.1653177738189697, "learning_rate": 2.594225456674837e-05, "loss": 0.194, "step": 7024 }, { "epoch": 3.2663877266387726, "grad_norm": 3.2028396129608154, "learning_rate": 2.6020733799462794e-05, "loss": 0.2425, "step": 7026 }, { "epoch": 3.2673175267317527, "grad_norm": 2.641892194747925, "learning_rate": 2.6099202957946665e-05, "loss": 0.273, "step": 7028 }, { "epoch": 3.2682473268247327, "grad_norm": 2.4644923210144043, "learning_rate": 2.6177661267741102e-05, "loss": 0.2509, "step": 7030 }, { "epoch": 3.269177126917713, "grad_norm": 2.302143096923828, "learning_rate": 2.625610795449423e-05, "loss": 0.2089, "step": 7032 }, { "epoch": 3.270106927010693, "grad_norm": 2.825207233428955, "learning_rate": 2.6334542243969038e-05, "loss": 0.1901, "step": 7034 }, { "epoch": 3.271036727103673, "grad_norm": 2.3540823459625244, "learning_rate": 2.6412963362050638e-05, "loss": 0.3082, "step": 7036 }, { "epoch": 3.2719665271966525, "grad_norm": 2.296922206878662, "learning_rate": 2.6491370534754295e-05, "loss": 0.2627, "step": 7038 }, { "epoch": 3.272896327289633, "grad_norm": 1.88584303855896, "learning_rate": 2.65697629882329e-05, "loss": 0.1754, "step": 7040 }, { "epoch": 3.2738261273826126, "grad_norm": 3.204287052154541, "learning_rate": 2.6648139948784464e-05, "loss": 0.215, "step": 7042 }, { "epoch": 3.2747559274755926, "grad_norm": 2.169440507888794, "learning_rate": 2.672650064286016e-05, "loss": 0.2042, "step": 7044 }, { "epoch": 3.2756857275685727, "grad_norm": 2.2963271141052246, "learning_rate": 2.6804844297071522e-05, "loss": 0.2492, "step": 7046 }, { "epoch": 3.2766155276615527, "grad_norm": 2.8539583683013916, "learning_rate": 2.6883170138198367e-05, "loss": 0.2736, "step": 7048 }, { "epoch": 3.2775453277545328, "grad_norm": 2.3066298961639404, "learning_rate": 2.696147739319617e-05, "loss": 0.2788, "step": 7050 }, { "epoch": 3.278475127847513, "grad_norm": 2.789804458618164, "learning_rate": 2.7039765289203976e-05, "loss": 0.2007, "step": 7052 }, { "epoch": 3.279404927940493, "grad_norm": 2.444276809692383, "learning_rate": 2.7118033053551866e-05, "loss": 0.1875, "step": 7054 }, { "epoch": 3.280334728033473, "grad_norm": 2.5689096450805664, "learning_rate": 2.7196279913768608e-05, "loss": 0.2697, "step": 7056 }, { "epoch": 3.281264528126453, "grad_norm": 2.1759066581726074, "learning_rate": 2.7274505097589273e-05, "loss": 0.2354, "step": 7058 }, { "epoch": 3.2821943282194326, "grad_norm": 2.494400978088379, "learning_rate": 2.735270783296288e-05, "loss": 0.2997, "step": 7060 }, { "epoch": 3.283124128312413, "grad_norm": 2.467341423034668, "learning_rate": 2.7430887348060037e-05, "loss": 0.2659, "step": 7062 }, { "epoch": 3.2840539284053927, "grad_norm": 3.326413631439209, "learning_rate": 2.750904287128038e-05, "loss": 0.3264, "step": 7064 }, { "epoch": 3.2849837284983727, "grad_norm": 3.513854742050171, "learning_rate": 2.758717363126057e-05, "loss": 0.2825, "step": 7066 }, { "epoch": 3.2859135285913528, "grad_norm": 2.4390838146209717, "learning_rate": 2.7665278856881542e-05, "loss": 0.2021, "step": 7068 }, { "epoch": 3.286843328684333, "grad_norm": 2.5773229598999023, "learning_rate": 2.774335777727617e-05, "loss": 0.2631, "step": 7070 }, { "epoch": 3.287773128777313, "grad_norm": 2.145435094833374, "learning_rate": 2.7821409621837074e-05, "loss": 0.1961, "step": 7072 }, { "epoch": 3.288702928870293, "grad_norm": 2.238412857055664, "learning_rate": 2.7899433620224107e-05, "loss": 0.2004, "step": 7074 }, { "epoch": 3.289632728963273, "grad_norm": 2.5317165851593018, "learning_rate": 2.7977429002371767e-05, "loss": 0.2211, "step": 7076 }, { "epoch": 3.290562529056253, "grad_norm": 2.3979849815368652, "learning_rate": 2.8055394998497257e-05, "loss": 0.1964, "step": 7078 }, { "epoch": 3.291492329149233, "grad_norm": 2.0729217529296875, "learning_rate": 2.8133330839107622e-05, "loss": 0.1773, "step": 7080 }, { "epoch": 3.292422129242213, "grad_norm": 3.4375858306884766, "learning_rate": 2.8211235755007626e-05, "loss": 0.2857, "step": 7082 }, { "epoch": 3.293351929335193, "grad_norm": 2.7160537242889404, "learning_rate": 2.8289108977307114e-05, "loss": 0.2731, "step": 7084 }, { "epoch": 3.2942817294281728, "grad_norm": 2.5687673091888428, "learning_rate": 2.836694973742881e-05, "loss": 0.3068, "step": 7086 }, { "epoch": 3.295211529521153, "grad_norm": 1.9796570539474487, "learning_rate": 2.844475726711599e-05, "loss": 0.1672, "step": 7088 }, { "epoch": 3.296141329614133, "grad_norm": 2.656562566757202, "learning_rate": 2.85225307984396e-05, "loss": 0.2333, "step": 7090 }, { "epoch": 3.297071129707113, "grad_norm": 1.8316131830215454, "learning_rate": 2.860026956380633e-05, "loss": 0.2261, "step": 7092 }, { "epoch": 3.298000929800093, "grad_norm": 2.3140783309936523, "learning_rate": 2.8677972795965957e-05, "loss": 0.2849, "step": 7094 }, { "epoch": 3.298930729893073, "grad_norm": 2.4429593086242676, "learning_rate": 2.8755639728018983e-05, "loss": 0.2762, "step": 7096 }, { "epoch": 3.299860529986053, "grad_norm": 2.33827805519104, "learning_rate": 2.8833269593424025e-05, "loss": 0.1904, "step": 7098 }, { "epoch": 3.300790330079033, "grad_norm": 2.6243340969085693, "learning_rate": 2.8910861626005776e-05, "loss": 0.2287, "step": 7100 }, { "epoch": 3.301720130172013, "grad_norm": 2.6949169635772705, "learning_rate": 2.8988415059962197e-05, "loss": 0.2731, "step": 7102 }, { "epoch": 3.302649930264993, "grad_norm": 2.4754810333251953, "learning_rate": 2.9065929129872128e-05, "loss": 0.2472, "step": 7104 }, { "epoch": 3.3035797303579733, "grad_norm": 2.9589099884033203, "learning_rate": 2.9143403070703027e-05, "loss": 0.25, "step": 7106 }, { "epoch": 3.304509530450953, "grad_norm": 2.039874792098999, "learning_rate": 2.922083611781833e-05, "loss": 0.1767, "step": 7108 }, { "epoch": 3.305439330543933, "grad_norm": 3.2736918926239014, "learning_rate": 2.929822750698526e-05, "loss": 0.3392, "step": 7110 }, { "epoch": 3.306369130636913, "grad_norm": 3.1339199542999268, "learning_rate": 2.9375576474381918e-05, "loss": 0.3027, "step": 7112 }, { "epoch": 3.307298930729893, "grad_norm": 2.6410250663757324, "learning_rate": 2.9452882256605268e-05, "loss": 0.3085, "step": 7114 }, { "epoch": 3.308228730822873, "grad_norm": 2.416799545288086, "learning_rate": 2.953014409067849e-05, "loss": 0.2372, "step": 7116 }, { "epoch": 3.309158530915853, "grad_norm": 2.5196447372436523, "learning_rate": 2.9607361214058386e-05, "loss": 0.2567, "step": 7118 }, { "epoch": 3.310088331008833, "grad_norm": 2.9457242488861084, "learning_rate": 2.9684532864643126e-05, "loss": 0.2902, "step": 7120 }, { "epoch": 3.311018131101813, "grad_norm": 2.509909152984619, "learning_rate": 2.9761658280779803e-05, "loss": 0.2073, "step": 7122 }, { "epoch": 3.3119479311947932, "grad_norm": 2.584292411804199, "learning_rate": 2.9838736701271555e-05, "loss": 0.2239, "step": 7124 }, { "epoch": 3.3128777312877733, "grad_norm": 2.6596922874450684, "learning_rate": 2.991576736538554e-05, "loss": 0.2683, "step": 7126 }, { "epoch": 3.3138075313807533, "grad_norm": 1.9752849340438843, "learning_rate": 2.999274951286021e-05, "loss": 0.2364, "step": 7128 }, { "epoch": 3.314737331473733, "grad_norm": 3.09576416015625, "learning_rate": 3.0069682383912837e-05, "loss": 0.2817, "step": 7130 }, { "epoch": 3.315667131566713, "grad_norm": 3.1115376949310303, "learning_rate": 3.0146565219247053e-05, "loss": 0.2649, "step": 7132 }, { "epoch": 3.316596931659693, "grad_norm": 2.426393985748291, "learning_rate": 3.0223397260060305e-05, "loss": 0.2672, "step": 7134 }, { "epoch": 3.317526731752673, "grad_norm": 2.649282932281494, "learning_rate": 3.0300177748051424e-05, "loss": 0.2072, "step": 7136 }, { "epoch": 3.318456531845653, "grad_norm": 2.5303456783294678, "learning_rate": 3.0376905925427887e-05, "loss": 0.2916, "step": 7138 }, { "epoch": 3.319386331938633, "grad_norm": 2.5497167110443115, "learning_rate": 3.0453581034913615e-05, "loss": 0.179, "step": 7140 }, { "epoch": 3.3203161320316132, "grad_norm": 2.0302817821502686, "learning_rate": 3.053020231975618e-05, "loss": 0.2248, "step": 7142 }, { "epoch": 3.3212459321245933, "grad_norm": 2.3964858055114746, "learning_rate": 3.060676902373456e-05, "loss": 0.2571, "step": 7144 }, { "epoch": 3.3221757322175733, "grad_norm": 2.499572992324829, "learning_rate": 3.068328039116619e-05, "loss": 0.2263, "step": 7146 }, { "epoch": 3.3231055323105534, "grad_norm": 2.7419142723083496, "learning_rate": 3.075973566691479e-05, "loss": 0.3152, "step": 7148 }, { "epoch": 3.3240353324035334, "grad_norm": 2.032805919647217, "learning_rate": 3.08361340963977e-05, "loss": 0.2168, "step": 7150 }, { "epoch": 3.324965132496513, "grad_norm": 1.9383342266082764, "learning_rate": 3.0912474925593134e-05, "loss": 0.1696, "step": 7152 }, { "epoch": 3.325894932589493, "grad_norm": 2.8124237060546875, "learning_rate": 3.098875740104806e-05, "loss": 0.217, "step": 7154 }, { "epoch": 3.326824732682473, "grad_norm": 2.2443127632141113, "learning_rate": 3.1064980769885187e-05, "loss": 0.1703, "step": 7156 }, { "epoch": 3.327754532775453, "grad_norm": 2.0369818210601807, "learning_rate": 3.11411442798107e-05, "loss": 0.175, "step": 7158 }, { "epoch": 3.3286843328684332, "grad_norm": 2.0681650638580322, "learning_rate": 3.12172471791214e-05, "loss": 0.2582, "step": 7160 }, { "epoch": 3.3296141329614133, "grad_norm": 2.345107316970825, "learning_rate": 3.129328871671246e-05, "loss": 0.2168, "step": 7162 }, { "epoch": 3.3305439330543933, "grad_norm": 2.2967939376831055, "learning_rate": 3.136926814208458e-05, "loss": 0.1802, "step": 7164 }, { "epoch": 3.3314737331473734, "grad_norm": 2.778308868408203, "learning_rate": 3.144518470535151e-05, "loss": 0.2482, "step": 7166 }, { "epoch": 3.3324035332403534, "grad_norm": 3.353341579437256, "learning_rate": 3.1521037657247436e-05, "loss": 0.269, "step": 7168 }, { "epoch": 3.3333333333333335, "grad_norm": 2.5416548252105713, "learning_rate": 3.159682624913437e-05, "loss": 0.2911, "step": 7170 }, { "epoch": 3.3342631334263135, "grad_norm": 2.7830138206481934, "learning_rate": 3.167254973300944e-05, "loss": 0.2791, "step": 7172 }, { "epoch": 3.335192933519293, "grad_norm": 2.842345952987671, "learning_rate": 3.17482073615124e-05, "loss": 0.2622, "step": 7174 }, { "epoch": 3.336122733612273, "grad_norm": 3.046074867248535, "learning_rate": 3.182379838793312e-05, "loss": 0.2485, "step": 7176 }, { "epoch": 3.3370525337052532, "grad_norm": 2.545703649520874, "learning_rate": 3.189932206621867e-05, "loss": 0.2286, "step": 7178 }, { "epoch": 3.3379823337982333, "grad_norm": 2.3410263061523438, "learning_rate": 3.1974777650980756e-05, "loss": 0.2577, "step": 7180 }, { "epoch": 3.3389121338912133, "grad_norm": 2.570188283920288, "learning_rate": 3.205016439750325e-05, "loss": 0.2378, "step": 7182 }, { "epoch": 3.3398419339841934, "grad_norm": 1.7948036193847656, "learning_rate": 3.212548156174946e-05, "loss": 0.1914, "step": 7184 }, { "epoch": 3.3407717340771734, "grad_norm": 3.2963473796844482, "learning_rate": 3.220072840036925e-05, "loss": 0.3769, "step": 7186 }, { "epoch": 3.3417015341701535, "grad_norm": 2.6306636333465576, "learning_rate": 3.22759041707068e-05, "loss": 0.2733, "step": 7188 }, { "epoch": 3.3426313342631335, "grad_norm": 2.3158411979675293, "learning_rate": 3.23510081308076e-05, "loss": 0.254, "step": 7190 }, { "epoch": 3.3435611343561136, "grad_norm": 2.359321355819702, "learning_rate": 3.2426039539425917e-05, "loss": 0.2644, "step": 7192 }, { "epoch": 3.3444909344490936, "grad_norm": 2.7072908878326416, "learning_rate": 3.250099765603194e-05, "loss": 0.2224, "step": 7194 }, { "epoch": 3.3454207345420732, "grad_norm": 2.57844614982605, "learning_rate": 3.257588174081934e-05, "loss": 0.2158, "step": 7196 }, { "epoch": 3.3463505346350533, "grad_norm": 3.17403244972229, "learning_rate": 3.265069105471254e-05, "loss": 0.2656, "step": 7198 }, { "epoch": 3.3472803347280333, "grad_norm": 3.435903310775757, "learning_rate": 3.2725424859373705e-05, "loss": 0.3301, "step": 7200 }, { "epoch": 3.3482101348210134, "grad_norm": 3.4468557834625244, "learning_rate": 3.280008241721039e-05, "loss": 0.2565, "step": 7202 }, { "epoch": 3.3491399349139934, "grad_norm": 3.1903131008148193, "learning_rate": 3.287466299138263e-05, "loss": 0.2066, "step": 7204 }, { "epoch": 3.3500697350069735, "grad_norm": 3.1396515369415283, "learning_rate": 3.2949165845810323e-05, "loss": 0.2861, "step": 7206 }, { "epoch": 3.3509995350999535, "grad_norm": 2.919546365737915, "learning_rate": 3.302359024518024e-05, "loss": 0.2187, "step": 7208 }, { "epoch": 3.3519293351929336, "grad_norm": 2.625387668609619, "learning_rate": 3.309793545495373e-05, "loss": 0.185, "step": 7210 }, { "epoch": 3.3528591352859136, "grad_norm": 2.5149104595184326, "learning_rate": 3.31722007413736e-05, "loss": 0.2424, "step": 7212 }, { "epoch": 3.3537889353788937, "grad_norm": 2.618756055831909, "learning_rate": 3.3246385371471354e-05, "loss": 0.2528, "step": 7214 }, { "epoch": 3.3547187354718737, "grad_norm": 2.5202858448028564, "learning_rate": 3.332048861307469e-05, "loss": 0.2334, "step": 7216 }, { "epoch": 3.3556485355648533, "grad_norm": 3.018610954284668, "learning_rate": 3.339450973481453e-05, "loss": 0.2216, "step": 7218 }, { "epoch": 3.356578335657834, "grad_norm": 2.764427423477173, "learning_rate": 3.34684480061323e-05, "loss": 0.2166, "step": 7220 }, { "epoch": 3.3575081357508134, "grad_norm": 2.5573506355285645, "learning_rate": 3.3542302697287095e-05, "loss": 0.2405, "step": 7222 }, { "epoch": 3.3584379358437935, "grad_norm": 3.4644389152526855, "learning_rate": 3.361607307936293e-05, "loss": 0.2944, "step": 7224 }, { "epoch": 3.3593677359367735, "grad_norm": 3.387708902359009, "learning_rate": 3.3689758424275966e-05, "loss": 0.3099, "step": 7226 }, { "epoch": 3.3602975360297536, "grad_norm": 2.6980652809143066, "learning_rate": 3.3763358004781516e-05, "loss": 0.2439, "step": 7228 }, { "epoch": 3.3612273361227336, "grad_norm": 2.0834836959838867, "learning_rate": 3.383687109448142e-05, "loss": 0.2206, "step": 7230 }, { "epoch": 3.3621571362157137, "grad_norm": 3.4700427055358887, "learning_rate": 3.391029696783129e-05, "loss": 0.2579, "step": 7232 }, { "epoch": 3.3630869363086937, "grad_norm": 2.6298437118530273, "learning_rate": 3.39836349001473e-05, "loss": 0.2173, "step": 7234 }, { "epoch": 3.3640167364016738, "grad_norm": 2.771068572998047, "learning_rate": 3.405688416761367e-05, "loss": 0.2451, "step": 7236 }, { "epoch": 3.364946536494654, "grad_norm": 2.7032060623168945, "learning_rate": 3.413004404728972e-05, "loss": 0.2718, "step": 7238 }, { "epoch": 3.3658763365876334, "grad_norm": 2.925827741622925, "learning_rate": 3.420311381711698e-05, "loss": 0.2647, "step": 7240 }, { "epoch": 3.366806136680614, "grad_norm": 2.3378682136535645, "learning_rate": 3.42760927559263e-05, "loss": 0.2103, "step": 7242 }, { "epoch": 3.3677359367735935, "grad_norm": 3.035954475402832, "learning_rate": 3.4348980143445034e-05, "loss": 0.2403, "step": 7244 }, { "epoch": 3.3686657368665736, "grad_norm": 2.3481860160827637, "learning_rate": 3.442177526030412e-05, "loss": 0.2422, "step": 7246 }, { "epoch": 3.3695955369595536, "grad_norm": 2.554669141769409, "learning_rate": 3.4494477388045075e-05, "loss": 0.2664, "step": 7248 }, { "epoch": 3.3705253370525337, "grad_norm": 2.530829906463623, "learning_rate": 3.456708580912729e-05, "loss": 0.2689, "step": 7250 }, { "epoch": 3.3714551371455137, "grad_norm": 3.2383153438568115, "learning_rate": 3.463959980693492e-05, "loss": 0.2261, "step": 7252 }, { "epoch": 3.3723849372384938, "grad_norm": 1.6688823699951172, "learning_rate": 3.47120186657842e-05, "loss": 0.1674, "step": 7254 }, { "epoch": 3.373314737331474, "grad_norm": 2.457953691482544, "learning_rate": 3.4784341670930105e-05, "loss": 0.248, "step": 7256 }, { "epoch": 3.374244537424454, "grad_norm": 3.4577701091766357, "learning_rate": 3.4856568108573815e-05, "loss": 0.3154, "step": 7258 }, { "epoch": 3.375174337517434, "grad_norm": 3.3921287059783936, "learning_rate": 3.492869726586959e-05, "loss": 0.2722, "step": 7260 }, { "epoch": 3.376104137610414, "grad_norm": 1.9629331827163696, "learning_rate": 3.5000728430931646e-05, "loss": 0.1985, "step": 7262 }, { "epoch": 3.377033937703394, "grad_norm": 2.418720245361328, "learning_rate": 3.5072660892841584e-05, "loss": 0.2231, "step": 7264 }, { "epoch": 3.3779637377963736, "grad_norm": 2.8350701332092285, "learning_rate": 3.514449394165506e-05, "loss": 0.219, "step": 7266 }, { "epoch": 3.3788935378893536, "grad_norm": 2.7528653144836426, "learning_rate": 3.5216226868408794e-05, "loss": 0.1993, "step": 7268 }, { "epoch": 3.3798233379823337, "grad_norm": 2.9944396018981934, "learning_rate": 3.528785896512779e-05, "loss": 0.2473, "step": 7270 }, { "epoch": 3.3807531380753137, "grad_norm": 2.640742063522339, "learning_rate": 3.535938952483217e-05, "loss": 0.1969, "step": 7272 }, { "epoch": 3.381682938168294, "grad_norm": 3.138248920440674, "learning_rate": 3.5430817841544194e-05, "loss": 0.2775, "step": 7274 }, { "epoch": 3.382612738261274, "grad_norm": 2.938450574874878, "learning_rate": 3.550214321029522e-05, "loss": 0.3059, "step": 7276 }, { "epoch": 3.383542538354254, "grad_norm": 2.2703189849853516, "learning_rate": 3.557336492713264e-05, "loss": 0.1913, "step": 7278 }, { "epoch": 3.384472338447234, "grad_norm": 3.0515904426574707, "learning_rate": 3.564448228912691e-05, "loss": 0.2769, "step": 7280 }, { "epoch": 3.385402138540214, "grad_norm": 2.5958216190338135, "learning_rate": 3.57154945943783e-05, "loss": 0.2108, "step": 7282 }, { "epoch": 3.386331938633194, "grad_norm": 2.904259204864502, "learning_rate": 3.578640114202402e-05, "loss": 0.2666, "step": 7284 }, { "epoch": 3.387261738726174, "grad_norm": 3.3591253757476807, "learning_rate": 3.5857201232245154e-05, "loss": 0.2888, "step": 7286 }, { "epoch": 3.3881915388191537, "grad_norm": 2.065072536468506, "learning_rate": 3.59278941662734e-05, "loss": 0.2177, "step": 7288 }, { "epoch": 3.3891213389121337, "grad_norm": 2.4118611812591553, "learning_rate": 3.599847924639795e-05, "loss": 0.1996, "step": 7290 }, { "epoch": 3.390051139005114, "grad_norm": 2.919332981109619, "learning_rate": 3.60689557759726e-05, "loss": 0.2087, "step": 7292 }, { "epoch": 3.390980939098094, "grad_norm": 2.5537478923797607, "learning_rate": 3.613932305942251e-05, "loss": 0.2122, "step": 7294 }, { "epoch": 3.391910739191074, "grad_norm": 3.2673826217651367, "learning_rate": 3.620958040225087e-05, "loss": 0.3182, "step": 7296 }, { "epoch": 3.392840539284054, "grad_norm": 3.3689563274383545, "learning_rate": 3.627972711104619e-05, "loss": 0.2276, "step": 7298 }, { "epoch": 3.393770339377034, "grad_norm": 2.931164026260376, "learning_rate": 3.6349762493488725e-05, "loss": 0.3614, "step": 7300 }, { "epoch": 3.394700139470014, "grad_norm": 3.2133781909942627, "learning_rate": 3.641968585835757e-05, "loss": 0.2543, "step": 7302 }, { "epoch": 3.395629939562994, "grad_norm": 2.787853479385376, "learning_rate": 3.648949651553728e-05, "loss": 0.2685, "step": 7304 }, { "epoch": 3.396559739655974, "grad_norm": 2.2310853004455566, "learning_rate": 3.655919377602483e-05, "loss": 0.259, "step": 7306 }, { "epoch": 3.397489539748954, "grad_norm": 2.3981592655181885, "learning_rate": 3.662877695193653e-05, "loss": 0.2436, "step": 7308 }, { "epoch": 3.398419339841934, "grad_norm": 2.325932025909424, "learning_rate": 3.669824535651441e-05, "loss": 0.2557, "step": 7310 }, { "epoch": 3.399349139934914, "grad_norm": 2.3893299102783203, "learning_rate": 3.6767598304133385e-05, "loss": 0.2352, "step": 7312 }, { "epoch": 3.400278940027894, "grad_norm": 2.3204755783081055, "learning_rate": 3.68368351103079e-05, "loss": 0.2798, "step": 7314 }, { "epoch": 3.401208740120874, "grad_norm": 2.97128963470459, "learning_rate": 3.6905955091698586e-05, "loss": 0.2777, "step": 7316 }, { "epoch": 3.402138540213854, "grad_norm": 2.6210641860961914, "learning_rate": 3.6974957566119085e-05, "loss": 0.2632, "step": 7318 }, { "epoch": 3.403068340306834, "grad_norm": 2.548995018005371, "learning_rate": 3.704384185254294e-05, "loss": 0.2721, "step": 7320 }, { "epoch": 3.403998140399814, "grad_norm": 2.5693821907043457, "learning_rate": 3.711260727111005e-05, "loss": 0.2317, "step": 7322 }, { "epoch": 3.404927940492794, "grad_norm": 3.436734437942505, "learning_rate": 3.718125314313342e-05, "loss": 0.2536, "step": 7324 }, { "epoch": 3.405857740585774, "grad_norm": 2.542398691177368, "learning_rate": 3.724977879110602e-05, "loss": 0.2259, "step": 7326 }, { "epoch": 3.4067875406787542, "grad_norm": 3.0516245365142822, "learning_rate": 3.73181835387074e-05, "loss": 0.2498, "step": 7328 }, { "epoch": 3.4077173407717343, "grad_norm": 2.322134256362915, "learning_rate": 3.7386466710810295e-05, "loss": 0.2522, "step": 7330 }, { "epoch": 3.408647140864714, "grad_norm": 2.7680604457855225, "learning_rate": 3.7454627633487375e-05, "loss": 0.2454, "step": 7332 }, { "epoch": 3.409576940957694, "grad_norm": 3.1592659950256348, "learning_rate": 3.7522665634017846e-05, "loss": 0.2777, "step": 7334 }, { "epoch": 3.410506741050674, "grad_norm": 3.2801218032836914, "learning_rate": 3.7590580040894145e-05, "loss": 0.3058, "step": 7336 }, { "epoch": 3.411436541143654, "grad_norm": 3.5973763465881348, "learning_rate": 3.765837018382843e-05, "loss": 0.2805, "step": 7338 }, { "epoch": 3.412366341236634, "grad_norm": 3.1590750217437744, "learning_rate": 3.772603539375937e-05, "loss": 0.3161, "step": 7340 }, { "epoch": 3.413296141329614, "grad_norm": 2.817110061645508, "learning_rate": 3.779357500285874e-05, "loss": 0.2903, "step": 7342 }, { "epoch": 3.414225941422594, "grad_norm": 2.5640411376953125, "learning_rate": 3.786098834453778e-05, "loss": 0.2498, "step": 7344 }, { "epoch": 3.415155741515574, "grad_norm": 3.1352641582489014, "learning_rate": 3.7928274753454044e-05, "loss": 0.3213, "step": 7346 }, { "epoch": 3.4160855416085543, "grad_norm": 2.8634579181671143, "learning_rate": 3.799543356551784e-05, "loss": 0.2359, "step": 7348 }, { "epoch": 3.4170153417015343, "grad_norm": 3.2572262287139893, "learning_rate": 3.806246411789882e-05, "loss": 0.2309, "step": 7350 }, { "epoch": 3.4179451417945144, "grad_norm": 2.0427021980285645, "learning_rate": 3.812936574903249e-05, "loss": 0.2228, "step": 7352 }, { "epoch": 3.418874941887494, "grad_norm": 2.5354323387145996, "learning_rate": 3.8196137798626755e-05, "loss": 0.2176, "step": 7354 }, { "epoch": 3.419804741980474, "grad_norm": 2.7438745498657227, "learning_rate": 3.8262779607668475e-05, "loss": 0.2286, "step": 7356 }, { "epoch": 3.420734542073454, "grad_norm": 1.9494237899780273, "learning_rate": 3.832929051842983e-05, "loss": 0.2424, "step": 7358 }, { "epoch": 3.421664342166434, "grad_norm": 3.103689432144165, "learning_rate": 3.839566987447503e-05, "loss": 0.2289, "step": 7360 }, { "epoch": 3.422594142259414, "grad_norm": 4.043893337249756, "learning_rate": 3.846191702066662e-05, "loss": 0.2032, "step": 7362 }, { "epoch": 3.423523942352394, "grad_norm": 2.6920900344848633, "learning_rate": 3.8528031303172004e-05, "loss": 0.2128, "step": 7364 }, { "epoch": 3.4244537424453743, "grad_norm": 3.0780510902404785, "learning_rate": 3.859401206946992e-05, "loss": 0.3039, "step": 7366 }, { "epoch": 3.4253835425383543, "grad_norm": 2.7095589637756348, "learning_rate": 3.8659858668356826e-05, "loss": 0.256, "step": 7368 }, { "epoch": 3.4263133426313344, "grad_norm": 2.7334394454956055, "learning_rate": 3.872557044995343e-05, "loss": 0.2124, "step": 7370 }, { "epoch": 3.4272431427243144, "grad_norm": 1.6189643144607544, "learning_rate": 3.879114676571085e-05, "loss": 0.2399, "step": 7372 }, { "epoch": 3.4281729428172945, "grad_norm": 3.6284618377685547, "learning_rate": 3.8856586968417456e-05, "loss": 0.2885, "step": 7374 }, { "epoch": 3.429102742910274, "grad_norm": 3.2381014823913574, "learning_rate": 3.892189041220484e-05, "loss": 0.2158, "step": 7376 }, { "epoch": 3.430032543003254, "grad_norm": 3.2191176414489746, "learning_rate": 3.8987056452554306e-05, "loss": 0.2601, "step": 7378 }, { "epoch": 3.430962343096234, "grad_norm": 2.4879674911499023, "learning_rate": 3.905208444630339e-05, "loss": 0.3338, "step": 7380 }, { "epoch": 3.431892143189214, "grad_norm": 1.9941987991333008, "learning_rate": 3.911697375165205e-05, "loss": 0.2652, "step": 7382 }, { "epoch": 3.4328219432821943, "grad_norm": 2.675233840942383, "learning_rate": 3.9181723728169025e-05, "loss": 0.247, "step": 7384 }, { "epoch": 3.4337517433751743, "grad_norm": 3.502509117126465, "learning_rate": 3.9246333736798203e-05, "loss": 0.2983, "step": 7386 }, { "epoch": 3.4346815434681544, "grad_norm": 2.8796606063842773, "learning_rate": 3.931080313986487e-05, "loss": 0.27, "step": 7388 }, { "epoch": 3.4356113435611344, "grad_norm": 2.5728001594543457, "learning_rate": 3.9375131301082096e-05, "loss": 0.2386, "step": 7390 }, { "epoch": 3.4365411436541144, "grad_norm": 3.048290729522705, "learning_rate": 3.9439317585556824e-05, "loss": 0.2429, "step": 7392 }, { "epoch": 3.4374709437470945, "grad_norm": 2.827524423599243, "learning_rate": 3.9503361359796325e-05, "loss": 0.2856, "step": 7394 }, { "epoch": 3.4384007438400745, "grad_norm": 4.0454230308532715, "learning_rate": 3.9567261991714485e-05, "loss": 0.3694, "step": 7396 }, { "epoch": 3.439330543933054, "grad_norm": 2.2478630542755127, "learning_rate": 3.963101885063787e-05, "loss": 0.1967, "step": 7398 }, { "epoch": 3.4402603440260346, "grad_norm": 3.146052598953247, "learning_rate": 3.9694631307311945e-05, "loss": 0.2478, "step": 7400 }, { "epoch": 3.4411901441190142, "grad_norm": 2.1919894218444824, "learning_rate": 3.975809873390747e-05, "loss": 0.161, "step": 7402 }, { "epoch": 3.4421199442119943, "grad_norm": 2.821669101715088, "learning_rate": 3.9821420504026616e-05, "loss": 0.2748, "step": 7404 }, { "epoch": 3.4430497443049743, "grad_norm": 2.555262804031372, "learning_rate": 3.988459599270896e-05, "loss": 0.2741, "step": 7406 }, { "epoch": 3.4439795443979544, "grad_norm": 2.9550907611846924, "learning_rate": 3.9947624576438056e-05, "loss": 0.2184, "step": 7408 }, { "epoch": 3.4449093444909344, "grad_norm": 4.0812788009643555, "learning_rate": 4.001050563314722e-05, "loss": 0.2827, "step": 7410 }, { "epoch": 3.4458391445839145, "grad_norm": 3.201188802719116, "learning_rate": 4.007323854222573e-05, "loss": 0.2319, "step": 7412 }, { "epoch": 3.4467689446768945, "grad_norm": 2.5569422245025635, "learning_rate": 4.0135822684525146e-05, "loss": 0.2176, "step": 7414 }, { "epoch": 3.4476987447698746, "grad_norm": 3.3364455699920654, "learning_rate": 4.0198257442365206e-05, "loss": 0.2107, "step": 7416 }, { "epoch": 3.4486285448628546, "grad_norm": 2.3846538066864014, "learning_rate": 4.026054219954017e-05, "loss": 0.2065, "step": 7418 }, { "epoch": 3.4495583449558347, "grad_norm": 3.111433982849121, "learning_rate": 4.0322676341324524e-05, "loss": 0.2553, "step": 7420 }, { "epoch": 3.4504881450488147, "grad_norm": 2.889765501022339, "learning_rate": 4.03846592544794e-05, "loss": 0.2182, "step": 7422 }, { "epoch": 3.4514179451417943, "grad_norm": 3.172914505004883, "learning_rate": 4.044649032725851e-05, "loss": 0.2119, "step": 7424 }, { "epoch": 3.4523477452347744, "grad_norm": 3.0546891689300537, "learning_rate": 4.050816894941404e-05, "loss": 0.3012, "step": 7426 }, { "epoch": 3.4532775453277544, "grad_norm": 2.5359928607940674, "learning_rate": 4.056969451220291e-05, "loss": 0.2069, "step": 7428 }, { "epoch": 3.4542073454207345, "grad_norm": 2.3339779376983643, "learning_rate": 4.0631066408392726e-05, "loss": 0.2209, "step": 7430 }, { "epoch": 3.4551371455137145, "grad_norm": 2.0218725204467773, "learning_rate": 4.069228403226763e-05, "loss": 0.2888, "step": 7432 }, { "epoch": 3.4560669456066946, "grad_norm": 3.369561195373535, "learning_rate": 4.0753346779634353e-05, "loss": 0.2803, "step": 7434 }, { "epoch": 3.4569967456996746, "grad_norm": 2.5743467807769775, "learning_rate": 4.081425404782824e-05, "loss": 0.2428, "step": 7436 }, { "epoch": 3.4579265457926547, "grad_norm": 2.7917394638061523, "learning_rate": 4.087500523571914e-05, "loss": 0.2654, "step": 7438 }, { "epoch": 3.4588563458856347, "grad_norm": 2.504002571105957, "learning_rate": 4.093559974371736e-05, "loss": 0.2534, "step": 7440 }, { "epoch": 3.459786145978615, "grad_norm": 3.2829599380493164, "learning_rate": 4.0996036973779587e-05, "loss": 0.2435, "step": 7442 }, { "epoch": 3.460715946071595, "grad_norm": 2.785236120223999, "learning_rate": 4.105631632941473e-05, "loss": 0.2459, "step": 7444 }, { "epoch": 3.4616457461645744, "grad_norm": 2.177964925765991, "learning_rate": 4.111643721568993e-05, "loss": 0.1466, "step": 7446 }, { "epoch": 3.4625755462575545, "grad_norm": 2.858814239501953, "learning_rate": 4.117639903923625e-05, "loss": 0.2264, "step": 7448 }, { "epoch": 3.4635053463505345, "grad_norm": 2.8284292221069336, "learning_rate": 4.123620120825469e-05, "loss": 0.2858, "step": 7450 }, { "epoch": 3.4644351464435146, "grad_norm": 3.6408092975616455, "learning_rate": 4.12958431325221e-05, "loss": 0.2617, "step": 7452 }, { "epoch": 3.4653649465364946, "grad_norm": 3.075484275817871, "learning_rate": 4.1355324223396665e-05, "loss": 0.3536, "step": 7454 }, { "epoch": 3.4662947466294747, "grad_norm": 3.1220850944519043, "learning_rate": 4.141464389382405e-05, "loss": 0.3191, "step": 7456 }, { "epoch": 3.4672245467224547, "grad_norm": 1.9967854022979736, "learning_rate": 4.1473801558343084e-05, "loss": 0.2221, "step": 7458 }, { "epoch": 3.4681543468154348, "grad_norm": 2.6577000617980957, "learning_rate": 4.1532796633091424e-05, "loss": 0.1964, "step": 7460 }, { "epoch": 3.469084146908415, "grad_norm": 2.7803235054016113, "learning_rate": 4.159162853581159e-05, "loss": 0.3144, "step": 7462 }, { "epoch": 3.470013947001395, "grad_norm": 2.4577584266662598, "learning_rate": 4.165029668585642e-05, "loss": 0.2568, "step": 7464 }, { "epoch": 3.470943747094375, "grad_norm": 2.523275852203369, "learning_rate": 4.170880050419499e-05, "loss": 0.2029, "step": 7466 }, { "epoch": 3.4718735471873545, "grad_norm": 2.246753454208374, "learning_rate": 4.1767139413418156e-05, "loss": 0.1754, "step": 7468 }, { "epoch": 3.4728033472803346, "grad_norm": 2.622115135192871, "learning_rate": 4.182531283774449e-05, "loss": 0.2266, "step": 7470 }, { "epoch": 3.4737331473733146, "grad_norm": 2.452831983566284, "learning_rate": 4.188332020302576e-05, "loss": 0.2383, "step": 7472 }, { "epoch": 3.4746629474662947, "grad_norm": 2.9197235107421875, "learning_rate": 4.19411609367527e-05, "loss": 0.2833, "step": 7474 }, { "epoch": 3.4755927475592747, "grad_norm": 2.918656349182129, "learning_rate": 4.1998834468060616e-05, "loss": 0.236, "step": 7476 }, { "epoch": 3.4765225476522548, "grad_norm": 2.5428543090820312, "learning_rate": 4.205634022773505e-05, "loss": 0.2197, "step": 7478 }, { "epoch": 3.477452347745235, "grad_norm": 2.566795825958252, "learning_rate": 4.211367764821739e-05, "loss": 0.2234, "step": 7480 }, { "epoch": 3.478382147838215, "grad_norm": 2.5960896015167236, "learning_rate": 4.2170846163610335e-05, "loss": 0.2827, "step": 7482 }, { "epoch": 3.479311947931195, "grad_norm": 3.06337308883667, "learning_rate": 4.222784520968383e-05, "loss": 0.2867, "step": 7484 }, { "epoch": 3.480241748024175, "grad_norm": 2.246159553527832, "learning_rate": 4.228467422388031e-05, "loss": 0.2702, "step": 7486 }, { "epoch": 3.481171548117155, "grad_norm": 3.247917652130127, "learning_rate": 4.234133264532028e-05, "loss": 0.2646, "step": 7488 }, { "epoch": 3.4821013482101346, "grad_norm": 2.2653725147247314, "learning_rate": 4.239781991480801e-05, "loss": 0.2643, "step": 7490 }, { "epoch": 3.4830311483031147, "grad_norm": 2.6285529136657715, "learning_rate": 4.2454135474836966e-05, "loss": 0.2251, "step": 7492 }, { "epoch": 3.4839609483960947, "grad_norm": 2.127532482147217, "learning_rate": 4.25102787695953e-05, "loss": 0.2594, "step": 7494 }, { "epoch": 3.4848907484890748, "grad_norm": 2.5480077266693115, "learning_rate": 4.2566249244971374e-05, "loss": 0.2743, "step": 7496 }, { "epoch": 3.485820548582055, "grad_norm": 2.776258707046509, "learning_rate": 4.2622046348559186e-05, "loss": 0.305, "step": 7498 }, { "epoch": 3.486750348675035, "grad_norm": 2.538827896118164, "learning_rate": 4.2677669529663866e-05, "loss": 0.1874, "step": 7500 }, { "epoch": 3.487680148768015, "grad_norm": 3.7347121238708496, "learning_rate": 4.2733118239307016e-05, "loss": 0.3019, "step": 7502 }, { "epoch": 3.488609948860995, "grad_norm": 2.7808585166931152, "learning_rate": 4.2788391930232264e-05, "loss": 0.2027, "step": 7504 }, { "epoch": 3.489539748953975, "grad_norm": 3.0419723987579346, "learning_rate": 4.2843490056910683e-05, "loss": 0.2144, "step": 7506 }, { "epoch": 3.490469549046955, "grad_norm": 2.10880446434021, "learning_rate": 4.289841207554593e-05, "loss": 0.2076, "step": 7508 }, { "epoch": 3.491399349139935, "grad_norm": 2.5158839225769043, "learning_rate": 4.295315744407987e-05, "loss": 0.2418, "step": 7510 }, { "epoch": 3.4923291492329147, "grad_norm": 2.288207769393921, "learning_rate": 4.300772562219781e-05, "loss": 0.2746, "step": 7512 }, { "epoch": 3.4932589493258948, "grad_norm": 2.1730597019195557, "learning_rate": 4.3062116071333916e-05, "loss": 0.2648, "step": 7514 }, { "epoch": 3.494188749418875, "grad_norm": 3.340167284011841, "learning_rate": 4.3116328254676315e-05, "loss": 0.244, "step": 7516 }, { "epoch": 3.495118549511855, "grad_norm": 2.222989082336426, "learning_rate": 4.317036163717272e-05, "loss": 0.2195, "step": 7518 }, { "epoch": 3.496048349604835, "grad_norm": 3.0440351963043213, "learning_rate": 4.322421568553547e-05, "loss": 0.2507, "step": 7520 }, { "epoch": 3.496978149697815, "grad_norm": 4.165674209594727, "learning_rate": 4.327788986824678e-05, "loss": 0.2887, "step": 7522 }, { "epoch": 3.497907949790795, "grad_norm": 2.383622169494629, "learning_rate": 4.3331383655564175e-05, "loss": 0.2331, "step": 7524 }, { "epoch": 3.498837749883775, "grad_norm": 3.0880446434020996, "learning_rate": 4.338469651952554e-05, "loss": 0.2829, "step": 7526 }, { "epoch": 3.499767549976755, "grad_norm": 3.6024246215820312, "learning_rate": 4.343782793395451e-05, "loss": 0.2936, "step": 7528 }, { "epoch": 3.500697350069735, "grad_norm": 3.029770612716675, "learning_rate": 4.34907773744654e-05, "loss": 0.2492, "step": 7530 }, { "epoch": 3.501627150162715, "grad_norm": 2.9372706413269043, "learning_rate": 4.354354431846864e-05, "loss": 0.2686, "step": 7532 }, { "epoch": 3.502556950255695, "grad_norm": 3.2224457263946533, "learning_rate": 4.3596128245175805e-05, "loss": 0.2952, "step": 7534 }, { "epoch": 3.5034867503486753, "grad_norm": 2.2099015712738037, "learning_rate": 4.364852863560473e-05, "loss": 0.2114, "step": 7536 }, { "epoch": 3.504416550441655, "grad_norm": 3.373450517654419, "learning_rate": 4.3700744972584704e-05, "loss": 0.3282, "step": 7538 }, { "epoch": 3.505346350534635, "grad_norm": 3.1193830966949463, "learning_rate": 4.3752776740761625e-05, "loss": 0.2324, "step": 7540 }, { "epoch": 3.506276150627615, "grad_norm": 2.73594331741333, "learning_rate": 4.3804623426602954e-05, "loss": 0.2888, "step": 7542 }, { "epoch": 3.507205950720595, "grad_norm": 2.6478769779205322, "learning_rate": 4.385628451840275e-05, "loss": 0.2479, "step": 7544 }, { "epoch": 3.508135750813575, "grad_norm": 3.4365038871765137, "learning_rate": 4.3907759506286956e-05, "loss": 0.2504, "step": 7546 }, { "epoch": 3.509065550906555, "grad_norm": 3.1545886993408203, "learning_rate": 4.39590478822182e-05, "loss": 0.276, "step": 7548 }, { "epoch": 3.509995350999535, "grad_norm": 2.837796449661255, "learning_rate": 4.401014914000092e-05, "loss": 0.2381, "step": 7550 }, { "epoch": 3.5109251510925152, "grad_norm": 2.0583560466766357, "learning_rate": 4.406106277528634e-05, "loss": 0.186, "step": 7552 }, { "epoch": 3.5118549511854953, "grad_norm": 2.488088369369507, "learning_rate": 4.411178828557746e-05, "loss": 0.2247, "step": 7554 }, { "epoch": 3.512784751278475, "grad_norm": 3.0274884700775146, "learning_rate": 4.4162325170233914e-05, "loss": 0.2509, "step": 7556 }, { "epoch": 3.5137145513714554, "grad_norm": 2.8086864948272705, "learning_rate": 4.421267293047707e-05, "loss": 0.2451, "step": 7558 }, { "epoch": 3.514644351464435, "grad_norm": 2.7562005519866943, "learning_rate": 4.4262831069394865e-05, "loss": 0.2568, "step": 7560 }, { "epoch": 3.515574151557415, "grad_norm": 1.9495223760604858, "learning_rate": 4.431279909194676e-05, "loss": 0.2757, "step": 7562 }, { "epoch": 3.516503951650395, "grad_norm": 2.872840642929077, "learning_rate": 4.436257650496849e-05, "loss": 0.2779, "step": 7564 }, { "epoch": 3.517433751743375, "grad_norm": 2.5781774520874023, "learning_rate": 4.441216281717711e-05, "loss": 0.1954, "step": 7566 }, { "epoch": 3.518363551836355, "grad_norm": 3.7175474166870117, "learning_rate": 4.446155753917576e-05, "loss": 0.2928, "step": 7568 }, { "epoch": 3.5192933519293352, "grad_norm": 2.6638898849487305, "learning_rate": 4.451076018345838e-05, "loss": 0.2848, "step": 7570 }, { "epoch": 3.5202231520223153, "grad_norm": 2.134945869445801, "learning_rate": 4.4559770264414844e-05, "loss": 0.2351, "step": 7572 }, { "epoch": 3.5211529521152953, "grad_norm": 2.869666337966919, "learning_rate": 4.4608587298335395e-05, "loss": 0.3496, "step": 7574 }, { "epoch": 3.5220827522082754, "grad_norm": 3.2922911643981934, "learning_rate": 4.465721080341564e-05, "loss": 0.3126, "step": 7576 }, { "epoch": 3.523012552301255, "grad_norm": 3.0450613498687744, "learning_rate": 4.470564029976116e-05, "loss": 0.2438, "step": 7578 }, { "epoch": 3.5239423523942355, "grad_norm": 3.3060967922210693, "learning_rate": 4.475387530939242e-05, "loss": 0.248, "step": 7580 }, { "epoch": 3.524872152487215, "grad_norm": 3.3494160175323486, "learning_rate": 4.480191535624934e-05, "loss": 0.2261, "step": 7582 }, { "epoch": 3.525801952580195, "grad_norm": 3.9044904708862305, "learning_rate": 4.484975996619604e-05, "loss": 0.3391, "step": 7584 }, { "epoch": 3.526731752673175, "grad_norm": 2.5091047286987305, "learning_rate": 4.4897408667025554e-05, "loss": 0.2428, "step": 7586 }, { "epoch": 3.5276615527661552, "grad_norm": 3.2931253910064697, "learning_rate": 4.4944860988464425e-05, "loss": 0.2348, "step": 7588 }, { "epoch": 3.5285913528591353, "grad_norm": 3.3728911876678467, "learning_rate": 4.499211646217743e-05, "loss": 0.3037, "step": 7590 }, { "epoch": 3.5295211529521153, "grad_norm": 3.6923670768737793, "learning_rate": 4.503917462177206e-05, "loss": 0.2353, "step": 7592 }, { "epoch": 3.5304509530450954, "grad_norm": 2.58791446685791, "learning_rate": 4.508603500280333e-05, "loss": 0.2556, "step": 7594 }, { "epoch": 3.5313807531380754, "grad_norm": 2.562046766281128, "learning_rate": 4.513269714277822e-05, "loss": 0.2452, "step": 7596 }, { "epoch": 3.5323105532310555, "grad_norm": 3.1058812141418457, "learning_rate": 4.517916058116018e-05, "loss": 0.2465, "step": 7598 }, { "epoch": 3.533240353324035, "grad_norm": 3.5323116779327393, "learning_rate": 4.5225424859373846e-05, "loss": 0.2428, "step": 7600 }, { "epoch": 3.5341701534170156, "grad_norm": 2.49756121635437, "learning_rate": 4.527148952080952e-05, "loss": 0.2716, "step": 7602 }, { "epoch": 3.535099953509995, "grad_norm": 2.6397817134857178, "learning_rate": 4.5317354110827514e-05, "loss": 0.2144, "step": 7604 }, { "epoch": 3.5360297536029752, "grad_norm": 2.6025853157043457, "learning_rate": 4.53630181767629e-05, "loss": 0.2341, "step": 7606 }, { "epoch": 3.5369595536959553, "grad_norm": 3.453847885131836, "learning_rate": 4.540848126792976e-05, "loss": 0.2922, "step": 7608 }, { "epoch": 3.5378893537889353, "grad_norm": 3.5407557487487793, "learning_rate": 4.545374293562577e-05, "loss": 0.2794, "step": 7610 }, { "epoch": 3.5388191538819154, "grad_norm": 2.385965347290039, "learning_rate": 4.54988027331365e-05, "loss": 0.2158, "step": 7612 }, { "epoch": 3.5397489539748954, "grad_norm": 3.4355947971343994, "learning_rate": 4.5543660215739915e-05, "loss": 0.2626, "step": 7614 }, { "epoch": 3.5406787540678755, "grad_norm": 2.198293447494507, "learning_rate": 4.5588314940710865e-05, "loss": 0.2168, "step": 7616 }, { "epoch": 3.5416085541608555, "grad_norm": 2.4197356700897217, "learning_rate": 4.5632766467325155e-05, "loss": 0.2463, "step": 7618 }, { "epoch": 3.5425383542538356, "grad_norm": 2.7123031616210938, "learning_rate": 4.567701435686421e-05, "loss": 0.1973, "step": 7620 }, { "epoch": 3.543468154346815, "grad_norm": 2.525136709213257, "learning_rate": 4.572105817261921e-05, "loss": 0.2659, "step": 7622 }, { "epoch": 3.5443979544397957, "grad_norm": 3.1492931842803955, "learning_rate": 4.5764897479895506e-05, "loss": 0.2069, "step": 7624 }, { "epoch": 3.5453277545327753, "grad_norm": 3.303525686264038, "learning_rate": 4.580853184601674e-05, "loss": 0.1811, "step": 7626 }, { "epoch": 3.5462575546257553, "grad_norm": 3.396327495574951, "learning_rate": 4.585196084032944e-05, "loss": 0.261, "step": 7628 }, { "epoch": 3.5471873547187354, "grad_norm": 3.1190390586853027, "learning_rate": 4.589518403420694e-05, "loss": 0.319, "step": 7630 }, { "epoch": 3.5481171548117154, "grad_norm": 3.526214599609375, "learning_rate": 4.593820100105372e-05, "loss": 0.2971, "step": 7632 }, { "epoch": 3.5490469549046955, "grad_norm": 3.2215473651885986, "learning_rate": 4.5981011316309725e-05, "loss": 0.2549, "step": 7634 }, { "epoch": 3.5499767549976755, "grad_norm": 3.276425838470459, "learning_rate": 4.602361455745438e-05, "loss": 0.2309, "step": 7636 }, { "epoch": 3.5509065550906556, "grad_norm": 2.890763521194458, "learning_rate": 4.606601030401099e-05, "loss": 0.2372, "step": 7638 }, { "epoch": 3.5518363551836356, "grad_norm": 2.928938150405884, "learning_rate": 4.610819813755057e-05, "loss": 0.2831, "step": 7640 }, { "epoch": 3.5527661552766157, "grad_norm": 2.5201025009155273, "learning_rate": 4.6150177641696245e-05, "loss": 0.2334, "step": 7642 }, { "epoch": 3.5536959553695953, "grad_norm": 3.0808963775634766, "learning_rate": 4.619194840212729e-05, "loss": 0.3109, "step": 7644 }, { "epoch": 3.5546257554625758, "grad_norm": 2.979259729385376, "learning_rate": 4.6233510006583124e-05, "loss": 0.2585, "step": 7646 }, { "epoch": 3.5555555555555554, "grad_norm": 2.5299060344696045, "learning_rate": 4.627486204486748e-05, "loss": 0.2537, "step": 7648 }, { "epoch": 3.5564853556485354, "grad_norm": 2.495581865310669, "learning_rate": 4.631600410885251e-05, "loss": 0.2248, "step": 7650 }, { "epoch": 3.5574151557415155, "grad_norm": 3.1196608543395996, "learning_rate": 4.635693579248258e-05, "loss": 0.2282, "step": 7652 }, { "epoch": 3.5583449558344955, "grad_norm": 2.894141435623169, "learning_rate": 4.639765669177853e-05, "loss": 0.2464, "step": 7654 }, { "epoch": 3.5592747559274756, "grad_norm": 2.7218096256256104, "learning_rate": 4.643816640484151e-05, "loss": 0.211, "step": 7656 }, { "epoch": 3.5602045560204556, "grad_norm": 2.3371822834014893, "learning_rate": 4.647846453185701e-05, "loss": 0.1941, "step": 7658 }, { "epoch": 3.5611343561134357, "grad_norm": 2.8174734115600586, "learning_rate": 4.6518550675098786e-05, "loss": 0.3234, "step": 7660 }, { "epoch": 3.5620641562064157, "grad_norm": 3.394953727722168, "learning_rate": 4.6558424438932795e-05, "loss": 0.2426, "step": 7662 }, { "epoch": 3.5629939562993957, "grad_norm": 2.260657787322998, "learning_rate": 4.65980854298211e-05, "loss": 0.1695, "step": 7664 }, { "epoch": 3.563923756392376, "grad_norm": 3.1959755420684814, "learning_rate": 4.663753325632569e-05, "loss": 0.2815, "step": 7666 }, { "epoch": 3.564853556485356, "grad_norm": 2.7431299686431885, "learning_rate": 4.6676767529112456e-05, "loss": 0.2242, "step": 7668 }, { "epoch": 3.5657833565783355, "grad_norm": 2.728187322616577, "learning_rate": 4.6715787860954966e-05, "loss": 0.204, "step": 7670 }, { "epoch": 3.566713156671316, "grad_norm": 3.747117519378662, "learning_rate": 4.6754593866738344e-05, "loss": 0.3469, "step": 7672 }, { "epoch": 3.5676429567642955, "grad_norm": 2.6632962226867676, "learning_rate": 4.6793185163462915e-05, "loss": 0.241, "step": 7674 }, { "epoch": 3.5685727568572756, "grad_norm": 2.2065749168395996, "learning_rate": 4.68315613702482e-05, "loss": 0.2086, "step": 7676 }, { "epoch": 3.5695025569502556, "grad_norm": 2.7649037837982178, "learning_rate": 4.686972210833654e-05, "loss": 0.2496, "step": 7678 }, { "epoch": 3.5704323570432357, "grad_norm": 3.2581045627593994, "learning_rate": 4.690766700109678e-05, "loss": 0.3663, "step": 7680 }, { "epoch": 3.5713621571362157, "grad_norm": 2.88521146774292, "learning_rate": 4.694539567402823e-05, "loss": 0.3218, "step": 7682 }, { "epoch": 3.572291957229196, "grad_norm": 3.31158185005188, "learning_rate": 4.698290775476409e-05, "loss": 0.2709, "step": 7684 }, { "epoch": 3.573221757322176, "grad_norm": 2.668733835220337, "learning_rate": 4.702020287307529e-05, "loss": 0.3219, "step": 7686 }, { "epoch": 3.574151557415156, "grad_norm": 2.608994483947754, "learning_rate": 4.705728066087404e-05, "loss": 0.2879, "step": 7688 }, { "epoch": 3.575081357508136, "grad_norm": 2.475283145904541, "learning_rate": 4.709414075221753e-05, "loss": 0.2532, "step": 7690 }, { "epoch": 3.5760111576011155, "grad_norm": 2.3272323608398438, "learning_rate": 4.713078278331158e-05, "loss": 0.2206, "step": 7692 }, { "epoch": 3.576940957694096, "grad_norm": 3.35691499710083, "learning_rate": 4.716720639251409e-05, "loss": 0.2529, "step": 7694 }, { "epoch": 3.5778707577870756, "grad_norm": 2.8482017517089844, "learning_rate": 4.720341122033878e-05, "loss": 0.2671, "step": 7696 }, { "epoch": 3.5788005578800557, "grad_norm": 2.0255887508392334, "learning_rate": 4.723939690945864e-05, "loss": 0.2807, "step": 7698 }, { "epoch": 3.5797303579730357, "grad_norm": 2.3097710609436035, "learning_rate": 4.727516310470938e-05, "loss": 0.2155, "step": 7700 }, { "epoch": 3.580660158066016, "grad_norm": 2.6818885803222656, "learning_rate": 4.7310709453093135e-05, "loss": 0.2096, "step": 7702 }, { "epoch": 3.581589958158996, "grad_norm": 3.2840182781219482, "learning_rate": 4.734603560378177e-05, "loss": 0.2173, "step": 7704 }, { "epoch": 3.582519758251976, "grad_norm": 3.671363115310669, "learning_rate": 4.738114120812048e-05, "loss": 0.3101, "step": 7706 }, { "epoch": 3.583449558344956, "grad_norm": 3.0874147415161133, "learning_rate": 4.7416025919631074e-05, "loss": 0.2621, "step": 7708 }, { "epoch": 3.584379358437936, "grad_norm": 2.6933584213256836, "learning_rate": 4.7450689394015576e-05, "loss": 0.235, "step": 7710 }, { "epoch": 3.585309158530916, "grad_norm": 3.2047760486602783, "learning_rate": 4.748513128915947e-05, "loss": 0.254, "step": 7712 }, { "epoch": 3.5862389586238956, "grad_norm": 2.195707321166992, "learning_rate": 4.7519351265135126e-05, "loss": 0.2125, "step": 7714 }, { "epoch": 3.587168758716876, "grad_norm": 3.0831594467163086, "learning_rate": 4.755334898420524e-05, "loss": 0.2271, "step": 7716 }, { "epoch": 3.5880985588098557, "grad_norm": 3.7192463874816895, "learning_rate": 4.7587124110826044e-05, "loss": 0.2995, "step": 7718 }, { "epoch": 3.589028358902836, "grad_norm": 2.862208127975464, "learning_rate": 4.762067631165068e-05, "loss": 0.2632, "step": 7720 }, { "epoch": 3.589958158995816, "grad_norm": 3.556110143661499, "learning_rate": 4.7654005255532434e-05, "loss": 0.2757, "step": 7722 }, { "epoch": 3.590887959088796, "grad_norm": 2.857353448867798, "learning_rate": 4.7687110613528095e-05, "loss": 0.2571, "step": 7724 }, { "epoch": 3.591817759181776, "grad_norm": 2.912627696990967, "learning_rate": 4.771999205890116e-05, "loss": 0.2299, "step": 7726 }, { "epoch": 3.592747559274756, "grad_norm": 2.2422070503234863, "learning_rate": 4.7752649267125055e-05, "loss": 0.2103, "step": 7728 }, { "epoch": 3.593677359367736, "grad_norm": 2.447611093521118, "learning_rate": 4.7785081915886296e-05, "loss": 0.2007, "step": 7730 }, { "epoch": 3.594607159460716, "grad_norm": 2.427457332611084, "learning_rate": 4.7817289685087726e-05, "loss": 0.1835, "step": 7732 }, { "epoch": 3.595536959553696, "grad_norm": 2.6047489643096924, "learning_rate": 4.784927225685171e-05, "loss": 0.2434, "step": 7734 }, { "epoch": 3.5964667596466757, "grad_norm": 2.639940023422241, "learning_rate": 4.788102931552309e-05, "loss": 0.2285, "step": 7736 }, { "epoch": 3.597396559739656, "grad_norm": 2.43754506111145, "learning_rate": 4.79125605476726e-05, "loss": 0.1839, "step": 7738 }, { "epoch": 3.598326359832636, "grad_norm": 3.0298149585723877, "learning_rate": 4.794386564209969e-05, "loss": 0.3012, "step": 7740 }, { "epoch": 3.599256159925616, "grad_norm": 2.4265151023864746, "learning_rate": 4.79749442898357e-05, "loss": 0.2358, "step": 7742 }, { "epoch": 3.600185960018596, "grad_norm": 3.0116007328033447, "learning_rate": 4.8005796184146924e-05, "loss": 0.3127, "step": 7744 }, { "epoch": 3.601115760111576, "grad_norm": 2.9370062351226807, "learning_rate": 4.803642102053764e-05, "loss": 0.2475, "step": 7746 }, { "epoch": 3.602045560204556, "grad_norm": 2.5819900035858154, "learning_rate": 4.806681849675304e-05, "loss": 0.2501, "step": 7748 }, { "epoch": 3.602975360297536, "grad_norm": 2.585880994796753, "learning_rate": 4.8096988312782336e-05, "loss": 0.2133, "step": 7750 }, { "epoch": 3.603905160390516, "grad_norm": 2.333098888397217, "learning_rate": 4.8126930170861606e-05, "loss": 0.2833, "step": 7752 }, { "epoch": 3.604834960483496, "grad_norm": 2.7011311054229736, "learning_rate": 4.815664377547684e-05, "loss": 0.2427, "step": 7754 }, { "epoch": 3.605764760576476, "grad_norm": 3.3584096431732178, "learning_rate": 4.8186128833366714e-05, "loss": 0.2575, "step": 7756 }, { "epoch": 3.606694560669456, "grad_norm": 2.7888262271881104, "learning_rate": 4.8215385053525596e-05, "loss": 0.2739, "step": 7758 }, { "epoch": 3.6076243607624363, "grad_norm": 2.778366804122925, "learning_rate": 4.824441214720646e-05, "loss": 0.2274, "step": 7760 }, { "epoch": 3.608554160855416, "grad_norm": 2.524911642074585, "learning_rate": 4.827320982792356e-05, "loss": 0.197, "step": 7762 }, { "epoch": 3.609483960948396, "grad_norm": 3.4187934398651123, "learning_rate": 4.830177781145544e-05, "loss": 0.2448, "step": 7764 }, { "epoch": 3.610413761041376, "grad_norm": 2.6683993339538574, "learning_rate": 4.833011581584762e-05, "loss": 0.2655, "step": 7766 }, { "epoch": 3.611343561134356, "grad_norm": 2.5112547874450684, "learning_rate": 4.835822356141547e-05, "loss": 0.2242, "step": 7768 }, { "epoch": 3.612273361227336, "grad_norm": 3.639547824859619, "learning_rate": 4.8386100770746837e-05, "loss": 0.2246, "step": 7770 }, { "epoch": 3.613203161320316, "grad_norm": 2.97390079498291, "learning_rate": 4.841374716870496e-05, "loss": 0.2336, "step": 7772 }, { "epoch": 3.614132961413296, "grad_norm": 2.142557382583618, "learning_rate": 4.844116248243107e-05, "loss": 0.2056, "step": 7774 }, { "epoch": 3.6150627615062763, "grad_norm": 3.516589879989624, "learning_rate": 4.846834644134702e-05, "loss": 0.3041, "step": 7776 }, { "epoch": 3.6159925615992563, "grad_norm": 3.0695548057556152, "learning_rate": 4.849529877715817e-05, "loss": 0.2532, "step": 7778 }, { "epoch": 3.616922361692236, "grad_norm": 3.597452163696289, "learning_rate": 4.8522019223855805e-05, "loss": 0.1995, "step": 7780 }, { "epoch": 3.6178521617852164, "grad_norm": 3.6347389221191406, "learning_rate": 4.854850751771995e-05, "loss": 0.2752, "step": 7782 }, { "epoch": 3.618781961878196, "grad_norm": 2.717806339263916, "learning_rate": 4.8574763397321804e-05, "loss": 0.243, "step": 7784 }, { "epoch": 3.619711761971176, "grad_norm": 2.9878475666046143, "learning_rate": 4.860078660352643e-05, "loss": 0.3092, "step": 7786 }, { "epoch": 3.620641562064156, "grad_norm": 2.4426677227020264, "learning_rate": 4.8626576879495316e-05, "loss": 0.1591, "step": 7788 }, { "epoch": 3.621571362157136, "grad_norm": 3.962740898132324, "learning_rate": 4.865213397068883e-05, "loss": 0.3217, "step": 7790 }, { "epoch": 3.622501162250116, "grad_norm": 2.7650933265686035, "learning_rate": 4.867745762486878e-05, "loss": 0.2471, "step": 7792 }, { "epoch": 3.6234309623430963, "grad_norm": 3.108595848083496, "learning_rate": 4.870254759210098e-05, "loss": 0.2108, "step": 7794 }, { "epoch": 3.6243607624360763, "grad_norm": 2.83571195602417, "learning_rate": 4.872740362475754e-05, "loss": 0.245, "step": 7796 }, { "epoch": 3.6252905625290563, "grad_norm": 3.652222156524658, "learning_rate": 4.875202547751946e-05, "loss": 0.2775, "step": 7798 }, { "epoch": 3.6262203626220364, "grad_norm": 2.85150146484375, "learning_rate": 4.8776412907379005e-05, "loss": 0.2403, "step": 7800 }, { "epoch": 3.627150162715016, "grad_norm": 3.2547192573547363, "learning_rate": 4.880056567364208e-05, "loss": 0.234, "step": 7802 }, { "epoch": 3.6280799628079965, "grad_norm": 2.8753137588500977, "learning_rate": 4.882448353793063e-05, "loss": 0.2285, "step": 7804 }, { "epoch": 3.629009762900976, "grad_norm": 2.4512455463409424, "learning_rate": 4.8848166264185e-05, "loss": 0.1831, "step": 7806 }, { "epoch": 3.629939562993956, "grad_norm": 3.6614491939544678, "learning_rate": 4.8871613618666234e-05, "loss": 0.2936, "step": 7808 }, { "epoch": 3.630869363086936, "grad_norm": 3.1265628337860107, "learning_rate": 4.889482536995842e-05, "loss": 0.2492, "step": 7810 }, { "epoch": 3.6317991631799162, "grad_norm": 2.420058488845825, "learning_rate": 4.891780128897092e-05, "loss": 0.2398, "step": 7812 }, { "epoch": 3.6327289632728963, "grad_norm": 3.371392250061035, "learning_rate": 4.89405411489407e-05, "loss": 0.2824, "step": 7814 }, { "epoch": 3.6336587633658763, "grad_norm": 3.300036668777466, "learning_rate": 4.8963044725434557e-05, "loss": 0.2468, "step": 7816 }, { "epoch": 3.6345885634588564, "grad_norm": 2.270955801010132, "learning_rate": 4.898531179635123e-05, "loss": 0.2268, "step": 7818 }, { "epoch": 3.6355183635518364, "grad_norm": 2.3415980339050293, "learning_rate": 4.900734214192374e-05, "loss": 0.2144, "step": 7820 }, { "epoch": 3.6364481636448165, "grad_norm": 3.5607142448425293, "learning_rate": 4.902913554472148e-05, "loss": 0.3406, "step": 7822 }, { "epoch": 3.6373779637377965, "grad_norm": 2.454770088195801, "learning_rate": 4.9050691789652316e-05, "loss": 0.1975, "step": 7824 }, { "epoch": 3.6383077638307766, "grad_norm": 1.9549963474273682, "learning_rate": 4.907201066396486e-05, "loss": 0.1821, "step": 7826 }, { "epoch": 3.639237563923756, "grad_norm": 3.2903878688812256, "learning_rate": 4.909309195725042e-05, "loss": 0.292, "step": 7828 }, { "epoch": 3.6401673640167362, "grad_norm": 3.251258134841919, "learning_rate": 4.911393546144513e-05, "loss": 0.2489, "step": 7830 }, { "epoch": 3.6410971641097163, "grad_norm": 3.0982301235198975, "learning_rate": 4.913454097083202e-05, "loss": 0.2317, "step": 7832 }, { "epoch": 3.6420269642026963, "grad_norm": 2.8693766593933105, "learning_rate": 4.9154908282043026e-05, "loss": 0.2377, "step": 7834 }, { "epoch": 3.6429567642956764, "grad_norm": 2.6368916034698486, "learning_rate": 4.917503719406104e-05, "loss": 0.2346, "step": 7836 }, { "epoch": 3.6438865643886564, "grad_norm": 2.837641477584839, "learning_rate": 4.9194927508221794e-05, "loss": 0.3168, "step": 7838 }, { "epoch": 3.6448163644816365, "grad_norm": 3.238783597946167, "learning_rate": 4.921457902821593e-05, "loss": 0.325, "step": 7840 }, { "epoch": 3.6457461645746165, "grad_norm": 2.6749279499053955, "learning_rate": 4.923399156009089e-05, "loss": 0.225, "step": 7842 }, { "epoch": 3.6466759646675966, "grad_norm": 2.8794147968292236, "learning_rate": 4.925316491225281e-05, "loss": 0.2288, "step": 7844 }, { "epoch": 3.6476057647605766, "grad_norm": 3.067106008529663, "learning_rate": 4.927209889546843e-05, "loss": 0.2821, "step": 7846 }, { "epoch": 3.6485355648535567, "grad_norm": 2.7627665996551514, "learning_rate": 4.9290793322867004e-05, "loss": 0.235, "step": 7848 }, { "epoch": 3.6494653649465363, "grad_norm": 3.1852638721466064, "learning_rate": 4.930924800994208e-05, "loss": 0.2073, "step": 7850 }, { "epoch": 3.6503951650395168, "grad_norm": 3.0244925022125244, "learning_rate": 4.9327462774553335e-05, "loss": 0.2411, "step": 7852 }, { "epoch": 3.6513249651324964, "grad_norm": 3.353541612625122, "learning_rate": 4.9345437436928385e-05, "loss": 0.2792, "step": 7854 }, { "epoch": 3.6522547652254764, "grad_norm": 2.289024591445923, "learning_rate": 4.936317181966461e-05, "loss": 0.2045, "step": 7856 }, { "epoch": 3.6531845653184565, "grad_norm": 3.061635732650757, "learning_rate": 4.938066574773076e-05, "loss": 0.248, "step": 7858 }, { "epoch": 3.6541143654114365, "grad_norm": 4.370569229125977, "learning_rate": 4.9397919048468854e-05, "loss": 0.2534, "step": 7860 }, { "epoch": 3.6550441655044166, "grad_norm": 2.9974634647369385, "learning_rate": 4.941493155159578e-05, "loss": 0.3052, "step": 7862 }, { "epoch": 3.6559739655973966, "grad_norm": 3.328640937805176, "learning_rate": 4.9431703089205015e-05, "loss": 0.2362, "step": 7864 }, { "epoch": 3.6569037656903767, "grad_norm": 2.222116708755493, "learning_rate": 4.944823349576823e-05, "loss": 0.2096, "step": 7866 }, { "epoch": 3.6578335657833567, "grad_norm": 3.296079635620117, "learning_rate": 4.946452260813698e-05, "loss": 0.2258, "step": 7868 }, { "epoch": 3.6587633658763368, "grad_norm": 2.688526153564453, "learning_rate": 4.9480570265544334e-05, "loss": 0.2629, "step": 7870 }, { "epoch": 3.6596931659693164, "grad_norm": 3.356679916381836, "learning_rate": 4.949637630960637e-05, "loss": 0.2473, "step": 7872 }, { "epoch": 3.660622966062297, "grad_norm": 3.2147059440612793, "learning_rate": 4.951194058432381e-05, "loss": 0.2474, "step": 7874 }, { "epoch": 3.6615527661552765, "grad_norm": 3.36542010307312, "learning_rate": 4.9527262936083546e-05, "loss": 0.2429, "step": 7876 }, { "epoch": 3.6624825662482565, "grad_norm": 2.138643503189087, "learning_rate": 4.9542343213660184e-05, "loss": 0.1807, "step": 7878 }, { "epoch": 3.6634123663412366, "grad_norm": 2.9456398487091064, "learning_rate": 4.955718126821742e-05, "loss": 0.2212, "step": 7880 }, { "epoch": 3.6643421664342166, "grad_norm": 2.8238654136657715, "learning_rate": 4.957177695330968e-05, "loss": 0.2595, "step": 7882 }, { "epoch": 3.6652719665271967, "grad_norm": 2.9002981185913086, "learning_rate": 4.9586130124883446e-05, "loss": 0.3129, "step": 7884 }, { "epoch": 3.6662017666201767, "grad_norm": 3.1963083744049072, "learning_rate": 4.9600240641278686e-05, "loss": 0.3066, "step": 7886 }, { "epoch": 3.6671315667131568, "grad_norm": 3.023055076599121, "learning_rate": 4.9614108363230325e-05, "loss": 0.2864, "step": 7888 }, { "epoch": 3.668061366806137, "grad_norm": 3.203054666519165, "learning_rate": 4.9627733153869535e-05, "loss": 0.3166, "step": 7890 }, { "epoch": 3.668991166899117, "grad_norm": 2.320852518081665, "learning_rate": 4.964111487872513e-05, "loss": 0.2408, "step": 7892 }, { "epoch": 3.6699209669920965, "grad_norm": 4.463388919830322, "learning_rate": 4.96542534057249e-05, "loss": 0.3356, "step": 7894 }, { "epoch": 3.670850767085077, "grad_norm": 2.266859531402588, "learning_rate": 4.9667148605196884e-05, "loss": 0.1759, "step": 7896 }, { "epoch": 3.6717805671780566, "grad_norm": 2.9034640789031982, "learning_rate": 4.967980034987068e-05, "loss": 0.2601, "step": 7898 }, { "epoch": 3.6727103672710366, "grad_norm": 2.4287378787994385, "learning_rate": 4.969220851487864e-05, "loss": 0.2605, "step": 7900 }, { "epoch": 3.6736401673640167, "grad_norm": 2.8401217460632324, "learning_rate": 4.97043729777572e-05, "loss": 0.2535, "step": 7902 }, { "epoch": 3.6745699674569967, "grad_norm": 2.7343997955322266, "learning_rate": 4.9716293618448046e-05, "loss": 0.2517, "step": 7904 }, { "epoch": 3.6754997675499768, "grad_norm": 2.3939692974090576, "learning_rate": 4.972797031929924e-05, "loss": 0.2601, "step": 7906 }, { "epoch": 3.676429567642957, "grad_norm": 2.825551986694336, "learning_rate": 4.9739402965066466e-05, "loss": 0.344, "step": 7908 }, { "epoch": 3.677359367735937, "grad_norm": 2.9998838901519775, "learning_rate": 4.9750591442914144e-05, "loss": 0.2122, "step": 7910 }, { "epoch": 3.678289167828917, "grad_norm": 2.973876953125, "learning_rate": 4.976153564241648e-05, "loss": 0.2202, "step": 7912 }, { "epoch": 3.679218967921897, "grad_norm": 2.612277030944824, "learning_rate": 4.977223545555868e-05, "loss": 0.2255, "step": 7914 }, { "epoch": 3.6801487680148766, "grad_norm": 2.2036819458007812, "learning_rate": 4.978269077673788e-05, "loss": 0.2398, "step": 7916 }, { "epoch": 3.681078568107857, "grad_norm": 2.4219489097595215, "learning_rate": 4.97929015027643e-05, "loss": 0.2454, "step": 7918 }, { "epoch": 3.6820083682008367, "grad_norm": 3.8694939613342285, "learning_rate": 4.980286753286217e-05, "loss": 0.299, "step": 7920 }, { "epoch": 3.6829381682938167, "grad_norm": 2.7328972816467285, "learning_rate": 4.9812588768670824e-05, "loss": 0.2964, "step": 7922 }, { "epoch": 3.6838679683867968, "grad_norm": 2.657982587814331, "learning_rate": 4.9822065114245564e-05, "loss": 0.2149, "step": 7924 }, { "epoch": 3.684797768479777, "grad_norm": 2.680103063583374, "learning_rate": 4.983129647605871e-05, "loss": 0.2827, "step": 7926 }, { "epoch": 3.685727568572757, "grad_norm": 3.9053823947906494, "learning_rate": 4.984028276300044e-05, "loss": 0.2552, "step": 7928 }, { "epoch": 3.686657368665737, "grad_norm": 4.631673812866211, "learning_rate": 4.9849023886379714e-05, "loss": 0.326, "step": 7930 }, { "epoch": 3.687587168758717, "grad_norm": 3.52547550201416, "learning_rate": 4.9857519759925204e-05, "loss": 0.2685, "step": 7932 }, { "epoch": 3.688516968851697, "grad_norm": 3.4195241928100586, "learning_rate": 4.986577029978604e-05, "loss": 0.2404, "step": 7934 }, { "epoch": 3.689446768944677, "grad_norm": 2.8277978897094727, "learning_rate": 4.9873775424532725e-05, "loss": 0.2484, "step": 7936 }, { "epoch": 3.6903765690376567, "grad_norm": 2.6767449378967285, "learning_rate": 4.9881535055157934e-05, "loss": 0.2136, "step": 7938 }, { "epoch": 3.691306369130637, "grad_norm": 3.3550848960876465, "learning_rate": 4.988904911507723e-05, "loss": 0.3394, "step": 7940 }, { "epoch": 3.6922361692236167, "grad_norm": 3.098503351211548, "learning_rate": 4.989631753012987e-05, "loss": 0.2494, "step": 7942 }, { "epoch": 3.693165969316597, "grad_norm": 3.3708696365356445, "learning_rate": 4.9903340228579535e-05, "loss": 0.3457, "step": 7944 }, { "epoch": 3.694095769409577, "grad_norm": 3.0565507411956787, "learning_rate": 4.991011714111503e-05, "loss": 0.3551, "step": 7946 }, { "epoch": 3.695025569502557, "grad_norm": 3.9119653701782227, "learning_rate": 4.9916648200850974e-05, "loss": 0.3354, "step": 7948 }, { "epoch": 3.695955369595537, "grad_norm": 3.1161160469055176, "learning_rate": 4.992293334332843e-05, "loss": 0.2558, "step": 7950 }, { "epoch": 3.696885169688517, "grad_norm": 3.827669382095337, "learning_rate": 4.9928972506515585e-05, "loss": 0.2525, "step": 7952 }, { "epoch": 3.697814969781497, "grad_norm": 2.3083951473236084, "learning_rate": 4.993476563080833e-05, "loss": 0.2705, "step": 7954 }, { "epoch": 3.698744769874477, "grad_norm": 2.8407065868377686, "learning_rate": 4.994031265903087e-05, "loss": 0.2098, "step": 7956 }, { "epoch": 3.699674569967457, "grad_norm": 3.9036951065063477, "learning_rate": 4.994561353643627e-05, "loss": 0.3333, "step": 7958 }, { "epoch": 3.7006043700604367, "grad_norm": 3.592925548553467, "learning_rate": 4.995066821070702e-05, "loss": 0.2855, "step": 7960 }, { "epoch": 3.7015341701534172, "grad_norm": 2.8542630672454834, "learning_rate": 4.9955476631955535e-05, "loss": 0.3196, "step": 7962 }, { "epoch": 3.702463970246397, "grad_norm": 2.541085958480835, "learning_rate": 4.996003875272461e-05, "loss": 0.2869, "step": 7964 }, { "epoch": 3.703393770339377, "grad_norm": 2.721511125564575, "learning_rate": 4.996435452798797e-05, "loss": 0.2429, "step": 7966 }, { "epoch": 3.704323570432357, "grad_norm": 3.3943941593170166, "learning_rate": 4.996842391515067e-05, "loss": 0.3094, "step": 7968 }, { "epoch": 3.705253370525337, "grad_norm": 2.2818708419799805, "learning_rate": 4.997224687404948e-05, "loss": 0.2679, "step": 7970 }, { "epoch": 3.706183170618317, "grad_norm": 3.135613441467285, "learning_rate": 4.997582336695335e-05, "loss": 0.2128, "step": 7972 }, { "epoch": 3.707112970711297, "grad_norm": 3.714482307434082, "learning_rate": 4.9979153358563726e-05, "loss": 0.3389, "step": 7974 }, { "epoch": 3.708042770804277, "grad_norm": 2.736750602722168, "learning_rate": 4.9982236816014956e-05, "loss": 0.1724, "step": 7976 }, { "epoch": 3.708972570897257, "grad_norm": 2.785733938217163, "learning_rate": 4.9985073708874545e-05, "loss": 0.2385, "step": 7978 }, { "epoch": 3.7099023709902372, "grad_norm": 3.0003421306610107, "learning_rate": 4.9987664009143515e-05, "loss": 0.2761, "step": 7980 }, { "epoch": 3.710832171083217, "grad_norm": 2.069260358810425, "learning_rate": 4.999000769125664e-05, "loss": 0.1508, "step": 7982 }, { "epoch": 3.7117619711761973, "grad_norm": 3.3932034969329834, "learning_rate": 4.999210473208273e-05, "loss": 0.2782, "step": 7984 }, { "epoch": 3.712691771269177, "grad_norm": 2.2365520000457764, "learning_rate": 4.999395511092484e-05, "loss": 0.1743, "step": 7986 }, { "epoch": 3.713621571362157, "grad_norm": 2.4134230613708496, "learning_rate": 4.9995558809520465e-05, "loss": 0.2402, "step": 7988 }, { "epoch": 3.714551371455137, "grad_norm": 2.3585946559906006, "learning_rate": 4.999691581204176e-05, "loss": 0.1663, "step": 7990 }, { "epoch": 3.715481171548117, "grad_norm": 2.2696988582611084, "learning_rate": 4.999802610509565e-05, "loss": 0.203, "step": 7992 }, { "epoch": 3.716410971641097, "grad_norm": 2.875305414199829, "learning_rate": 4.999888967772399e-05, "loss": 0.246, "step": 7994 }, { "epoch": 3.717340771734077, "grad_norm": 3.325753927230835, "learning_rate": 4.9999506521403664e-05, "loss": 0.2608, "step": 7996 }, { "epoch": 3.7182705718270572, "grad_norm": 3.1921114921569824, "learning_rate": 4.9999876630046704e-05, "loss": 0.2326, "step": 7998 }, { "epoch": 3.7192003719200373, "grad_norm": 2.8543388843536377, "learning_rate": 5.000000000000026e-05, "loss": 0.2843, "step": 8000 }, { "epoch": 3.7192003719200373, "eval_cer": 0.26172471690252863, "eval_loss": 0.3987942636013031, "eval_runtime": 403.0436, "eval_samples_per_second": 31.495, "eval_steps_per_second": 0.985, "step": 8000 }, { "epoch": 3.7201301720130173, "grad_norm": 2.7766966819763184, "learning_rate": 4.999987663004672e-05, "loss": 0.3078, "step": 8002 }, { "epoch": 3.7210599721059974, "grad_norm": 2.1671407222747803, "learning_rate": 4.9999506521403685e-05, "loss": 0.2148, "step": 8004 }, { "epoch": 3.7219897721989774, "grad_norm": 2.4135890007019043, "learning_rate": 4.999888967772401e-05, "loss": 0.2142, "step": 8006 }, { "epoch": 3.722919572291957, "grad_norm": 3.022691011428833, "learning_rate": 4.999802610509567e-05, "loss": 0.2514, "step": 8008 }, { "epoch": 3.723849372384937, "grad_norm": 2.4085938930511475, "learning_rate": 4.999691581204178e-05, "loss": 0.2883, "step": 8010 }, { "epoch": 3.724779172477917, "grad_norm": 4.14946174621582, "learning_rate": 4.999555880952049e-05, "loss": 0.2826, "step": 8012 }, { "epoch": 3.725708972570897, "grad_norm": 4.055613994598389, "learning_rate": 4.999395511092487e-05, "loss": 0.3647, "step": 8014 }, { "epoch": 3.726638772663877, "grad_norm": 3.3122217655181885, "learning_rate": 4.9992104732082766e-05, "loss": 0.2244, "step": 8016 }, { "epoch": 3.7275685727568573, "grad_norm": 4.4033331871032715, "learning_rate": 4.999000769125667e-05, "loss": 0.3076, "step": 8018 }, { "epoch": 3.7284983728498373, "grad_norm": 2.1139936447143555, "learning_rate": 4.998766400914355e-05, "loss": 0.2399, "step": 8020 }, { "epoch": 3.7294281729428174, "grad_norm": 3.174017906188965, "learning_rate": 4.998507370887458e-05, "loss": 0.2624, "step": 8022 }, { "epoch": 3.7303579730357974, "grad_norm": 2.8489444255828857, "learning_rate": 4.9982236816014996e-05, "loss": 0.2481, "step": 8024 }, { "epoch": 3.7312877731287775, "grad_norm": 2.8492448329925537, "learning_rate": 4.997915335856377e-05, "loss": 0.256, "step": 8026 }, { "epoch": 3.7322175732217575, "grad_norm": 3.189265251159668, "learning_rate": 4.997582336695338e-05, "loss": 0.2527, "step": 8028 }, { "epoch": 3.733147373314737, "grad_norm": 2.948866844177246, "learning_rate": 4.997224687404952e-05, "loss": 0.2274, "step": 8030 }, { "epoch": 3.7340771734077176, "grad_norm": 2.3835227489471436, "learning_rate": 4.9968423915150704e-05, "loss": 0.2313, "step": 8032 }, { "epoch": 3.735006973500697, "grad_norm": 2.1010122299194336, "learning_rate": 4.9964354527988006e-05, "loss": 0.2127, "step": 8034 }, { "epoch": 3.7359367735936773, "grad_norm": 2.800577163696289, "learning_rate": 4.996003875272464e-05, "loss": 0.1884, "step": 8036 }, { "epoch": 3.7368665736866573, "grad_norm": 2.4294087886810303, "learning_rate": 4.995547663195556e-05, "loss": 0.2886, "step": 8038 }, { "epoch": 3.7377963737796374, "grad_norm": 2.9741618633270264, "learning_rate": 4.995066821070706e-05, "loss": 0.2283, "step": 8040 }, { "epoch": 3.7387261738726174, "grad_norm": 2.4612722396850586, "learning_rate": 4.99456135364363e-05, "loss": 0.2263, "step": 8042 }, { "epoch": 3.7396559739655975, "grad_norm": 2.6447160243988037, "learning_rate": 4.9940312659030894e-05, "loss": 0.1991, "step": 8044 }, { "epoch": 3.7405857740585775, "grad_norm": 3.4036412239074707, "learning_rate": 4.993476563080835e-05, "loss": 0.2359, "step": 8046 }, { "epoch": 3.7415155741515576, "grad_norm": 2.7422494888305664, "learning_rate": 4.9928972506515605e-05, "loss": 0.2287, "step": 8048 }, { "epoch": 3.7424453742445376, "grad_norm": 2.786564826965332, "learning_rate": 4.992293334332846e-05, "loss": 0.2588, "step": 8050 }, { "epoch": 3.743375174337517, "grad_norm": 2.570756673812866, "learning_rate": 4.9916648200851e-05, "loss": 0.1789, "step": 8052 }, { "epoch": 3.7443049744304977, "grad_norm": 2.4598302841186523, "learning_rate": 4.991011714111506e-05, "loss": 0.3134, "step": 8054 }, { "epoch": 3.7452347745234773, "grad_norm": 2.785999059677124, "learning_rate": 4.990334022857957e-05, "loss": 0.2404, "step": 8056 }, { "epoch": 3.7461645746164574, "grad_norm": 2.641249418258667, "learning_rate": 4.98963175301299e-05, "loss": 0.2715, "step": 8058 }, { "epoch": 3.7470943747094374, "grad_norm": 3.0907766819000244, "learning_rate": 4.9889049115077256e-05, "loss": 0.2735, "step": 8060 }, { "epoch": 3.7480241748024175, "grad_norm": 2.7753095626831055, "learning_rate": 4.988153505515797e-05, "loss": 0.1926, "step": 8062 }, { "epoch": 3.7489539748953975, "grad_norm": 3.0814311504364014, "learning_rate": 4.987377542453276e-05, "loss": 0.2539, "step": 8064 }, { "epoch": 3.7498837749883775, "grad_norm": 2.5546793937683105, "learning_rate": 4.9865770299786066e-05, "loss": 0.2702, "step": 8066 }, { "epoch": 3.7508135750813576, "grad_norm": 2.550631046295166, "learning_rate": 4.9857519759925224e-05, "loss": 0.2402, "step": 8068 }, { "epoch": 3.7517433751743376, "grad_norm": 2.8203680515289307, "learning_rate": 4.984902388637974e-05, "loss": 0.2076, "step": 8070 }, { "epoch": 3.7526731752673177, "grad_norm": 3.195861339569092, "learning_rate": 4.984028276300046e-05, "loss": 0.3414, "step": 8072 }, { "epoch": 3.7536029753602973, "grad_norm": 2.4286227226257324, "learning_rate": 4.9831296476058735e-05, "loss": 0.2164, "step": 8074 }, { "epoch": 3.754532775453278, "grad_norm": 3.0162439346313477, "learning_rate": 4.982206511424559e-05, "loss": 0.254, "step": 8076 }, { "epoch": 3.7554625755462574, "grad_norm": 2.7290685176849365, "learning_rate": 4.9812588768670845e-05, "loss": 0.2088, "step": 8078 }, { "epoch": 3.7563923756392374, "grad_norm": 2.698110342025757, "learning_rate": 4.980286753286219e-05, "loss": 0.325, "step": 8080 }, { "epoch": 3.7573221757322175, "grad_norm": 1.7826166152954102, "learning_rate": 4.979290150276431e-05, "loss": 0.1947, "step": 8082 }, { "epoch": 3.7582519758251975, "grad_norm": 3.2553465366363525, "learning_rate": 4.97826907767379e-05, "loss": 0.2684, "step": 8084 }, { "epoch": 3.7591817759181776, "grad_norm": 2.4725024700164795, "learning_rate": 4.977223545555871e-05, "loss": 0.2553, "step": 8086 }, { "epoch": 3.7601115760111576, "grad_norm": 2.772545337677002, "learning_rate": 4.976153564241652e-05, "loss": 0.2075, "step": 8088 }, { "epoch": 3.7610413761041377, "grad_norm": 2.7550978660583496, "learning_rate": 4.975059144291417e-05, "loss": 0.2143, "step": 8090 }, { "epoch": 3.7619711761971177, "grad_norm": 3.652714729309082, "learning_rate": 4.9739402965066506e-05, "loss": 0.2642, "step": 8092 }, { "epoch": 3.762900976290098, "grad_norm": 3.274480104446411, "learning_rate": 4.9727970319299275e-05, "loss": 0.3387, "step": 8094 }, { "epoch": 3.7638307763830774, "grad_norm": 2.3484017848968506, "learning_rate": 4.9716293618448074e-05, "loss": 0.1598, "step": 8096 }, { "epoch": 3.764760576476058, "grad_norm": 3.0696914196014404, "learning_rate": 4.970437297775724e-05, "loss": 0.1929, "step": 8098 }, { "epoch": 3.7656903765690375, "grad_norm": 2.0794317722320557, "learning_rate": 4.969220851487868e-05, "loss": 0.194, "step": 8100 }, { "epoch": 3.7666201766620175, "grad_norm": 2.8965883255004883, "learning_rate": 4.967980034987072e-05, "loss": 0.2395, "step": 8102 }, { "epoch": 3.7675499767549976, "grad_norm": 2.435157299041748, "learning_rate": 4.966714860519693e-05, "loss": 0.2434, "step": 8104 }, { "epoch": 3.7684797768479776, "grad_norm": 2.1070330142974854, "learning_rate": 4.9654253405724955e-05, "loss": 0.158, "step": 8106 }, { "epoch": 3.7694095769409577, "grad_norm": 3.4382708072662354, "learning_rate": 4.964111487872518e-05, "loss": 0.2587, "step": 8108 }, { "epoch": 3.7703393770339377, "grad_norm": 4.414654731750488, "learning_rate": 4.9627733153869575e-05, "loss": 0.2727, "step": 8110 }, { "epoch": 3.771269177126918, "grad_norm": 2.753021717071533, "learning_rate": 4.9614108363230366e-05, "loss": 0.2199, "step": 8112 }, { "epoch": 3.772198977219898, "grad_norm": 1.8667055368423462, "learning_rate": 4.9600240641278727e-05, "loss": 0.2082, "step": 8114 }, { "epoch": 3.773128777312878, "grad_norm": 3.143601655960083, "learning_rate": 4.958613012488348e-05, "loss": 0.2397, "step": 8116 }, { "epoch": 3.7740585774058575, "grad_norm": 2.872163772583008, "learning_rate": 4.95717769533097e-05, "loss": 0.2739, "step": 8118 }, { "epoch": 3.774988377498838, "grad_norm": 3.464322566986084, "learning_rate": 4.955718126821744e-05, "loss": 0.3014, "step": 8120 }, { "epoch": 3.7759181775918176, "grad_norm": 4.2420172691345215, "learning_rate": 4.954234321366021e-05, "loss": 0.2873, "step": 8122 }, { "epoch": 3.7768479776847976, "grad_norm": 2.7194104194641113, "learning_rate": 4.9527262936083586e-05, "loss": 0.2438, "step": 8124 }, { "epoch": 3.7777777777777777, "grad_norm": 2.8952724933624268, "learning_rate": 4.951194058432385e-05, "loss": 0.3095, "step": 8126 }, { "epoch": 3.7787075778707577, "grad_norm": 3.45328688621521, "learning_rate": 4.94963763096064e-05, "loss": 0.2744, "step": 8128 }, { "epoch": 3.7796373779637378, "grad_norm": 3.5090270042419434, "learning_rate": 4.948057026554437e-05, "loss": 0.2639, "step": 8130 }, { "epoch": 3.780567178056718, "grad_norm": 2.643578290939331, "learning_rate": 4.9464522608137036e-05, "loss": 0.2364, "step": 8132 }, { "epoch": 3.781496978149698, "grad_norm": 3.15549635887146, "learning_rate": 4.944823349576828e-05, "loss": 0.2542, "step": 8134 }, { "epoch": 3.782426778242678, "grad_norm": 3.2814691066741943, "learning_rate": 4.943170308920507e-05, "loss": 0.3017, "step": 8136 }, { "epoch": 3.783356578335658, "grad_norm": 2.8161611557006836, "learning_rate": 4.941493155159584e-05, "loss": 0.2581, "step": 8138 }, { "epoch": 3.7842863784286376, "grad_norm": 3.1337621212005615, "learning_rate": 4.9397919048468915e-05, "loss": 0.2781, "step": 8140 }, { "epoch": 3.785216178521618, "grad_norm": 2.440497398376465, "learning_rate": 4.938066574773082e-05, "loss": 0.1868, "step": 8142 }, { "epoch": 3.7861459786145977, "grad_norm": 2.7108354568481445, "learning_rate": 4.9363171819664665e-05, "loss": 0.2659, "step": 8144 }, { "epoch": 3.7870757787075777, "grad_norm": 3.8851375579833984, "learning_rate": 4.934543743692845e-05, "loss": 0.2956, "step": 8146 }, { "epoch": 3.7880055788005578, "grad_norm": 2.9010705947875977, "learning_rate": 4.9327462774553396e-05, "loss": 0.2607, "step": 8148 }, { "epoch": 3.788935378893538, "grad_norm": 3.6058075428009033, "learning_rate": 4.930924800994215e-05, "loss": 0.2221, "step": 8150 }, { "epoch": 3.789865178986518, "grad_norm": 3.0704843997955322, "learning_rate": 4.9290793322867065e-05, "loss": 0.2792, "step": 8152 }, { "epoch": 3.790794979079498, "grad_norm": 3.390058994293213, "learning_rate": 4.927209889546851e-05, "loss": 0.3076, "step": 8154 }, { "epoch": 3.791724779172478, "grad_norm": 2.8959834575653076, "learning_rate": 4.9253164912252885e-05, "loss": 0.2431, "step": 8156 }, { "epoch": 3.792654579265458, "grad_norm": 2.7071309089660645, "learning_rate": 4.923399156009096e-05, "loss": 0.2008, "step": 8158 }, { "epoch": 3.793584379358438, "grad_norm": 3.1507017612457275, "learning_rate": 4.921457902821601e-05, "loss": 0.2276, "step": 8160 }, { "epoch": 3.7945141794514177, "grad_norm": 4.353388786315918, "learning_rate": 4.9194927508221875e-05, "loss": 0.321, "step": 8162 }, { "epoch": 3.795443979544398, "grad_norm": 2.448948621749878, "learning_rate": 4.917503719406111e-05, "loss": 0.2363, "step": 8164 }, { "epoch": 3.7963737796373778, "grad_norm": 2.532958507537842, "learning_rate": 4.915490828204311e-05, "loss": 0.1542, "step": 8166 }, { "epoch": 3.797303579730358, "grad_norm": 3.088200092315674, "learning_rate": 4.913454097083208e-05, "loss": 0.2962, "step": 8168 }, { "epoch": 3.798233379823338, "grad_norm": 2.6031711101531982, "learning_rate": 4.9113935461445186e-05, "loss": 0.2844, "step": 8170 }, { "epoch": 3.799163179916318, "grad_norm": 3.6954784393310547, "learning_rate": 4.909309195725047e-05, "loss": 0.2386, "step": 8172 }, { "epoch": 3.800092980009298, "grad_norm": 3.119452476501465, "learning_rate": 4.907201066396492e-05, "loss": 0.3009, "step": 8174 }, { "epoch": 3.801022780102278, "grad_norm": 2.7031643390655518, "learning_rate": 4.9050691789652384e-05, "loss": 0.2088, "step": 8176 }, { "epoch": 3.801952580195258, "grad_norm": 2.738098382949829, "learning_rate": 4.902913554472153e-05, "loss": 0.2018, "step": 8178 }, { "epoch": 3.802882380288238, "grad_norm": 2.6290903091430664, "learning_rate": 4.9007342141923815e-05, "loss": 0.2409, "step": 8180 }, { "epoch": 3.803812180381218, "grad_norm": 2.8420073986053467, "learning_rate": 4.89853117963513e-05, "loss": 0.2688, "step": 8182 }, { "epoch": 3.804741980474198, "grad_norm": 2.7122297286987305, "learning_rate": 4.896304472543463e-05, "loss": 0.2924, "step": 8184 }, { "epoch": 3.8056717805671783, "grad_norm": 3.312624931335449, "learning_rate": 4.894054114894078e-05, "loss": 0.2654, "step": 8186 }, { "epoch": 3.806601580660158, "grad_norm": 2.9934282302856445, "learning_rate": 4.8917801288971e-05, "loss": 0.2364, "step": 8188 }, { "epoch": 3.8075313807531384, "grad_norm": 2.0087320804595947, "learning_rate": 4.8894825369958485e-05, "loss": 0.1812, "step": 8190 }, { "epoch": 3.808461180846118, "grad_norm": 2.8301870822906494, "learning_rate": 4.8871613618666295e-05, "loss": 0.2128, "step": 8192 }, { "epoch": 3.809390980939098, "grad_norm": 3.1256184577941895, "learning_rate": 4.884816626418507e-05, "loss": 0.2934, "step": 8194 }, { "epoch": 3.810320781032078, "grad_norm": 2.4052138328552246, "learning_rate": 4.88244835379307e-05, "loss": 0.2563, "step": 8196 }, { "epoch": 3.811250581125058, "grad_norm": 2.6901779174804688, "learning_rate": 4.880056567364216e-05, "loss": 0.1724, "step": 8198 }, { "epoch": 3.812180381218038, "grad_norm": 3.1927080154418945, "learning_rate": 4.8776412907379066e-05, "loss": 0.2194, "step": 8200 }, { "epoch": 3.813110181311018, "grad_norm": 2.6374762058258057, "learning_rate": 4.8752025477519525e-05, "loss": 0.1932, "step": 8202 }, { "epoch": 3.8140399814039982, "grad_norm": 2.8907968997955322, "learning_rate": 4.8727403624757596e-05, "loss": 0.2806, "step": 8204 }, { "epoch": 3.8149697814969783, "grad_norm": 1.7876404523849487, "learning_rate": 4.870254759210102e-05, "loss": 0.1575, "step": 8206 }, { "epoch": 3.8158995815899583, "grad_norm": 2.4112133979797363, "learning_rate": 4.867745762486883e-05, "loss": 0.2025, "step": 8208 }, { "epoch": 3.816829381682938, "grad_norm": 2.6900136470794678, "learning_rate": 4.8652133970688866e-05, "loss": 0.2465, "step": 8210 }, { "epoch": 3.8177591817759184, "grad_norm": 2.7659549713134766, "learning_rate": 4.862657687949535e-05, "loss": 0.2313, "step": 8212 }, { "epoch": 3.818688981868898, "grad_norm": 3.0211691856384277, "learning_rate": 4.860078660352647e-05, "loss": 0.2081, "step": 8214 }, { "epoch": 3.819618781961878, "grad_norm": 2.5793182849884033, "learning_rate": 4.857476339732183e-05, "loss": 0.2759, "step": 8216 }, { "epoch": 3.820548582054858, "grad_norm": 3.15354585647583, "learning_rate": 4.8548507517719985e-05, "loss": 0.2385, "step": 8218 }, { "epoch": 3.821478382147838, "grad_norm": 3.893343210220337, "learning_rate": 4.852201922385586e-05, "loss": 0.2716, "step": 8220 }, { "epoch": 3.8224081822408182, "grad_norm": 3.1675844192504883, "learning_rate": 4.849529877715822e-05, "loss": 0.2551, "step": 8222 }, { "epoch": 3.8233379823337983, "grad_norm": 3.0484158992767334, "learning_rate": 4.846834644134708e-05, "loss": 0.2578, "step": 8224 }, { "epoch": 3.8242677824267783, "grad_norm": 2.362525463104248, "learning_rate": 4.84411624824311e-05, "loss": 0.2047, "step": 8226 }, { "epoch": 3.8251975825197584, "grad_norm": 2.7241673469543457, "learning_rate": 4.8413747168705025e-05, "loss": 0.2219, "step": 8228 }, { "epoch": 3.8261273826127384, "grad_norm": 2.840646982192993, "learning_rate": 4.83861007707469e-05, "loss": 0.1967, "step": 8230 }, { "epoch": 3.827057182705718, "grad_norm": 2.2045156955718994, "learning_rate": 4.8358223561415535e-05, "loss": 0.2055, "step": 8232 }, { "epoch": 3.8279869827986985, "grad_norm": 2.672792673110962, "learning_rate": 4.833011581584769e-05, "loss": 0.2323, "step": 8234 }, { "epoch": 3.828916782891678, "grad_norm": 2.282832384109497, "learning_rate": 4.8301777811455506e-05, "loss": 0.2215, "step": 8236 }, { "epoch": 3.829846582984658, "grad_norm": 2.859915018081665, "learning_rate": 4.827320982792362e-05, "loss": 0.2316, "step": 8238 }, { "epoch": 3.8307763830776382, "grad_norm": 2.9785945415496826, "learning_rate": 4.8244412147206494e-05, "loss": 0.2353, "step": 8240 }, { "epoch": 3.8317061831706183, "grad_norm": 3.2604329586029053, "learning_rate": 4.8215385053525663e-05, "loss": 0.218, "step": 8242 }, { "epoch": 3.8326359832635983, "grad_norm": 2.189638137817383, "learning_rate": 4.818612883336677e-05, "loss": 0.1784, "step": 8244 }, { "epoch": 3.8335657833565784, "grad_norm": 2.8637804985046387, "learning_rate": 4.81566437754769e-05, "loss": 0.2113, "step": 8246 }, { "epoch": 3.8344955834495584, "grad_norm": 2.5387775897979736, "learning_rate": 4.812693017086166e-05, "loss": 0.1981, "step": 8248 }, { "epoch": 3.8354253835425385, "grad_norm": 2.834033727645874, "learning_rate": 4.8096988312782384e-05, "loss": 0.2511, "step": 8250 }, { "epoch": 3.8363551836355185, "grad_norm": 3.0306236743927, "learning_rate": 4.806681849675309e-05, "loss": 0.2035, "step": 8252 }, { "epoch": 3.837284983728498, "grad_norm": 2.0712809562683105, "learning_rate": 4.8036421020537687e-05, "loss": 0.1728, "step": 8254 }, { "epoch": 3.8382147838214786, "grad_norm": 2.7843847274780273, "learning_rate": 4.8005796184146985e-05, "loss": 0.2179, "step": 8256 }, { "epoch": 3.8391445839144582, "grad_norm": 3.5398292541503906, "learning_rate": 4.797494428983576e-05, "loss": 0.2746, "step": 8258 }, { "epoch": 3.8400743840074383, "grad_norm": 2.2988762855529785, "learning_rate": 4.794386564209976e-05, "loss": 0.2208, "step": 8260 }, { "epoch": 3.8410041841004183, "grad_norm": 2.458406448364258, "learning_rate": 4.791256054767267e-05, "loss": 0.2542, "step": 8262 }, { "epoch": 3.8419339841933984, "grad_norm": 2.704108238220215, "learning_rate": 4.788102931552318e-05, "loss": 0.1936, "step": 8264 }, { "epoch": 3.8428637842863784, "grad_norm": 2.1414551734924316, "learning_rate": 4.784927225685178e-05, "loss": 0.2304, "step": 8266 }, { "epoch": 3.8437935843793585, "grad_norm": 1.9662048816680908, "learning_rate": 4.78172896850878e-05, "loss": 0.1861, "step": 8268 }, { "epoch": 3.8447233844723385, "grad_norm": 2.3924639225006104, "learning_rate": 4.778508191588635e-05, "loss": 0.2247, "step": 8270 }, { "epoch": 3.8456531845653186, "grad_norm": 2.1860909461975098, "learning_rate": 4.7752649267125116e-05, "loss": 0.1859, "step": 8272 }, { "epoch": 3.8465829846582986, "grad_norm": 2.597364664077759, "learning_rate": 4.771999205890121e-05, "loss": 0.2278, "step": 8274 }, { "epoch": 3.8475127847512782, "grad_norm": 2.4710965156555176, "learning_rate": 4.768711061352815e-05, "loss": 0.1975, "step": 8276 }, { "epoch": 3.8484425848442587, "grad_norm": 2.3354978561401367, "learning_rate": 4.765400525553247e-05, "loss": 0.2155, "step": 8278 }, { "epoch": 3.8493723849372383, "grad_norm": 3.01297664642334, "learning_rate": 4.762067631165071e-05, "loss": 0.2543, "step": 8280 }, { "epoch": 3.8503021850302184, "grad_norm": 1.9095052480697632, "learning_rate": 4.758712411082608e-05, "loss": 0.2103, "step": 8282 }, { "epoch": 3.8512319851231984, "grad_norm": 2.8993711471557617, "learning_rate": 4.755334898420527e-05, "loss": 0.181, "step": 8284 }, { "epoch": 3.8521617852161785, "grad_norm": 2.45082950592041, "learning_rate": 4.7519351265135174e-05, "loss": 0.2433, "step": 8286 }, { "epoch": 3.8530915853091585, "grad_norm": 2.889491558074951, "learning_rate": 4.7485131289159486e-05, "loss": 0.2624, "step": 8288 }, { "epoch": 3.8540213854021386, "grad_norm": 2.6434824466705322, "learning_rate": 4.745068939401561e-05, "loss": 0.2119, "step": 8290 }, { "epoch": 3.8549511854951186, "grad_norm": 1.834343433380127, "learning_rate": 4.741602591963111e-05, "loss": 0.1734, "step": 8292 }, { "epoch": 3.8558809855880987, "grad_norm": 2.1618881225585938, "learning_rate": 4.7381141208120506e-05, "loss": 0.2322, "step": 8294 }, { "epoch": 3.8568107856810787, "grad_norm": 3.5825581550598145, "learning_rate": 4.7346035603781794e-05, "loss": 0.2465, "step": 8296 }, { "epoch": 3.8577405857740583, "grad_norm": 2.378101110458374, "learning_rate": 4.731070945309316e-05, "loss": 0.1835, "step": 8298 }, { "epoch": 3.858670385867039, "grad_norm": 2.0412776470184326, "learning_rate": 4.7275163104709416e-05, "loss": 0.2188, "step": 8300 }, { "epoch": 3.8596001859600184, "grad_norm": 2.8416526317596436, "learning_rate": 4.7239396909458655e-05, "loss": 0.2593, "step": 8302 }, { "epoch": 3.8605299860529985, "grad_norm": 3.7021570205688477, "learning_rate": 4.720341122033882e-05, "loss": 0.249, "step": 8304 }, { "epoch": 3.8614597861459785, "grad_norm": 2.536156415939331, "learning_rate": 4.716720639251413e-05, "loss": 0.2205, "step": 8306 }, { "epoch": 3.8623895862389586, "grad_norm": 2.9699113368988037, "learning_rate": 4.713078278331162e-05, "loss": 0.2353, "step": 8308 }, { "epoch": 3.8633193863319386, "grad_norm": 2.9944353103637695, "learning_rate": 4.7094140752217546e-05, "loss": 0.3019, "step": 8310 }, { "epoch": 3.8642491864249187, "grad_norm": 2.5622775554656982, "learning_rate": 4.7057280660874045e-05, "loss": 0.2185, "step": 8312 }, { "epoch": 3.8651789865178987, "grad_norm": 2.9187562465667725, "learning_rate": 4.702020287307531e-05, "loss": 0.2512, "step": 8314 }, { "epoch": 3.8661087866108788, "grad_norm": 1.8735779523849487, "learning_rate": 4.6982907754764116e-05, "loss": 0.2216, "step": 8316 }, { "epoch": 3.867038586703859, "grad_norm": 1.859209656715393, "learning_rate": 4.694539567402825e-05, "loss": 0.1703, "step": 8318 }, { "epoch": 3.8679683867968384, "grad_norm": 2.8990399837493896, "learning_rate": 4.690766700109682e-05, "loss": 0.2105, "step": 8320 }, { "epoch": 3.868898186889819, "grad_norm": 2.7220051288604736, "learning_rate": 4.6869722108336533e-05, "loss": 0.2626, "step": 8322 }, { "epoch": 3.8698279869827985, "grad_norm": 2.7200429439544678, "learning_rate": 4.683156137024822e-05, "loss": 0.2154, "step": 8324 }, { "epoch": 3.8707577870757786, "grad_norm": 2.9183402061462402, "learning_rate": 4.679318516346293e-05, "loss": 0.2451, "step": 8326 }, { "epoch": 3.8716875871687586, "grad_norm": 2.8810296058654785, "learning_rate": 4.675459386673836e-05, "loss": 0.1826, "step": 8328 }, { "epoch": 3.8726173872617387, "grad_norm": 2.405285120010376, "learning_rate": 4.6715787860955006e-05, "loss": 0.1529, "step": 8330 }, { "epoch": 3.8735471873547187, "grad_norm": 2.6916592121124268, "learning_rate": 4.667676752911247e-05, "loss": 0.2309, "step": 8332 }, { "epoch": 3.8744769874476988, "grad_norm": 2.495647668838501, "learning_rate": 4.663753325632571e-05, "loss": 0.2181, "step": 8334 }, { "epoch": 3.875406787540679, "grad_norm": 3.0750770568847656, "learning_rate": 4.65980854298211e-05, "loss": 0.1898, "step": 8336 }, { "epoch": 3.876336587633659, "grad_norm": 2.121208429336548, "learning_rate": 4.655842443893282e-05, "loss": 0.1657, "step": 8338 }, { "epoch": 3.877266387726639, "grad_norm": 2.535130739212036, "learning_rate": 4.6518550675098814e-05, "loss": 0.1814, "step": 8340 }, { "epoch": 3.878196187819619, "grad_norm": 2.0544960498809814, "learning_rate": 4.647846453185706e-05, "loss": 0.1486, "step": 8342 }, { "epoch": 3.879125987912599, "grad_norm": 2.390524387359619, "learning_rate": 4.643816640484153e-05, "loss": 0.2347, "step": 8344 }, { "epoch": 3.8800557880055786, "grad_norm": 2.827871322631836, "learning_rate": 4.6397656691778555e-05, "loss": 0.1951, "step": 8346 }, { "epoch": 3.8809855880985586, "grad_norm": 2.0524678230285645, "learning_rate": 4.6356935792482615e-05, "loss": 0.1951, "step": 8348 }, { "epoch": 3.8819153881915387, "grad_norm": 2.388744831085205, "learning_rate": 4.6316004108852515e-05, "loss": 0.206, "step": 8350 }, { "epoch": 3.8828451882845187, "grad_norm": 3.0331225395202637, "learning_rate": 4.6274862044867534e-05, "loss": 0.2073, "step": 8352 }, { "epoch": 3.883774988377499, "grad_norm": 2.9405741691589355, "learning_rate": 4.623351000658316e-05, "loss": 0.2414, "step": 8354 }, { "epoch": 3.884704788470479, "grad_norm": 1.8240925073623657, "learning_rate": 4.6191948402127325e-05, "loss": 0.1762, "step": 8356 }, { "epoch": 3.885634588563459, "grad_norm": 2.9777369499206543, "learning_rate": 4.615017764169628e-05, "loss": 0.32, "step": 8358 }, { "epoch": 3.886564388656439, "grad_norm": 2.5619659423828125, "learning_rate": 4.61081981375506e-05, "loss": 0.2497, "step": 8360 }, { "epoch": 3.887494188749419, "grad_norm": 2.8551290035247803, "learning_rate": 4.606601030401103e-05, "loss": 0.1925, "step": 8362 }, { "epoch": 3.888423988842399, "grad_norm": 2.509394884109497, "learning_rate": 4.6023614557454455e-05, "loss": 0.1588, "step": 8364 }, { "epoch": 3.889353788935379, "grad_norm": 3.258483409881592, "learning_rate": 4.5981011316309766e-05, "loss": 0.2081, "step": 8366 }, { "epoch": 3.8902835890283587, "grad_norm": 3.24672532081604, "learning_rate": 4.593820100105377e-05, "loss": 0.1977, "step": 8368 }, { "epoch": 3.891213389121339, "grad_norm": 3.229588270187378, "learning_rate": 4.589518403420696e-05, "loss": 0.2762, "step": 8370 }, { "epoch": 3.892143189214319, "grad_norm": 2.1287591457366943, "learning_rate": 4.585196084032949e-05, "loss": 0.1782, "step": 8372 }, { "epoch": 3.893072989307299, "grad_norm": 3.231313943862915, "learning_rate": 4.580853184601683e-05, "loss": 0.2735, "step": 8374 }, { "epoch": 3.894002789400279, "grad_norm": 2.2594783306121826, "learning_rate": 4.576489747989556e-05, "loss": 0.2157, "step": 8376 }, { "epoch": 3.894932589493259, "grad_norm": 2.6132869720458984, "learning_rate": 4.5721058172619274e-05, "loss": 0.1979, "step": 8378 }, { "epoch": 3.895862389586239, "grad_norm": 2.7089269161224365, "learning_rate": 4.567701435686428e-05, "loss": 0.229, "step": 8380 }, { "epoch": 3.896792189679219, "grad_norm": 2.754486083984375, "learning_rate": 4.563276646732522e-05, "loss": 0.1851, "step": 8382 }, { "epoch": 3.897721989772199, "grad_norm": 2.882051467895508, "learning_rate": 4.55883149407109e-05, "loss": 0.2364, "step": 8384 }, { "epoch": 3.898651789865179, "grad_norm": 2.644434928894043, "learning_rate": 4.5543660215739996e-05, "loss": 0.2456, "step": 8386 }, { "epoch": 3.899581589958159, "grad_norm": 3.0474047660827637, "learning_rate": 4.549880273313655e-05, "loss": 0.2077, "step": 8388 }, { "epoch": 3.900511390051139, "grad_norm": 2.504011392593384, "learning_rate": 4.5453742935625834e-05, "loss": 0.2674, "step": 8390 }, { "epoch": 3.9014411901441193, "grad_norm": 3.2036054134368896, "learning_rate": 4.540848126792982e-05, "loss": 0.2093, "step": 8392 }, { "epoch": 3.902370990237099, "grad_norm": 2.7996487617492676, "learning_rate": 4.536301817676296e-05, "loss": 0.2005, "step": 8394 }, { "epoch": 3.903300790330079, "grad_norm": 2.760298013687134, "learning_rate": 4.5317354110827595e-05, "loss": 0.2186, "step": 8396 }, { "epoch": 3.904230590423059, "grad_norm": 2.5152859687805176, "learning_rate": 4.527148952080955e-05, "loss": 0.1635, "step": 8398 }, { "epoch": 3.905160390516039, "grad_norm": 2.612818956375122, "learning_rate": 4.52254248593739e-05, "loss": 0.2149, "step": 8400 }, { "epoch": 3.906090190609019, "grad_norm": 2.161641836166382, "learning_rate": 4.517916058116022e-05, "loss": 0.1837, "step": 8402 }, { "epoch": 3.907019990701999, "grad_norm": 3.2393383979797363, "learning_rate": 4.5132697142778266e-05, "loss": 0.2079, "step": 8404 }, { "epoch": 3.907949790794979, "grad_norm": 3.3584046363830566, "learning_rate": 4.508603500280339e-05, "loss": 0.2082, "step": 8406 }, { "epoch": 3.908879590887959, "grad_norm": 2.4305264949798584, "learning_rate": 4.503917462177214e-05, "loss": 0.1784, "step": 8408 }, { "epoch": 3.9098093909809393, "grad_norm": 3.7539126873016357, "learning_rate": 4.49921164621775e-05, "loss": 0.2655, "step": 8410 }, { "epoch": 3.910739191073919, "grad_norm": 3.643847703933716, "learning_rate": 4.494486098846448e-05, "loss": 0.2889, "step": 8412 }, { "epoch": 3.9116689911668994, "grad_norm": 2.564301013946533, "learning_rate": 4.48974086670256e-05, "loss": 0.1734, "step": 8414 }, { "epoch": 3.912598791259879, "grad_norm": 2.805377244949341, "learning_rate": 4.484975996619609e-05, "loss": 0.2309, "step": 8416 }, { "epoch": 3.913528591352859, "grad_norm": 2.4102346897125244, "learning_rate": 4.480191535624938e-05, "loss": 0.2024, "step": 8418 }, { "epoch": 3.914458391445839, "grad_norm": 2.4354000091552734, "learning_rate": 4.475387530939247e-05, "loss": 0.1915, "step": 8420 }, { "epoch": 3.915388191538819, "grad_norm": 1.808992624282837, "learning_rate": 4.470564029976122e-05, "loss": 0.1385, "step": 8422 }, { "epoch": 3.916317991631799, "grad_norm": 2.8585901260375977, "learning_rate": 4.4657210803415694e-05, "loss": 0.2463, "step": 8424 }, { "epoch": 3.917247791724779, "grad_norm": 2.997735023498535, "learning_rate": 4.460858729833545e-05, "loss": 0.2503, "step": 8426 }, { "epoch": 3.9181775918177593, "grad_norm": 2.9814720153808594, "learning_rate": 4.4559770264414905e-05, "loss": 0.2455, "step": 8428 }, { "epoch": 3.9191073919107393, "grad_norm": 2.245917320251465, "learning_rate": 4.451076018345848e-05, "loss": 0.1841, "step": 8430 }, { "epoch": 3.9200371920037194, "grad_norm": 2.743295431137085, "learning_rate": 4.44615575391758e-05, "loss": 0.1796, "step": 8432 }, { "epoch": 3.920966992096699, "grad_norm": 2.2985739707946777, "learning_rate": 4.441216281717718e-05, "loss": 0.2069, "step": 8434 }, { "epoch": 3.9218967921896795, "grad_norm": 2.5192644596099854, "learning_rate": 4.436257650496857e-05, "loss": 0.2839, "step": 8436 }, { "epoch": 3.922826592282659, "grad_norm": 2.6360092163085938, "learning_rate": 4.4312799091946833e-05, "loss": 0.1948, "step": 8438 }, { "epoch": 3.923756392375639, "grad_norm": 2.087210178375244, "learning_rate": 4.426283106939497e-05, "loss": 0.2553, "step": 8440 }, { "epoch": 3.924686192468619, "grad_norm": 2.1117300987243652, "learning_rate": 4.421267293047715e-05, "loss": 0.2251, "step": 8442 }, { "epoch": 3.925615992561599, "grad_norm": 2.3051741123199463, "learning_rate": 4.416232517023399e-05, "loss": 0.1737, "step": 8444 }, { "epoch": 3.9265457926545793, "grad_norm": 2.4551239013671875, "learning_rate": 4.411178828557751e-05, "loss": 0.2127, "step": 8446 }, { "epoch": 3.9274755927475593, "grad_norm": 2.7145121097564697, "learning_rate": 4.406106277528642e-05, "loss": 0.2487, "step": 8448 }, { "epoch": 3.9284053928405394, "grad_norm": 2.472622871398926, "learning_rate": 4.4010149140000995e-05, "loss": 0.2286, "step": 8450 }, { "epoch": 3.9293351929335194, "grad_norm": 3.011075019836426, "learning_rate": 4.39590478822183e-05, "loss": 0.2164, "step": 8452 }, { "epoch": 3.9302649930264995, "grad_norm": 1.664389967918396, "learning_rate": 4.390775950628702e-05, "loss": 0.1982, "step": 8454 }, { "epoch": 3.931194793119479, "grad_norm": 1.9632017612457275, "learning_rate": 4.3856284518402824e-05, "loss": 0.1427, "step": 8456 }, { "epoch": 3.9321245932124596, "grad_norm": 2.1162192821502686, "learning_rate": 4.380462342660301e-05, "loss": 0.2299, "step": 8458 }, { "epoch": 3.933054393305439, "grad_norm": 2.749530076980591, "learning_rate": 4.3752776740761686e-05, "loss": 0.2165, "step": 8460 }, { "epoch": 3.933984193398419, "grad_norm": 3.431551456451416, "learning_rate": 4.370074497258479e-05, "loss": 0.2837, "step": 8462 }, { "epoch": 3.9349139934913993, "grad_norm": 2.5480902194976807, "learning_rate": 4.3648528635604786e-05, "loss": 0.1829, "step": 8464 }, { "epoch": 3.9358437935843793, "grad_norm": 2.746234178543091, "learning_rate": 4.359612824517583e-05, "loss": 0.2005, "step": 8466 }, { "epoch": 3.9367735936773594, "grad_norm": 3.205883502960205, "learning_rate": 4.354354431846869e-05, "loss": 0.2503, "step": 8468 }, { "epoch": 3.9377033937703394, "grad_norm": 2.797685384750366, "learning_rate": 4.349077737446546e-05, "loss": 0.2391, "step": 8470 }, { "epoch": 3.9386331938633194, "grad_norm": 2.8414721488952637, "learning_rate": 4.3437827933954553e-05, "loss": 0.2216, "step": 8472 }, { "epoch": 3.9395629939562995, "grad_norm": 2.5119800567626953, "learning_rate": 4.3384696519525606e-05, "loss": 0.1434, "step": 8474 }, { "epoch": 3.9404927940492795, "grad_norm": 2.646545171737671, "learning_rate": 4.3331383655564216e-05, "loss": 0.2516, "step": 8476 }, { "epoch": 3.941422594142259, "grad_norm": 1.7792067527770996, "learning_rate": 4.3277889868246824e-05, "loss": 0.1466, "step": 8478 }, { "epoch": 3.9423523942352396, "grad_norm": 2.830963134765625, "learning_rate": 4.322421568553548e-05, "loss": 0.2272, "step": 8480 }, { "epoch": 3.9432821943282192, "grad_norm": 3.2019808292388916, "learning_rate": 4.317036163717275e-05, "loss": 0.2701, "step": 8482 }, { "epoch": 3.9442119944211993, "grad_norm": 2.712547779083252, "learning_rate": 4.311632825467639e-05, "loss": 0.2239, "step": 8484 }, { "epoch": 3.9451417945141793, "grad_norm": 2.120455503463745, "learning_rate": 4.306211607133397e-05, "loss": 0.2263, "step": 8486 }, { "epoch": 3.9460715946071594, "grad_norm": 2.5154736042022705, "learning_rate": 4.300772562219787e-05, "loss": 0.2084, "step": 8488 }, { "epoch": 3.9470013947001394, "grad_norm": 2.7215237617492676, "learning_rate": 4.2953157444079916e-05, "loss": 0.2282, "step": 8490 }, { "epoch": 3.9479311947931195, "grad_norm": 1.8944098949432373, "learning_rate": 4.289841207554598e-05, "loss": 0.1604, "step": 8492 }, { "epoch": 3.9488609948860995, "grad_norm": 1.8433828353881836, "learning_rate": 4.2843490056910704e-05, "loss": 0.1651, "step": 8494 }, { "epoch": 3.9497907949790796, "grad_norm": 2.5410048961639404, "learning_rate": 4.278839193023233e-05, "loss": 0.1533, "step": 8496 }, { "epoch": 3.9507205950720596, "grad_norm": 1.6937899589538574, "learning_rate": 4.273311823930705e-05, "loss": 0.1388, "step": 8498 }, { "epoch": 3.9516503951650392, "grad_norm": 3.1855337619781494, "learning_rate": 4.267766952966389e-05, "loss": 0.1939, "step": 8500 }, { "epoch": 3.9525801952580197, "grad_norm": 2.5961058139801025, "learning_rate": 4.262204634855921e-05, "loss": 0.2144, "step": 8502 }, { "epoch": 3.9535099953509993, "grad_norm": 2.8282582759857178, "learning_rate": 4.2566249244971415e-05, "loss": 0.2228, "step": 8504 }, { "epoch": 3.9544397954439794, "grad_norm": 3.3933703899383545, "learning_rate": 4.251027876959538e-05, "loss": 0.2725, "step": 8506 }, { "epoch": 3.9553695955369594, "grad_norm": 2.6198999881744385, "learning_rate": 4.245413547483701e-05, "loss": 0.1658, "step": 8508 }, { "epoch": 3.9562993956299395, "grad_norm": 2.700222969055176, "learning_rate": 4.239781991480805e-05, "loss": 0.1885, "step": 8510 }, { "epoch": 3.9572291957229195, "grad_norm": 2.546473264694214, "learning_rate": 4.234133264532032e-05, "loss": 0.1632, "step": 8512 }, { "epoch": 3.9581589958158996, "grad_norm": 3.14447021484375, "learning_rate": 4.228467422388033e-05, "loss": 0.2105, "step": 8514 }, { "epoch": 3.9590887959088796, "grad_norm": 3.15266752243042, "learning_rate": 4.2227845209683886e-05, "loss": 0.2518, "step": 8516 }, { "epoch": 3.9600185960018597, "grad_norm": 2.374213933944702, "learning_rate": 4.2170846163610416e-05, "loss": 0.1761, "step": 8518 }, { "epoch": 3.9609483960948397, "grad_norm": 2.3704278469085693, "learning_rate": 4.2113677648217435e-05, "loss": 0.199, "step": 8520 }, { "epoch": 3.96187819618782, "grad_norm": 2.7927823066711426, "learning_rate": 4.20563402277351e-05, "loss": 0.2097, "step": 8522 }, { "epoch": 3.9628079962808, "grad_norm": 2.6909492015838623, "learning_rate": 4.199883446806068e-05, "loss": 0.2196, "step": 8524 }, { "epoch": 3.9637377963737794, "grad_norm": 2.313643217086792, "learning_rate": 4.194116093675275e-05, "loss": 0.1469, "step": 8526 }, { "epoch": 3.9646675964667595, "grad_norm": 2.8027071952819824, "learning_rate": 4.18833202030258e-05, "loss": 0.1907, "step": 8528 }, { "epoch": 3.9655973965597395, "grad_norm": 2.28627347946167, "learning_rate": 4.182531283774453e-05, "loss": 0.1962, "step": 8530 }, { "epoch": 3.9665271966527196, "grad_norm": 2.027620792388916, "learning_rate": 4.176713941341821e-05, "loss": 0.1977, "step": 8532 }, { "epoch": 3.9674569967456996, "grad_norm": 2.425851345062256, "learning_rate": 4.1708800504195037e-05, "loss": 0.2481, "step": 8534 }, { "epoch": 3.9683867968386797, "grad_norm": 2.655757427215576, "learning_rate": 4.165029668585647e-05, "loss": 0.1903, "step": 8536 }, { "epoch": 3.9693165969316597, "grad_norm": 1.8428852558135986, "learning_rate": 4.159162853581165e-05, "loss": 0.129, "step": 8538 }, { "epoch": 3.9702463970246398, "grad_norm": 2.6966781616210938, "learning_rate": 4.153279663309152e-05, "loss": 0.2208, "step": 8540 }, { "epoch": 3.97117619711762, "grad_norm": 2.58829665184021, "learning_rate": 4.147380155834312e-05, "loss": 0.2169, "step": 8542 }, { "epoch": 3.9721059972106, "grad_norm": 2.2883591651916504, "learning_rate": 4.141464389382412e-05, "loss": 0.165, "step": 8544 }, { "epoch": 3.97303579730358, "grad_norm": 2.1577186584472656, "learning_rate": 4.135532422339673e-05, "loss": 0.2145, "step": 8546 }, { "epoch": 3.9739655973965595, "grad_norm": 2.730070114135742, "learning_rate": 4.129584313252218e-05, "loss": 0.2399, "step": 8548 }, { "epoch": 3.97489539748954, "grad_norm": 2.235725164413452, "learning_rate": 4.1236201208254806e-05, "loss": 0.1999, "step": 8550 }, { "epoch": 3.9758251975825196, "grad_norm": 2.6411521434783936, "learning_rate": 4.117639903923633e-05, "loss": 0.2027, "step": 8552 }, { "epoch": 3.9767549976754997, "grad_norm": 3.1344783306121826, "learning_rate": 4.111643721569001e-05, "loss": 0.2292, "step": 8554 }, { "epoch": 3.9776847977684797, "grad_norm": 2.3142566680908203, "learning_rate": 4.10563163294148e-05, "loss": 0.2396, "step": 8556 }, { "epoch": 3.9786145978614598, "grad_norm": 2.243251085281372, "learning_rate": 4.0996036973779654e-05, "loss": 0.1591, "step": 8558 }, { "epoch": 3.97954439795444, "grad_norm": 2.3057799339294434, "learning_rate": 4.093559974371744e-05, "loss": 0.135, "step": 8560 }, { "epoch": 3.98047419804742, "grad_norm": 4.3783183097839355, "learning_rate": 4.087500523571921e-05, "loss": 0.175, "step": 8562 }, { "epoch": 3.9814039981404, "grad_norm": 2.2466962337493896, "learning_rate": 4.081425404782831e-05, "loss": 0.2239, "step": 8564 }, { "epoch": 3.98233379823338, "grad_norm": 2.90397047996521, "learning_rate": 4.075334677963443e-05, "loss": 0.1818, "step": 8566 }, { "epoch": 3.98326359832636, "grad_norm": 2.333691120147705, "learning_rate": 4.069228403226772e-05, "loss": 0.1649, "step": 8568 }, { "epoch": 3.9841933984193396, "grad_norm": 2.250447988510132, "learning_rate": 4.0631066408392794e-05, "loss": 0.1444, "step": 8570 }, { "epoch": 3.98512319851232, "grad_norm": 1.6195836067199707, "learning_rate": 4.0569694512203014e-05, "loss": 0.1261, "step": 8572 }, { "epoch": 3.9860529986052997, "grad_norm": 2.5721726417541504, "learning_rate": 4.050816894941411e-05, "loss": 0.1858, "step": 8574 }, { "epoch": 3.9869827986982798, "grad_norm": 3.087644577026367, "learning_rate": 4.044649032725853e-05, "loss": 0.2036, "step": 8576 }, { "epoch": 3.98791259879126, "grad_norm": 2.3334462642669678, "learning_rate": 4.038465925447946e-05, "loss": 0.229, "step": 8578 }, { "epoch": 3.98884239888424, "grad_norm": 2.788666009902954, "learning_rate": 4.032267634132459e-05, "loss": 0.1942, "step": 8580 }, { "epoch": 3.98977219897722, "grad_norm": 2.6716699600219727, "learning_rate": 4.026054219954024e-05, "loss": 0.1903, "step": 8582 }, { "epoch": 3.9907019990702, "grad_norm": 2.8085978031158447, "learning_rate": 4.0198257442365315e-05, "loss": 0.2402, "step": 8584 }, { "epoch": 3.99163179916318, "grad_norm": 2.9915082454681396, "learning_rate": 4.013582268452523e-05, "loss": 0.1676, "step": 8586 }, { "epoch": 3.99256159925616, "grad_norm": 3.094282627105713, "learning_rate": 4.007323854222581e-05, "loss": 0.2512, "step": 8588 }, { "epoch": 3.99349139934914, "grad_norm": 2.6982288360595703, "learning_rate": 4.0010505633147255e-05, "loss": 0.1613, "step": 8590 }, { "epoch": 3.9944211994421197, "grad_norm": 2.0159807205200195, "learning_rate": 3.994762457643813e-05, "loss": 0.1241, "step": 8592 }, { "epoch": 3.9953509995351, "grad_norm": 2.9604392051696777, "learning_rate": 3.988459599270907e-05, "loss": 0.2525, "step": 8594 }, { "epoch": 3.99628079962808, "grad_norm": 3.1802802085876465, "learning_rate": 3.9821420504026684e-05, "loss": 0.2771, "step": 8596 }, { "epoch": 3.99721059972106, "grad_norm": 2.9706482887268066, "learning_rate": 3.975809873390754e-05, "loss": 0.2077, "step": 8598 }, { "epoch": 3.99814039981404, "grad_norm": 2.584894895553589, "learning_rate": 3.9694631307312e-05, "loss": 0.1901, "step": 8600 }, { "epoch": 3.99907019990702, "grad_norm": 2.4548208713531494, "learning_rate": 3.963101885063793e-05, "loss": 0.2203, "step": 8602 }, { "epoch": 4.0, "grad_norm": 2.3018877506256104, "learning_rate": 3.956726199171455e-05, "loss": 0.2204, "step": 8604 }, { "epoch": 4.00092980009298, "grad_norm": 1.9359424114227295, "learning_rate": 3.950336135979642e-05, "loss": 0.135, "step": 8606 }, { "epoch": 4.00185960018596, "grad_norm": 1.8586690425872803, "learning_rate": 3.943931758555687e-05, "loss": 0.1205, "step": 8608 }, { "epoch": 4.00278940027894, "grad_norm": 2.013913869857788, "learning_rate": 3.937513130108211e-05, "loss": 0.1345, "step": 8610 }, { "epoch": 4.00371920037192, "grad_norm": 2.2094380855560303, "learning_rate": 3.9310803139864924e-05, "loss": 0.1589, "step": 8612 }, { "epoch": 4.0046490004649, "grad_norm": 1.5908727645874023, "learning_rate": 3.924633373679825e-05, "loss": 0.1464, "step": 8614 }, { "epoch": 4.00557880055788, "grad_norm": 2.7369682788848877, "learning_rate": 3.9181723728169106e-05, "loss": 0.1688, "step": 8616 }, { "epoch": 4.00650860065086, "grad_norm": 1.7771931886672974, "learning_rate": 3.911697375165209e-05, "loss": 0.1086, "step": 8618 }, { "epoch": 4.00743840074384, "grad_norm": 1.6295017004013062, "learning_rate": 3.905208444630343e-05, "loss": 0.1356, "step": 8620 }, { "epoch": 4.00836820083682, "grad_norm": 1.688825011253357, "learning_rate": 3.898705645255434e-05, "loss": 0.1074, "step": 8622 }, { "epoch": 4.0092980009298005, "grad_norm": 2.131201982498169, "learning_rate": 3.892189041220483e-05, "loss": 0.1624, "step": 8624 }, { "epoch": 4.01022780102278, "grad_norm": 2.100759267807007, "learning_rate": 3.885658696841749e-05, "loss": 0.1177, "step": 8626 }, { "epoch": 4.01115760111576, "grad_norm": 2.35491681098938, "learning_rate": 3.8791146765710935e-05, "loss": 0.1543, "step": 8628 }, { "epoch": 4.01208740120874, "grad_norm": 2.490833044052124, "learning_rate": 3.8725570449953475e-05, "loss": 0.1863, "step": 8630 }, { "epoch": 4.01301720130172, "grad_norm": 2.1475765705108643, "learning_rate": 3.8659858668356874e-05, "loss": 0.1194, "step": 8632 }, { "epoch": 4.0139470013947, "grad_norm": 2.058671712875366, "learning_rate": 3.8594012069469966e-05, "loss": 0.1489, "step": 8634 }, { "epoch": 4.01487680148768, "grad_norm": 1.789192795753479, "learning_rate": 3.852803130317206e-05, "loss": 0.1385, "step": 8636 }, { "epoch": 4.01580660158066, "grad_norm": 1.448337435722351, "learning_rate": 3.8461917020666674e-05, "loss": 0.1332, "step": 8638 }, { "epoch": 4.01673640167364, "grad_norm": 2.235502004623413, "learning_rate": 3.839566987447509e-05, "loss": 0.2227, "step": 8640 }, { "epoch": 4.0176662017666205, "grad_norm": 1.5890706777572632, "learning_rate": 3.832929051842989e-05, "loss": 0.1347, "step": 8642 }, { "epoch": 4.0185960018596, "grad_norm": 1.7938998937606812, "learning_rate": 3.8262779607668536e-05, "loss": 0.1114, "step": 8644 }, { "epoch": 4.019525801952581, "grad_norm": 2.2123873233795166, "learning_rate": 3.81961377986268e-05, "loss": 0.1589, "step": 8646 }, { "epoch": 4.02045560204556, "grad_norm": 2.0181539058685303, "learning_rate": 3.812936574903254e-05, "loss": 0.1439, "step": 8648 }, { "epoch": 4.02138540213854, "grad_norm": 1.8747661113739014, "learning_rate": 3.806246411789892e-05, "loss": 0.148, "step": 8650 }, { "epoch": 4.02231520223152, "grad_norm": 1.874699592590332, "learning_rate": 3.79954335655179e-05, "loss": 0.1359, "step": 8652 }, { "epoch": 4.0232450023245, "grad_norm": 2.0642099380493164, "learning_rate": 3.792827475345409e-05, "loss": 0.1594, "step": 8654 }, { "epoch": 4.02417480241748, "grad_norm": 1.5725458860397339, "learning_rate": 3.786098834453782e-05, "loss": 0.1443, "step": 8656 }, { "epoch": 4.02510460251046, "grad_norm": 1.95473051071167, "learning_rate": 3.779357500285876e-05, "loss": 0.1098, "step": 8658 }, { "epoch": 4.0260344026034405, "grad_norm": 2.508592128753662, "learning_rate": 3.7726035393759454e-05, "loss": 0.1691, "step": 8660 }, { "epoch": 4.02696420269642, "grad_norm": 2.441331148147583, "learning_rate": 3.765837018382848e-05, "loss": 0.1848, "step": 8662 }, { "epoch": 4.027894002789401, "grad_norm": 1.6050902605056763, "learning_rate": 3.759058004089419e-05, "loss": 0.1395, "step": 8664 }, { "epoch": 4.02882380288238, "grad_norm": 3.1629159450531006, "learning_rate": 3.7522665634017886e-05, "loss": 0.1853, "step": 8666 }, { "epoch": 4.029753602975361, "grad_norm": 2.2717065811157227, "learning_rate": 3.745462763348741e-05, "loss": 0.144, "step": 8668 }, { "epoch": 4.03068340306834, "grad_norm": 2.418461561203003, "learning_rate": 3.7386466710810336e-05, "loss": 0.1518, "step": 8670 }, { "epoch": 4.03161320316132, "grad_norm": 1.659953236579895, "learning_rate": 3.7318183538707443e-05, "loss": 0.1248, "step": 8672 }, { "epoch": 4.0325430032543, "grad_norm": 1.6314901113510132, "learning_rate": 3.7249778791106076e-05, "loss": 0.0932, "step": 8674 }, { "epoch": 4.03347280334728, "grad_norm": 2.923434019088745, "learning_rate": 3.718125314313347e-05, "loss": 0.1816, "step": 8676 }, { "epoch": 4.0344026034402605, "grad_norm": 1.646056890487671, "learning_rate": 3.711260727111012e-05, "loss": 0.0956, "step": 8678 }, { "epoch": 4.03533240353324, "grad_norm": 2.148261070251465, "learning_rate": 3.7043841852543023e-05, "loss": 0.1298, "step": 8680 }, { "epoch": 4.036262203626221, "grad_norm": 1.9827866554260254, "learning_rate": 3.6974957566119214e-05, "loss": 0.1439, "step": 8682 }, { "epoch": 4.0371920037192, "grad_norm": 2.520278215408325, "learning_rate": 3.690595509169867e-05, "loss": 0.1471, "step": 8684 }, { "epoch": 4.038121803812181, "grad_norm": 2.472949504852295, "learning_rate": 3.683683511030796e-05, "loss": 0.1434, "step": 8686 }, { "epoch": 4.03905160390516, "grad_norm": 2.4457592964172363, "learning_rate": 3.676759830413347e-05, "loss": 0.1897, "step": 8688 }, { "epoch": 4.039981403998141, "grad_norm": 1.940828800201416, "learning_rate": 3.66982453565145e-05, "loss": 0.139, "step": 8690 }, { "epoch": 4.04091120409112, "grad_norm": 2.319276809692383, "learning_rate": 3.662877695193662e-05, "loss": 0.1631, "step": 8692 }, { "epoch": 4.0418410041841, "grad_norm": 1.9390219449996948, "learning_rate": 3.6559193776024964e-05, "loss": 0.1334, "step": 8694 }, { "epoch": 4.0427708042770805, "grad_norm": 1.6610161066055298, "learning_rate": 3.6489496515537367e-05, "loss": 0.1038, "step": 8696 }, { "epoch": 4.04370060437006, "grad_norm": 2.3302724361419678, "learning_rate": 3.641968585835766e-05, "loss": 0.1929, "step": 8698 }, { "epoch": 4.044630404463041, "grad_norm": 2.948601722717285, "learning_rate": 3.6349762493488806e-05, "loss": 0.1354, "step": 8700 }, { "epoch": 4.04556020455602, "grad_norm": 2.594327688217163, "learning_rate": 3.627972711104627e-05, "loss": 0.1122, "step": 8702 }, { "epoch": 4.046490004649001, "grad_norm": 1.6393804550170898, "learning_rate": 3.620958040225095e-05, "loss": 0.1685, "step": 8704 }, { "epoch": 4.04741980474198, "grad_norm": 2.079442262649536, "learning_rate": 3.613932305942256e-05, "loss": 0.1327, "step": 8706 }, { "epoch": 4.048349604834961, "grad_norm": 1.645986795425415, "learning_rate": 3.6068955775972683e-05, "loss": 0.1041, "step": 8708 }, { "epoch": 4.04927940492794, "grad_norm": 2.0580337047576904, "learning_rate": 3.599847924639803e-05, "loss": 0.1056, "step": 8710 }, { "epoch": 4.050209205020921, "grad_norm": 1.8379114866256714, "learning_rate": 3.592789416627348e-05, "loss": 0.1184, "step": 8712 }, { "epoch": 4.0511390051139005, "grad_norm": 1.5529322624206543, "learning_rate": 3.5857201232245235e-05, "loss": 0.1449, "step": 8714 }, { "epoch": 4.052068805206881, "grad_norm": 1.8456223011016846, "learning_rate": 3.5786401142024145e-05, "loss": 0.1514, "step": 8716 }, { "epoch": 4.052998605299861, "grad_norm": 2.2476770877838135, "learning_rate": 3.571549459437838e-05, "loss": 0.1276, "step": 8718 }, { "epoch": 4.05392840539284, "grad_norm": 3.092193126678467, "learning_rate": 3.564448228912695e-05, "loss": 0.1923, "step": 8720 }, { "epoch": 4.054858205485821, "grad_norm": 1.8490275144577026, "learning_rate": 3.557336492713272e-05, "loss": 0.1137, "step": 8722 }, { "epoch": 4.0557880055788, "grad_norm": 1.8929691314697266, "learning_rate": 3.55021432102953e-05, "loss": 0.1406, "step": 8724 }, { "epoch": 4.056717805671781, "grad_norm": 2.390601396560669, "learning_rate": 3.543081784154428e-05, "loss": 0.122, "step": 8726 }, { "epoch": 4.05764760576476, "grad_norm": 2.2190911769866943, "learning_rate": 3.5359389524832255e-05, "loss": 0.1541, "step": 8728 }, { "epoch": 4.058577405857741, "grad_norm": 1.555443286895752, "learning_rate": 3.528785896512787e-05, "loss": 0.1324, "step": 8730 }, { "epoch": 4.0595072059507205, "grad_norm": 1.5929981470108032, "learning_rate": 3.521622686840888e-05, "loss": 0.1231, "step": 8732 }, { "epoch": 4.060437006043701, "grad_norm": 2.279627561569214, "learning_rate": 3.5144493941655116e-05, "loss": 0.1403, "step": 8734 }, { "epoch": 4.0613668061366806, "grad_norm": 2.0746614933013916, "learning_rate": 3.507266089284168e-05, "loss": 0.156, "step": 8736 }, { "epoch": 4.062296606229661, "grad_norm": 2.6366467475891113, "learning_rate": 3.5000728430931775e-05, "loss": 0.1277, "step": 8738 }, { "epoch": 4.063226406322641, "grad_norm": 1.5773578882217407, "learning_rate": 3.492869726586967e-05, "loss": 0.1779, "step": 8740 }, { "epoch": 4.06415620641562, "grad_norm": 1.8989284038543701, "learning_rate": 3.4856568108573896e-05, "loss": 0.1498, "step": 8742 }, { "epoch": 4.065086006508601, "grad_norm": 2.547520399093628, "learning_rate": 3.478434167093018e-05, "loss": 0.1823, "step": 8744 }, { "epoch": 4.06601580660158, "grad_norm": 2.269199848175049, "learning_rate": 3.471201866578428e-05, "loss": 0.1278, "step": 8746 }, { "epoch": 4.066945606694561, "grad_norm": 1.3337143659591675, "learning_rate": 3.4639599806935e-05, "loss": 0.0956, "step": 8748 }, { "epoch": 4.0678754067875404, "grad_norm": 2.4945218563079834, "learning_rate": 3.456708580912737e-05, "loss": 0.1538, "step": 8750 }, { "epoch": 4.068805206880521, "grad_norm": 1.6370385885238647, "learning_rate": 3.4494477388045157e-05, "loss": 0.1595, "step": 8752 }, { "epoch": 4.0697350069735005, "grad_norm": 1.938462734222412, "learning_rate": 3.4421775260304156e-05, "loss": 0.1489, "step": 8754 }, { "epoch": 4.070664807066481, "grad_norm": 2.3068690299987793, "learning_rate": 3.43489801434451e-05, "loss": 0.1305, "step": 8756 }, { "epoch": 4.071594607159461, "grad_norm": 2.1083457469940186, "learning_rate": 3.427609275592637e-05, "loss": 0.1467, "step": 8758 }, { "epoch": 4.072524407252441, "grad_norm": 2.0113160610198975, "learning_rate": 3.4203113817117085e-05, "loss": 0.1632, "step": 8760 }, { "epoch": 4.073454207345421, "grad_norm": 1.688586711883545, "learning_rate": 3.4130044047289796e-05, "loss": 0.1383, "step": 8762 }, { "epoch": 4.0743840074384, "grad_norm": 1.7786203622817993, "learning_rate": 3.4056884167613745e-05, "loss": 0.1395, "step": 8764 }, { "epoch": 4.075313807531381, "grad_norm": 2.422356128692627, "learning_rate": 3.398363490014738e-05, "loss": 0.1587, "step": 8766 }, { "epoch": 4.07624360762436, "grad_norm": 1.644031286239624, "learning_rate": 3.391029696783135e-05, "loss": 0.1349, "step": 8768 }, { "epoch": 4.077173407717341, "grad_norm": 2.687730550765991, "learning_rate": 3.383687109448151e-05, "loss": 0.1754, "step": 8770 }, { "epoch": 4.0781032078103205, "grad_norm": 1.9313055276870728, "learning_rate": 3.3763358004781604e-05, "loss": 0.1219, "step": 8772 }, { "epoch": 4.079033007903301, "grad_norm": 2.4294803142547607, "learning_rate": 3.368975842427606e-05, "loss": 0.1235, "step": 8774 }, { "epoch": 4.079962807996281, "grad_norm": 1.5048147439956665, "learning_rate": 3.361607307936303e-05, "loss": 0.1194, "step": 8776 }, { "epoch": 4.080892608089261, "grad_norm": 2.0906360149383545, "learning_rate": 3.354230269728719e-05, "loss": 0.1804, "step": 8778 }, { "epoch": 4.081822408182241, "grad_norm": 2.156257390975952, "learning_rate": 3.3468448006132394e-05, "loss": 0.0943, "step": 8780 }, { "epoch": 4.082752208275221, "grad_norm": 2.15158748626709, "learning_rate": 3.3394509734814626e-05, "loss": 0.1299, "step": 8782 }, { "epoch": 4.083682008368201, "grad_norm": 1.9619295597076416, "learning_rate": 3.3320488613074784e-05, "loss": 0.1394, "step": 8784 }, { "epoch": 4.08461180846118, "grad_norm": 1.9154822826385498, "learning_rate": 3.324638537147144e-05, "loss": 0.092, "step": 8786 }, { "epoch": 4.085541608554161, "grad_norm": 1.8930957317352295, "learning_rate": 3.317220074137369e-05, "loss": 0.133, "step": 8788 }, { "epoch": 4.0864714086471405, "grad_norm": 2.221050262451172, "learning_rate": 3.3097935454953826e-05, "loss": 0.1296, "step": 8790 }, { "epoch": 4.087401208740121, "grad_norm": 2.456143379211426, "learning_rate": 3.3023590245180333e-05, "loss": 0.1123, "step": 8792 }, { "epoch": 4.088331008833101, "grad_norm": 2.5530149936676025, "learning_rate": 3.294916584581041e-05, "loss": 0.1469, "step": 8794 }, { "epoch": 4.089260808926081, "grad_norm": 1.6311389207839966, "learning_rate": 3.287466299138272e-05, "loss": 0.1611, "step": 8796 }, { "epoch": 4.090190609019061, "grad_norm": 1.936434030532837, "learning_rate": 3.280008241721047e-05, "loss": 0.1397, "step": 8798 }, { "epoch": 4.091120409112041, "grad_norm": 2.2347521781921387, "learning_rate": 3.272542485937379e-05, "loss": 0.1427, "step": 8800 }, { "epoch": 4.092050209205021, "grad_norm": 2.0134694576263428, "learning_rate": 3.2650691054712587e-05, "loss": 0.0997, "step": 8802 }, { "epoch": 4.092980009298001, "grad_norm": 2.1216628551483154, "learning_rate": 3.257588174081947e-05, "loss": 0.1586, "step": 8804 }, { "epoch": 4.093909809390981, "grad_norm": 2.8288023471832275, "learning_rate": 3.250099765603203e-05, "loss": 0.1518, "step": 8806 }, { "epoch": 4.0948396094839605, "grad_norm": 2.063258171081543, "learning_rate": 3.2426039539426e-05, "loss": 0.1092, "step": 8808 }, { "epoch": 4.095769409576941, "grad_norm": 2.967385768890381, "learning_rate": 3.235100813080768e-05, "loss": 0.1618, "step": 8810 }, { "epoch": 4.096699209669921, "grad_norm": 2.656219244003296, "learning_rate": 3.2275904170706885e-05, "loss": 0.1434, "step": 8812 }, { "epoch": 4.097629009762901, "grad_norm": 2.5165443420410156, "learning_rate": 3.220072840036932e-05, "loss": 0.1651, "step": 8814 }, { "epoch": 4.098558809855881, "grad_norm": 2.475511312484741, "learning_rate": 3.2125481561749504e-05, "loss": 0.2165, "step": 8816 }, { "epoch": 4.099488609948861, "grad_norm": 1.89553964138031, "learning_rate": 3.205016439750334e-05, "loss": 0.1241, "step": 8818 }, { "epoch": 4.100418410041841, "grad_norm": 2.4525146484375, "learning_rate": 3.197477765098084e-05, "loss": 0.1692, "step": 8820 }, { "epoch": 4.101348210134821, "grad_norm": 2.1613121032714844, "learning_rate": 3.189932206621876e-05, "loss": 0.1486, "step": 8822 }, { "epoch": 4.102278010227801, "grad_norm": 2.1937756538391113, "learning_rate": 3.182379838793321e-05, "loss": 0.1702, "step": 8824 }, { "epoch": 4.103207810320781, "grad_norm": 1.8967865705490112, "learning_rate": 3.174820736151253e-05, "loss": 0.1059, "step": 8826 }, { "epoch": 4.104137610413761, "grad_norm": 1.6937848329544067, "learning_rate": 3.1672549733009525e-05, "loss": 0.1386, "step": 8828 }, { "epoch": 4.105067410506741, "grad_norm": 1.9536058902740479, "learning_rate": 3.159682624913442e-05, "loss": 0.1188, "step": 8830 }, { "epoch": 4.105997210599721, "grad_norm": 1.8619335889816284, "learning_rate": 3.1521037657247524e-05, "loss": 0.1582, "step": 8832 }, { "epoch": 4.106927010692701, "grad_norm": 1.5240894556045532, "learning_rate": 3.14451847053516e-05, "loss": 0.127, "step": 8834 }, { "epoch": 4.107856810785681, "grad_norm": 1.7582343816757202, "learning_rate": 3.1369268142084665e-05, "loss": 0.1019, "step": 8836 }, { "epoch": 4.108786610878661, "grad_norm": 1.510502815246582, "learning_rate": 3.129328871671254e-05, "loss": 0.1502, "step": 8838 }, { "epoch": 4.109716410971641, "grad_norm": 1.6446545124053955, "learning_rate": 3.1217247179121487e-05, "loss": 0.1207, "step": 8840 }, { "epoch": 4.110646211064621, "grad_norm": 2.384709358215332, "learning_rate": 3.114114427981078e-05, "loss": 0.1354, "step": 8842 }, { "epoch": 4.111576011157601, "grad_norm": 2.4219512939453125, "learning_rate": 3.106498076988527e-05, "loss": 0.1676, "step": 8844 }, { "epoch": 4.112505811250581, "grad_norm": 1.8479502201080322, "learning_rate": 3.098875740104813e-05, "loss": 0.1179, "step": 8846 }, { "epoch": 4.1134356113435615, "grad_norm": 2.848133087158203, "learning_rate": 3.091247492559325e-05, "loss": 0.1622, "step": 8848 }, { "epoch": 4.114365411436541, "grad_norm": 1.8074356317520142, "learning_rate": 3.083613409639773e-05, "loss": 0.1257, "step": 8850 }, { "epoch": 4.115295211529521, "grad_norm": 1.6198174953460693, "learning_rate": 3.0759735666914866e-05, "loss": 0.1072, "step": 8852 }, { "epoch": 4.116225011622501, "grad_norm": 2.417107105255127, "learning_rate": 3.068328039116626e-05, "loss": 0.1073, "step": 8854 }, { "epoch": 4.117154811715481, "grad_norm": 1.6634008884429932, "learning_rate": 3.060676902373463e-05, "loss": 0.1336, "step": 8856 }, { "epoch": 4.118084611808461, "grad_norm": 2.2551348209381104, "learning_rate": 3.053020231975624e-05, "loss": 0.1501, "step": 8858 }, { "epoch": 4.119014411901441, "grad_norm": 2.3268134593963623, "learning_rate": 3.0453581034913672e-05, "loss": 0.1233, "step": 8860 }, { "epoch": 4.119944211994421, "grad_norm": 1.9784228801727295, "learning_rate": 3.0376905925427948e-05, "loss": 0.1517, "step": 8862 }, { "epoch": 4.120874012087401, "grad_norm": 2.4048309326171875, "learning_rate": 3.030017774805144e-05, "loss": 0.1039, "step": 8864 }, { "epoch": 4.1218038121803815, "grad_norm": 2.5714621543884277, "learning_rate": 3.0223397260060373e-05, "loss": 0.1284, "step": 8866 }, { "epoch": 4.122733612273361, "grad_norm": 2.1544840335845947, "learning_rate": 3.0146565219247117e-05, "loss": 0.1832, "step": 8868 }, { "epoch": 4.123663412366342, "grad_norm": 2.6379928588867188, "learning_rate": 3.0069682383912942e-05, "loss": 0.1926, "step": 8870 }, { "epoch": 4.124593212459321, "grad_norm": 2.4789328575134277, "learning_rate": 2.9992749512860264e-05, "loss": 0.1591, "step": 8872 }, { "epoch": 4.125523012552302, "grad_norm": 2.0738353729248047, "learning_rate": 2.991576736538559e-05, "loss": 0.098, "step": 8874 }, { "epoch": 4.126452812645281, "grad_norm": 2.105306625366211, "learning_rate": 2.9838736701271606e-05, "loss": 0.1113, "step": 8876 }, { "epoch": 4.127382612738261, "grad_norm": 2.560331344604492, "learning_rate": 2.9761658280779803e-05, "loss": 0.1812, "step": 8878 }, { "epoch": 4.128312412831241, "grad_norm": 2.283006429672241, "learning_rate": 2.9684532864643177e-05, "loss": 0.1904, "step": 8880 }, { "epoch": 4.129242212924221, "grad_norm": 1.7949907779693604, "learning_rate": 2.9607361214058443e-05, "loss": 0.0721, "step": 8882 }, { "epoch": 4.1301720130172015, "grad_norm": 2.1159026622772217, "learning_rate": 2.9530144090678547e-05, "loss": 0.1397, "step": 8884 }, { "epoch": 4.131101813110181, "grad_norm": 1.6544599533081055, "learning_rate": 2.945288225660533e-05, "loss": 0.1408, "step": 8886 }, { "epoch": 4.132031613203162, "grad_norm": 1.6735193729400635, "learning_rate": 2.9375576474381983e-05, "loss": 0.1457, "step": 8888 }, { "epoch": 4.132961413296141, "grad_norm": 1.8779798746109009, "learning_rate": 2.9298227506985327e-05, "loss": 0.1454, "step": 8890 }, { "epoch": 4.133891213389122, "grad_norm": 3.3482556343078613, "learning_rate": 2.9220836117818436e-05, "loss": 0.1081, "step": 8892 }, { "epoch": 4.134821013482101, "grad_norm": 2.484726905822754, "learning_rate": 2.9143403070703085e-05, "loss": 0.1222, "step": 8894 }, { "epoch": 4.135750813575082, "grad_norm": 2.0185203552246094, "learning_rate": 2.9065929129872192e-05, "loss": 0.0993, "step": 8896 }, { "epoch": 4.136680613668061, "grad_norm": 2.0175647735595703, "learning_rate": 2.8988415059962228e-05, "loss": 0.1281, "step": 8898 }, { "epoch": 4.137610413761041, "grad_norm": 1.6825306415557861, "learning_rate": 2.8910861626005844e-05, "loss": 0.1326, "step": 8900 }, { "epoch": 4.1385402138540215, "grad_norm": 1.9804850816726685, "learning_rate": 2.883326959342409e-05, "loss": 0.1134, "step": 8902 }, { "epoch": 4.139470013947001, "grad_norm": 1.9145228862762451, "learning_rate": 2.8755639728019048e-05, "loss": 0.1546, "step": 8904 }, { "epoch": 4.140399814039982, "grad_norm": 1.8898780345916748, "learning_rate": 2.8677972795966014e-05, "loss": 0.1331, "step": 8906 }, { "epoch": 4.141329614132961, "grad_norm": 1.8189023733139038, "learning_rate": 2.8600269563806387e-05, "loss": 0.1074, "step": 8908 }, { "epoch": 4.142259414225942, "grad_norm": 2.076430320739746, "learning_rate": 2.8522530798439655e-05, "loss": 0.1649, "step": 8910 }, { "epoch": 4.143189214318921, "grad_norm": 1.7413972616195679, "learning_rate": 2.8444757267116e-05, "loss": 0.1174, "step": 8912 }, { "epoch": 4.144119014411902, "grad_norm": 2.089752674102783, "learning_rate": 2.8366949737428912e-05, "loss": 0.2174, "step": 8914 }, { "epoch": 4.145048814504881, "grad_norm": 2.49438214302063, "learning_rate": 2.828910897730716e-05, "loss": 0.1423, "step": 8916 }, { "epoch": 4.145978614597862, "grad_norm": 1.60934317111969, "learning_rate": 2.8211235755007674e-05, "loss": 0.1051, "step": 8918 }, { "epoch": 4.1469084146908415, "grad_norm": 2.1585750579833984, "learning_rate": 2.8133330839107666e-05, "loss": 0.1509, "step": 8920 }, { "epoch": 4.147838214783821, "grad_norm": 1.875830888748169, "learning_rate": 2.8055394998497298e-05, "loss": 0.1176, "step": 8922 }, { "epoch": 4.148768014876802, "grad_norm": 2.6295783519744873, "learning_rate": 2.797742900237181e-05, "loss": 0.1059, "step": 8924 }, { "epoch": 4.149697814969781, "grad_norm": 2.920438289642334, "learning_rate": 2.7899433620224107e-05, "loss": 0.1432, "step": 8926 }, { "epoch": 4.150627615062762, "grad_norm": 0.9276366233825684, "learning_rate": 2.7821409621837118e-05, "loss": 0.0504, "step": 8928 }, { "epoch": 4.151557415155741, "grad_norm": 1.5957540273666382, "learning_rate": 2.774335777727621e-05, "loss": 0.1209, "step": 8930 }, { "epoch": 4.152487215248722, "grad_norm": 3.6980583667755127, "learning_rate": 2.7665278856881586e-05, "loss": 0.1746, "step": 8932 }, { "epoch": 4.153417015341701, "grad_norm": 1.8670623302459717, "learning_rate": 2.7587173631260606e-05, "loss": 0.12, "step": 8934 }, { "epoch": 4.154346815434682, "grad_norm": 1.6885950565338135, "learning_rate": 2.7509042871280467e-05, "loss": 0.1382, "step": 8936 }, { "epoch": 4.1552766155276615, "grad_norm": 1.9123601913452148, "learning_rate": 2.743088734806008e-05, "loss": 0.159, "step": 8938 }, { "epoch": 4.156206415620642, "grad_norm": 1.8196223974227905, "learning_rate": 2.735270783296292e-05, "loss": 0.1292, "step": 8940 }, { "epoch": 4.157136215713622, "grad_norm": 1.4017826318740845, "learning_rate": 2.7274505097589314e-05, "loss": 0.0985, "step": 8942 }, { "epoch": 4.158066015806601, "grad_norm": 1.4768779277801514, "learning_rate": 2.719627991376865e-05, "loss": 0.0955, "step": 8944 }, { "epoch": 4.158995815899582, "grad_norm": 1.5395301580429077, "learning_rate": 2.7118033053551863e-05, "loss": 0.142, "step": 8946 }, { "epoch": 4.159925615992561, "grad_norm": 1.759901762008667, "learning_rate": 2.703976528920402e-05, "loss": 0.1526, "step": 8948 }, { "epoch": 4.160855416085542, "grad_norm": 2.6469855308532715, "learning_rate": 2.696147739319621e-05, "loss": 0.1522, "step": 8950 }, { "epoch": 4.161785216178521, "grad_norm": 1.8408637046813965, "learning_rate": 2.6883170138198408e-05, "loss": 0.117, "step": 8952 }, { "epoch": 4.162715016271502, "grad_norm": 1.4117122888565063, "learning_rate": 2.6804844297071563e-05, "loss": 0.1211, "step": 8954 }, { "epoch": 4.1636448163644815, "grad_norm": 1.5954604148864746, "learning_rate": 2.6726500642860198e-05, "loss": 0.1226, "step": 8956 }, { "epoch": 4.164574616457462, "grad_norm": 1.5614900588989258, "learning_rate": 2.6648139948784545e-05, "loss": 0.1274, "step": 8958 }, { "epoch": 4.165504416550442, "grad_norm": 1.9190250635147095, "learning_rate": 2.6569762988232896e-05, "loss": 0.1147, "step": 8960 }, { "epoch": 4.166434216643422, "grad_norm": 2.0189318656921387, "learning_rate": 2.649137053475434e-05, "loss": 0.1377, "step": 8962 }, { "epoch": 4.167364016736402, "grad_norm": 1.2434581518173218, "learning_rate": 2.641296336205069e-05, "loss": 0.0803, "step": 8964 }, { "epoch": 4.168293816829381, "grad_norm": 1.9629489183425903, "learning_rate": 2.6334542243969092e-05, "loss": 0.1838, "step": 8966 }, { "epoch": 4.169223616922362, "grad_norm": 2.005875825881958, "learning_rate": 2.6256107954494276e-05, "loss": 0.1368, "step": 8968 }, { "epoch": 4.170153417015341, "grad_norm": 1.9025599956512451, "learning_rate": 2.6177661267741157e-05, "loss": 0.1036, "step": 8970 }, { "epoch": 4.171083217108322, "grad_norm": 1.5280488729476929, "learning_rate": 2.6099202957946716e-05, "loss": 0.0979, "step": 8972 }, { "epoch": 4.1720130172013015, "grad_norm": 1.6347898244857788, "learning_rate": 2.6020733799462804e-05, "loss": 0.0783, "step": 8974 }, { "epoch": 4.172942817294282, "grad_norm": 2.2607648372650146, "learning_rate": 2.594225456674843e-05, "loss": 0.1324, "step": 8976 }, { "epoch": 4.173872617387262, "grad_norm": 1.637965440750122, "learning_rate": 2.5863766034361875e-05, "loss": 0.0971, "step": 8978 }, { "epoch": 4.174802417480242, "grad_norm": 2.5933620929718018, "learning_rate": 2.5785268976953325e-05, "loss": 0.1511, "step": 8980 }, { "epoch": 4.175732217573222, "grad_norm": 2.543494939804077, "learning_rate": 2.570676416925691e-05, "loss": 0.1448, "step": 8982 }, { "epoch": 4.176662017666202, "grad_norm": 2.237884759902954, "learning_rate": 2.562825238608352e-05, "loss": 0.1358, "step": 8984 }, { "epoch": 4.177591817759182, "grad_norm": 1.4922295808792114, "learning_rate": 2.5549734402312718e-05, "loss": 0.136, "step": 8986 }, { "epoch": 4.178521617852161, "grad_norm": 1.5659202337265015, "learning_rate": 2.5471210992885256e-05, "loss": 0.1007, "step": 8988 }, { "epoch": 4.179451417945142, "grad_norm": 1.44708251953125, "learning_rate": 2.539268293279557e-05, "loss": 0.1075, "step": 8990 }, { "epoch": 4.1803812180381215, "grad_norm": 2.072784423828125, "learning_rate": 2.5314150997083918e-05, "loss": 0.1331, "step": 8992 }, { "epoch": 4.181311018131102, "grad_norm": 2.3211703300476074, "learning_rate": 2.523561596082866e-05, "loss": 0.1579, "step": 8994 }, { "epoch": 4.182240818224082, "grad_norm": 2.045109748840332, "learning_rate": 2.5157078599139038e-05, "loss": 0.1111, "step": 8996 }, { "epoch": 4.183170618317062, "grad_norm": 1.986261248588562, "learning_rate": 2.507853968714706e-05, "loss": 0.1215, "step": 8998 }, { "epoch": 4.184100418410042, "grad_norm": 1.5461784601211548, "learning_rate": 2.500000000000007e-05, "loss": 0.0868, "step": 9000 }, { "epoch": 4.184100418410042, "eval_cer": 0.18795257930150594, "eval_loss": 0.27691519260406494, "eval_runtime": 397.4885, "eval_samples_per_second": 31.936, "eval_steps_per_second": 0.999, "step": 9000 }, { "epoch": 4.185030218503022, "grad_norm": 1.826816201210022, "learning_rate": 2.492146031285309e-05, "loss": 0.1526, "step": 9002 }, { "epoch": 4.185960018596002, "grad_norm": 1.614965558052063, "learning_rate": 2.484292140086111e-05, "loss": 0.1281, "step": 9004 }, { "epoch": 4.186889818688982, "grad_norm": 1.8579047918319702, "learning_rate": 2.476438403917149e-05, "loss": 0.12, "step": 9006 }, { "epoch": 4.187819618781962, "grad_norm": 1.8857017755508423, "learning_rate": 2.4685849002916234e-05, "loss": 0.1021, "step": 9008 }, { "epoch": 4.1887494188749415, "grad_norm": 2.038986921310425, "learning_rate": 2.4607317067204533e-05, "loss": 0.167, "step": 9010 }, { "epoch": 4.189679218967922, "grad_norm": 1.6913840770721436, "learning_rate": 2.4528789007114848e-05, "loss": 0.1287, "step": 9012 }, { "epoch": 4.1906090190609016, "grad_norm": 1.774849772453308, "learning_rate": 2.4450265597687474e-05, "loss": 0.0716, "step": 9014 }, { "epoch": 4.191538819153882, "grad_norm": 1.8138517141342163, "learning_rate": 2.4371747613916627e-05, "loss": 0.143, "step": 9016 }, { "epoch": 4.192468619246862, "grad_norm": 2.0451653003692627, "learning_rate": 2.429323583074324e-05, "loss": 0.0767, "step": 9018 }, { "epoch": 4.193398419339842, "grad_norm": 1.904071569442749, "learning_rate": 2.421473102304687e-05, "loss": 0.0641, "step": 9020 }, { "epoch": 4.194328219432822, "grad_norm": 2.202256679534912, "learning_rate": 2.413623396563823e-05, "loss": 0.1356, "step": 9022 }, { "epoch": 4.195258019525802, "grad_norm": 2.3973138332366943, "learning_rate": 2.4057745433251723e-05, "loss": 0.1201, "step": 9024 }, { "epoch": 4.196187819618782, "grad_norm": 2.2005679607391357, "learning_rate": 2.397926620053734e-05, "loss": 0.1004, "step": 9026 }, { "epoch": 4.197117619711762, "grad_norm": 1.9113932847976685, "learning_rate": 2.390079704205347e-05, "loss": 0.129, "step": 9028 }, { "epoch": 4.198047419804742, "grad_norm": 1.010654330253601, "learning_rate": 2.3822338732258988e-05, "loss": 0.0751, "step": 9030 }, { "epoch": 4.198977219897722, "grad_norm": 1.4380295276641846, "learning_rate": 2.374389204550582e-05, "loss": 0.096, "step": 9032 }, { "epoch": 4.199907019990702, "grad_norm": 2.3662219047546387, "learning_rate": 2.3665457756031053e-05, "loss": 0.0593, "step": 9034 }, { "epoch": 4.200836820083682, "grad_norm": 1.4566357135772705, "learning_rate": 2.3587036637949456e-05, "loss": 0.1169, "step": 9036 }, { "epoch": 4.201766620176662, "grad_norm": 1.9287450313568115, "learning_rate": 2.350862946524581e-05, "loss": 0.1151, "step": 9038 }, { "epoch": 4.202696420269642, "grad_norm": 1.1018098592758179, "learning_rate": 2.343023701176725e-05, "loss": 0.1073, "step": 9040 }, { "epoch": 4.203626220362622, "grad_norm": 2.0697593688964844, "learning_rate": 2.33518600512156e-05, "loss": 0.0983, "step": 9042 }, { "epoch": 4.204556020455602, "grad_norm": 1.4017467498779297, "learning_rate": 2.3273499357139903e-05, "loss": 0.1003, "step": 9044 }, { "epoch": 4.205485820548582, "grad_norm": 2.8266000747680664, "learning_rate": 2.3195155702928582e-05, "loss": 0.1623, "step": 9046 }, { "epoch": 4.206415620641562, "grad_norm": 1.441250205039978, "learning_rate": 2.3116829861801785e-05, "loss": 0.1079, "step": 9048 }, { "epoch": 4.207345420734542, "grad_norm": 1.4605463743209839, "learning_rate": 2.303852260680394e-05, "loss": 0.1117, "step": 9050 }, { "epoch": 4.208275220827522, "grad_norm": 2.1141107082366943, "learning_rate": 2.2960234710796124e-05, "loss": 0.1452, "step": 9052 }, { "epoch": 4.209205020920502, "grad_norm": 1.8993206024169922, "learning_rate": 2.288196694644823e-05, "loss": 0.1417, "step": 9054 }, { "epoch": 4.210134821013482, "grad_norm": 1.693664789199829, "learning_rate": 2.2803720086231445e-05, "loss": 0.1181, "step": 9056 }, { "epoch": 4.211064621106462, "grad_norm": 1.3553472757339478, "learning_rate": 2.2725494902410828e-05, "loss": 0.0905, "step": 9058 }, { "epoch": 4.211994421199442, "grad_norm": 1.552571177482605, "learning_rate": 2.2647292167037222e-05, "loss": 0.1533, "step": 9060 }, { "epoch": 4.212924221292422, "grad_norm": 1.6068847179412842, "learning_rate": 2.25691126519401e-05, "loss": 0.1078, "step": 9062 }, { "epoch": 4.213854021385402, "grad_norm": 1.8110007047653198, "learning_rate": 2.2490957128719675e-05, "loss": 0.097, "step": 9064 }, { "epoch": 4.214783821478382, "grad_norm": 1.3972396850585938, "learning_rate": 2.2412826368739488e-05, "loss": 0.0943, "step": 9066 }, { "epoch": 4.215713621571362, "grad_norm": 1.7206701040267944, "learning_rate": 2.2334721143118602e-05, "loss": 0.1164, "step": 9068 }, { "epoch": 4.216643421664342, "grad_norm": 2.180149555206299, "learning_rate": 2.225664222272393e-05, "loss": 0.094, "step": 9070 }, { "epoch": 4.2175732217573225, "grad_norm": 1.8735058307647705, "learning_rate": 2.2178590378163024e-05, "loss": 0.0971, "step": 9072 }, { "epoch": 4.218503021850302, "grad_norm": 1.5304677486419678, "learning_rate": 2.2100566379776035e-05, "loss": 0.0958, "step": 9074 }, { "epoch": 4.219432821943283, "grad_norm": 2.4124317169189453, "learning_rate": 2.202257099762833e-05, "loss": 0.1178, "step": 9076 }, { "epoch": 4.220362622036262, "grad_norm": 2.150428056716919, "learning_rate": 2.19446050015028e-05, "loss": 0.0971, "step": 9078 }, { "epoch": 4.221292422129242, "grad_norm": 1.2198940515518188, "learning_rate": 2.186666916089248e-05, "loss": 0.0919, "step": 9080 }, { "epoch": 4.222222222222222, "grad_norm": 1.7758680582046509, "learning_rate": 2.1788764244992515e-05, "loss": 0.1442, "step": 9082 }, { "epoch": 4.223152022315202, "grad_norm": 1.2506588697433472, "learning_rate": 2.1710891022692986e-05, "loss": 0.0783, "step": 9084 }, { "epoch": 4.224081822408182, "grad_norm": 1.445269227027893, "learning_rate": 2.163305026257124e-05, "loss": 0.1033, "step": 9086 }, { "epoch": 4.225011622501162, "grad_norm": 1.1328339576721191, "learning_rate": 2.155524273288411e-05, "loss": 0.105, "step": 9088 }, { "epoch": 4.2259414225941425, "grad_norm": 1.9101141691207886, "learning_rate": 2.147746920156054e-05, "loss": 0.1212, "step": 9090 }, { "epoch": 4.226871222687122, "grad_norm": 1.619890570640564, "learning_rate": 2.139973043619376e-05, "loss": 0.1064, "step": 9092 }, { "epoch": 4.227801022780103, "grad_norm": 2.582002639770508, "learning_rate": 2.1322027204034134e-05, "loss": 0.1572, "step": 9094 }, { "epoch": 4.228730822873082, "grad_norm": 1.5198643207550049, "learning_rate": 2.124436027198115e-05, "loss": 0.0781, "step": 9096 }, { "epoch": 4.229660622966063, "grad_norm": 2.4531073570251465, "learning_rate": 2.1166730406576022e-05, "loss": 0.1258, "step": 9098 }, { "epoch": 4.230590423059042, "grad_norm": 1.2722208499908447, "learning_rate": 2.1089138373994274e-05, "loss": 0.1157, "step": 9100 }, { "epoch": 4.231520223152022, "grad_norm": 1.3758631944656372, "learning_rate": 2.1011584940037937e-05, "loss": 0.0824, "step": 9102 }, { "epoch": 4.232450023245002, "grad_norm": 1.9301704168319702, "learning_rate": 2.0934070870127966e-05, "loss": 0.1196, "step": 9104 }, { "epoch": 4.233379823337982, "grad_norm": 1.9165527820587158, "learning_rate": 2.0856596929297067e-05, "loss": 0.1166, "step": 9106 }, { "epoch": 4.2343096234309625, "grad_norm": 1.8717012405395508, "learning_rate": 2.0779163882181716e-05, "loss": 0.1091, "step": 9108 }, { "epoch": 4.235239423523942, "grad_norm": 1.5037717819213867, "learning_rate": 2.0701772493014828e-05, "loss": 0.1117, "step": 9110 }, { "epoch": 4.236169223616923, "grad_norm": 1.5051053762435913, "learning_rate": 2.062442352561817e-05, "loss": 0.128, "step": 9112 }, { "epoch": 4.237099023709902, "grad_norm": 2.143270254135132, "learning_rate": 2.0547117743394822e-05, "loss": 0.1173, "step": 9114 }, { "epoch": 4.238028823802883, "grad_norm": 1.6145291328430176, "learning_rate": 2.046985590932165e-05, "loss": 0.0626, "step": 9116 }, { "epoch": 4.238958623895862, "grad_norm": 1.5166842937469482, "learning_rate": 2.0392638785941708e-05, "loss": 0.0896, "step": 9118 }, { "epoch": 4.239888423988843, "grad_norm": 1.3930467367172241, "learning_rate": 2.0315467135356927e-05, "loss": 0.0793, "step": 9120 }, { "epoch": 4.240818224081822, "grad_norm": 1.243298888206482, "learning_rate": 2.0238341719220305e-05, "loss": 0.123, "step": 9122 }, { "epoch": 4.241748024174802, "grad_norm": 1.6866384744644165, "learning_rate": 2.0161263298728597e-05, "loss": 0.1069, "step": 9124 }, { "epoch": 4.2426778242677825, "grad_norm": 1.5327588319778442, "learning_rate": 2.0084232634614564e-05, "loss": 0.1397, "step": 9126 }, { "epoch": 4.243607624360762, "grad_norm": 1.8308486938476562, "learning_rate": 2.0007250487139894e-05, "loss": 0.0886, "step": 9128 }, { "epoch": 4.244537424453743, "grad_norm": 1.7570527791976929, "learning_rate": 1.993031761608726e-05, "loss": 0.0957, "step": 9130 }, { "epoch": 4.245467224546722, "grad_norm": 2.222832679748535, "learning_rate": 1.9853434780753004e-05, "loss": 0.09, "step": 9132 }, { "epoch": 4.246397024639703, "grad_norm": 1.549939751625061, "learning_rate": 1.9776602739939792e-05, "loss": 0.0688, "step": 9134 }, { "epoch": 4.247326824732682, "grad_norm": 2.1958835124969482, "learning_rate": 1.9699822251948725e-05, "loss": 0.146, "step": 9136 }, { "epoch": 4.248256624825663, "grad_norm": 1.0548409223556519, "learning_rate": 1.9623094074572217e-05, "loss": 0.0954, "step": 9138 }, { "epoch": 4.249186424918642, "grad_norm": 2.3829832077026367, "learning_rate": 1.9546418965086493e-05, "loss": 0.2006, "step": 9140 }, { "epoch": 4.250116225011623, "grad_norm": 2.125120162963867, "learning_rate": 1.9469797680243882e-05, "loss": 0.1165, "step": 9142 }, { "epoch": 4.2510460251046025, "grad_norm": 2.297895669937134, "learning_rate": 1.9393230976265534e-05, "loss": 0.1337, "step": 9144 }, { "epoch": 4.251975825197582, "grad_norm": 1.7925649881362915, "learning_rate": 1.931671960883391e-05, "loss": 0.1326, "step": 9146 }, { "epoch": 4.252905625290563, "grad_norm": 1.262437343597412, "learning_rate": 1.924026433308531e-05, "loss": 0.0775, "step": 9148 }, { "epoch": 4.253835425383542, "grad_norm": 1.8091434240341187, "learning_rate": 1.916386590360244e-05, "loss": 0.1366, "step": 9150 }, { "epoch": 4.254765225476523, "grad_norm": 1.4964572191238403, "learning_rate": 1.908752507440692e-05, "loss": 0.1254, "step": 9152 }, { "epoch": 4.255695025569502, "grad_norm": 1.5327305793762207, "learning_rate": 1.9011242598951993e-05, "loss": 0.1209, "step": 9154 }, { "epoch": 4.256624825662483, "grad_norm": 2.361661195755005, "learning_rate": 1.89350192301149e-05, "loss": 0.159, "step": 9156 }, { "epoch": 4.257554625755462, "grad_norm": 1.7522386312484741, "learning_rate": 1.885885572018943e-05, "loss": 0.1192, "step": 9158 }, { "epoch": 4.258484425848443, "grad_norm": 1.8168787956237793, "learning_rate": 1.8782752820878682e-05, "loss": 0.1799, "step": 9160 }, { "epoch": 4.2594142259414225, "grad_norm": 1.7256462574005127, "learning_rate": 1.870671128328763e-05, "loss": 0.0943, "step": 9162 }, { "epoch": 4.260344026034403, "grad_norm": 3.1489577293395996, "learning_rate": 1.863073185791551e-05, "loss": 0.2134, "step": 9164 }, { "epoch": 4.261273826127383, "grad_norm": 1.5088729858398438, "learning_rate": 1.8554815294648534e-05, "loss": 0.0957, "step": 9166 }, { "epoch": 4.262203626220362, "grad_norm": 1.8090474605560303, "learning_rate": 1.8478962342752654e-05, "loss": 0.1161, "step": 9168 }, { "epoch": 4.263133426313343, "grad_norm": 1.8677211999893188, "learning_rate": 1.8403173750865763e-05, "loss": 0.1009, "step": 9170 }, { "epoch": 4.264063226406322, "grad_norm": 1.7862111330032349, "learning_rate": 1.83274502669907e-05, "loss": 0.1315, "step": 9172 }, { "epoch": 4.264993026499303, "grad_norm": 2.1054837703704834, "learning_rate": 1.825179263848764e-05, "loss": 0.1061, "step": 9174 }, { "epoch": 4.265922826592282, "grad_norm": 2.17394757270813, "learning_rate": 1.8176201612066926e-05, "loss": 0.1589, "step": 9176 }, { "epoch": 4.266852626685263, "grad_norm": 2.0094549655914307, "learning_rate": 1.810067793378146e-05, "loss": 0.116, "step": 9178 }, { "epoch": 4.2677824267782425, "grad_norm": 1.2706780433654785, "learning_rate": 1.802522234901933e-05, "loss": 0.0754, "step": 9180 }, { "epoch": 4.268712226871223, "grad_norm": 2.089446544647217, "learning_rate": 1.7949835602496834e-05, "loss": 0.0924, "step": 9182 }, { "epoch": 4.269642026964203, "grad_norm": 2.0565483570098877, "learning_rate": 1.787451843825067e-05, "loss": 0.126, "step": 9184 }, { "epoch": 4.270571827057183, "grad_norm": 2.38724946975708, "learning_rate": 1.779927159963081e-05, "loss": 0.0823, "step": 9186 }, { "epoch": 4.271501627150163, "grad_norm": 1.931361198425293, "learning_rate": 1.772409582929325e-05, "loss": 0.1333, "step": 9188 }, { "epoch": 4.272431427243143, "grad_norm": 1.6748217344284058, "learning_rate": 1.7648991869192493e-05, "loss": 0.108, "step": 9190 }, { "epoch": 4.273361227336123, "grad_norm": 2.183269500732422, "learning_rate": 1.7573960460574225e-05, "loss": 0.1263, "step": 9192 }, { "epoch": 4.274291027429102, "grad_norm": 2.1879522800445557, "learning_rate": 1.7499002343968155e-05, "loss": 0.1147, "step": 9194 }, { "epoch": 4.275220827522083, "grad_norm": 2.1501898765563965, "learning_rate": 1.7424118259180715e-05, "loss": 0.1325, "step": 9196 }, { "epoch": 4.2761506276150625, "grad_norm": 2.3527774810791016, "learning_rate": 1.7349308945287548e-05, "loss": 0.1708, "step": 9198 }, { "epoch": 4.277080427708043, "grad_norm": 2.0576741695404053, "learning_rate": 1.7274575140626382e-05, "loss": 0.1182, "step": 9200 }, { "epoch": 4.278010227801023, "grad_norm": 2.4407660961151123, "learning_rate": 1.7199917582789704e-05, "loss": 0.1278, "step": 9202 }, { "epoch": 4.278940027894003, "grad_norm": 1.6357687711715698, "learning_rate": 1.712533700861746e-05, "loss": 0.0893, "step": 9204 }, { "epoch": 4.279869827986983, "grad_norm": 1.942182183265686, "learning_rate": 1.705083415418981e-05, "loss": 0.0848, "step": 9206 }, { "epoch": 4.280799628079963, "grad_norm": 1.0905190706253052, "learning_rate": 1.697640975481981e-05, "loss": 0.0555, "step": 9208 }, { "epoch": 4.281729428172943, "grad_norm": 1.5050804615020752, "learning_rate": 1.6902064545046315e-05, "loss": 0.0939, "step": 9210 }, { "epoch": 4.282659228265922, "grad_norm": 1.3924570083618164, "learning_rate": 1.6827799258626537e-05, "loss": 0.0473, "step": 9212 }, { "epoch": 4.283589028358903, "grad_norm": 1.8301931619644165, "learning_rate": 1.675361462852874e-05, "loss": 0.1024, "step": 9214 }, { "epoch": 4.2845188284518825, "grad_norm": 1.8077096939086914, "learning_rate": 1.6679511386925404e-05, "loss": 0.1068, "step": 9216 }, { "epoch": 4.285448628544863, "grad_norm": 0.807613730430603, "learning_rate": 1.6605490265185556e-05, "loss": 0.0866, "step": 9218 }, { "epoch": 4.286378428637843, "grad_norm": 1.5585280656814575, "learning_rate": 1.6531551993867795e-05, "loss": 0.1478, "step": 9220 }, { "epoch": 4.287308228730823, "grad_norm": 2.0995991230010986, "learning_rate": 1.6457697302713e-05, "loss": 0.1083, "step": 9222 }, { "epoch": 4.288238028823803, "grad_norm": 1.8113811016082764, "learning_rate": 1.638392692063716e-05, "loss": 0.1064, "step": 9224 }, { "epoch": 4.289167828916783, "grad_norm": 2.693011999130249, "learning_rate": 1.6310241575724168e-05, "loss": 0.1287, "step": 9226 }, { "epoch": 4.290097629009763, "grad_norm": 1.9639601707458496, "learning_rate": 1.6236641995218585e-05, "loss": 0.0999, "step": 9228 }, { "epoch": 4.291027429102743, "grad_norm": 1.5405235290527344, "learning_rate": 1.6163128905518635e-05, "loss": 0.0832, "step": 9230 }, { "epoch": 4.291957229195723, "grad_norm": 2.151531457901001, "learning_rate": 1.6089703032168798e-05, "loss": 0.0973, "step": 9232 }, { "epoch": 4.292887029288703, "grad_norm": 2.1999807357788086, "learning_rate": 1.6016365099852844e-05, "loss": 0.1061, "step": 9234 }, { "epoch": 4.293816829381683, "grad_norm": 1.5624241828918457, "learning_rate": 1.5943115832386434e-05, "loss": 0.1151, "step": 9236 }, { "epoch": 4.294746629474663, "grad_norm": 2.0371932983398438, "learning_rate": 1.5869955952710386e-05, "loss": 0.1501, "step": 9238 }, { "epoch": 4.295676429567643, "grad_norm": 2.151686668395996, "learning_rate": 1.579688618288314e-05, "loss": 0.1177, "step": 9240 }, { "epoch": 4.296606229660623, "grad_norm": 1.549902081489563, "learning_rate": 1.5723907244073773e-05, "loss": 0.1075, "step": 9242 }, { "epoch": 4.297536029753603, "grad_norm": 1.880340814590454, "learning_rate": 1.5651019856555087e-05, "loss": 0.0992, "step": 9244 }, { "epoch": 4.298465829846583, "grad_norm": 1.542337417602539, "learning_rate": 1.5578224739696033e-05, "loss": 0.1035, "step": 9246 }, { "epoch": 4.299395629939563, "grad_norm": 1.033549189567566, "learning_rate": 1.5505522611955036e-05, "loss": 0.1034, "step": 9248 }, { "epoch": 4.300325430032543, "grad_norm": 1.0696747303009033, "learning_rate": 1.5432914190872828e-05, "loss": 0.0788, "step": 9250 }, { "epoch": 4.301255230125523, "grad_norm": 2.4763991832733154, "learning_rate": 1.536040019306516e-05, "loss": 0.145, "step": 9252 }, { "epoch": 4.302185030218503, "grad_norm": 2.093979597091675, "learning_rate": 1.528798133421593e-05, "loss": 0.113, "step": 9254 }, { "epoch": 4.303114830311483, "grad_norm": 1.5071817636489868, "learning_rate": 1.5215658329070024e-05, "loss": 0.104, "step": 9256 }, { "epoch": 4.304044630404463, "grad_norm": 1.7571791410446167, "learning_rate": 1.5143431891426313e-05, "loss": 0.1101, "step": 9258 }, { "epoch": 4.304974430497443, "grad_norm": 1.3268654346466064, "learning_rate": 1.5071302734130577e-05, "loss": 0.1297, "step": 9260 }, { "epoch": 4.305904230590423, "grad_norm": 1.7986915111541748, "learning_rate": 1.4999271569068436e-05, "loss": 0.1383, "step": 9262 }, { "epoch": 4.306834030683403, "grad_norm": 1.189923644065857, "learning_rate": 1.4927339107158492e-05, "loss": 0.0653, "step": 9264 }, { "epoch": 4.307763830776383, "grad_norm": 2.009671926498413, "learning_rate": 1.4855506058345105e-05, "loss": 0.1369, "step": 9266 }, { "epoch": 4.308693630869363, "grad_norm": 1.538968801498413, "learning_rate": 1.4783773131591381e-05, "loss": 0.1126, "step": 9268 }, { "epoch": 4.309623430962343, "grad_norm": 2.4219391345977783, "learning_rate": 1.4712141034872355e-05, "loss": 0.1429, "step": 9270 }, { "epoch": 4.310553231055323, "grad_norm": 1.6105774641036987, "learning_rate": 1.4640610475167974e-05, "loss": 0.1005, "step": 9272 }, { "epoch": 4.3114830311483034, "grad_norm": 1.4383113384246826, "learning_rate": 1.456918215845595e-05, "loss": 0.1327, "step": 9274 }, { "epoch": 4.312412831241283, "grad_norm": 1.5598093271255493, "learning_rate": 1.4497856789704887e-05, "loss": 0.1221, "step": 9276 }, { "epoch": 4.3133426313342635, "grad_norm": 1.882737636566162, "learning_rate": 1.442663507286751e-05, "loss": 0.153, "step": 9278 }, { "epoch": 4.314272431427243, "grad_norm": 1.440523624420166, "learning_rate": 1.4355517710873277e-05, "loss": 0.0694, "step": 9280 }, { "epoch": 4.315202231520223, "grad_norm": 1.9846402406692505, "learning_rate": 1.428450540562185e-05, "loss": 0.1314, "step": 9282 }, { "epoch": 4.316132031613203, "grad_norm": 1.6586366891860962, "learning_rate": 1.421359885797609e-05, "loss": 0.0861, "step": 9284 }, { "epoch": 4.317061831706183, "grad_norm": 2.647674798965454, "learning_rate": 1.4142798767754953e-05, "loss": 0.1229, "step": 9286 }, { "epoch": 4.317991631799163, "grad_norm": 1.8733240365982056, "learning_rate": 1.4072105833726787e-05, "loss": 0.1168, "step": 9288 }, { "epoch": 4.318921431892143, "grad_norm": 2.0952529907226562, "learning_rate": 1.4001520753602195e-05, "loss": 0.1137, "step": 9290 }, { "epoch": 4.319851231985123, "grad_norm": 1.7138901948928833, "learning_rate": 1.3931044224027543e-05, "loss": 0.0968, "step": 9292 }, { "epoch": 4.320781032078103, "grad_norm": 1.1683721542358398, "learning_rate": 1.3860676940577677e-05, "loss": 0.0794, "step": 9294 }, { "epoch": 4.3217108321710835, "grad_norm": 1.9927749633789062, "learning_rate": 1.379041959774924e-05, "loss": 0.0974, "step": 9296 }, { "epoch": 4.322640632264063, "grad_norm": 1.3038296699523926, "learning_rate": 1.3720272888953922e-05, "loss": 0.0793, "step": 9298 }, { "epoch": 4.323570432357044, "grad_norm": 1.0347528457641602, "learning_rate": 1.3650237506511426e-05, "loss": 0.1038, "step": 9300 }, { "epoch": 4.324500232450023, "grad_norm": 1.5061438083648682, "learning_rate": 1.358031414164261e-05, "loss": 0.0946, "step": 9302 }, { "epoch": 4.325430032543003, "grad_norm": 1.4742721319198608, "learning_rate": 1.3510503484462866e-05, "loss": 0.1117, "step": 9304 }, { "epoch": 4.326359832635983, "grad_norm": 1.8256251811981201, "learning_rate": 1.344080622397527e-05, "loss": 0.113, "step": 9306 }, { "epoch": 4.327289632728963, "grad_norm": 1.255029320716858, "learning_rate": 1.3371223048063612e-05, "loss": 0.104, "step": 9308 }, { "epoch": 4.328219432821943, "grad_norm": 1.9155360460281372, "learning_rate": 1.3301754643485739e-05, "loss": 0.0801, "step": 9310 }, { "epoch": 4.329149232914923, "grad_norm": 1.9376918077468872, "learning_rate": 1.3232401695866763e-05, "loss": 0.079, "step": 9312 }, { "epoch": 4.3300790330079035, "grad_norm": 1.0970414876937866, "learning_rate": 1.3163164889692277e-05, "loss": 0.0934, "step": 9314 }, { "epoch": 4.331008833100883, "grad_norm": 1.9411462545394897, "learning_rate": 1.3094044908301607e-05, "loss": 0.1521, "step": 9316 }, { "epoch": 4.331938633193864, "grad_norm": 1.6062793731689453, "learning_rate": 1.3025042433881024e-05, "loss": 0.064, "step": 9318 }, { "epoch": 4.332868433286843, "grad_norm": 1.7424527406692505, "learning_rate": 1.2956158147457169e-05, "loss": 0.1213, "step": 9320 }, { "epoch": 4.333798233379824, "grad_norm": 1.9697941541671753, "learning_rate": 1.2887392728890143e-05, "loss": 0.1092, "step": 9322 }, { "epoch": 4.334728033472803, "grad_norm": 1.4972898960113525, "learning_rate": 1.281874685686675e-05, "loss": 0.1176, "step": 9324 }, { "epoch": 4.335657833565783, "grad_norm": 1.1110836267471313, "learning_rate": 1.2750221208894148e-05, "loss": 0.0902, "step": 9326 }, { "epoch": 4.336587633658763, "grad_norm": 1.565492033958435, "learning_rate": 1.2681816461292776e-05, "loss": 0.0856, "step": 9328 }, { "epoch": 4.337517433751743, "grad_norm": 1.55863356590271, "learning_rate": 1.261353328918984e-05, "loss": 0.0888, "step": 9330 }, { "epoch": 4.3384472338447235, "grad_norm": 2.3652455806732178, "learning_rate": 1.2545372366512801e-05, "loss": 0.0685, "step": 9332 }, { "epoch": 4.339377033937703, "grad_norm": 1.6623005867004395, "learning_rate": 1.2477334365982326e-05, "loss": 0.1035, "step": 9334 }, { "epoch": 4.340306834030684, "grad_norm": 2.1621856689453125, "learning_rate": 1.2409419959106059e-05, "loss": 0.0955, "step": 9336 }, { "epoch": 4.341236634123663, "grad_norm": 1.2488023042678833, "learning_rate": 1.234162981617173e-05, "loss": 0.0807, "step": 9338 }, { "epoch": 4.342166434216644, "grad_norm": 1.242182970046997, "learning_rate": 1.2273964606240763e-05, "loss": 0.099, "step": 9340 }, { "epoch": 4.343096234309623, "grad_norm": 1.5674885511398315, "learning_rate": 1.2206424997141429e-05, "loss": 0.1091, "step": 9342 }, { "epoch": 4.344026034402604, "grad_norm": 0.960444986820221, "learning_rate": 1.2139011655462394e-05, "loss": 0.055, "step": 9344 }, { "epoch": 4.344955834495583, "grad_norm": 2.1449952125549316, "learning_rate": 1.2071725246546129e-05, "loss": 0.1087, "step": 9346 }, { "epoch": 4.345885634588564, "grad_norm": 1.6255998611450195, "learning_rate": 1.2004566434482325e-05, "loss": 0.1041, "step": 9348 }, { "epoch": 4.3468154346815435, "grad_norm": 1.7423652410507202, "learning_rate": 1.1937535882101344e-05, "loss": 0.1057, "step": 9350 }, { "epoch": 4.347745234774523, "grad_norm": 1.3877346515655518, "learning_rate": 1.187063425096764e-05, "loss": 0.1073, "step": 9352 }, { "epoch": 4.348675034867504, "grad_norm": 1.4434616565704346, "learning_rate": 1.1803862201373413e-05, "loss": 0.0862, "step": 9354 }, { "epoch": 4.349604834960483, "grad_norm": 0.8845992088317871, "learning_rate": 1.1737220392331726e-05, "loss": 0.0453, "step": 9356 }, { "epoch": 4.350534635053464, "grad_norm": 1.6350867748260498, "learning_rate": 1.167070948157033e-05, "loss": 0.127, "step": 9358 }, { "epoch": 4.351464435146443, "grad_norm": 1.6421598196029663, "learning_rate": 1.1604330125525135e-05, "loss": 0.1018, "step": 9360 }, { "epoch": 4.352394235239424, "grad_norm": 1.4969794750213623, "learning_rate": 1.1538082979333547e-05, "loss": 0.0775, "step": 9362 }, { "epoch": 4.353324035332403, "grad_norm": 1.989943265914917, "learning_rate": 1.1471968696828161e-05, "loss": 0.0932, "step": 9364 }, { "epoch": 4.354253835425384, "grad_norm": 1.5482075214385986, "learning_rate": 1.1405987930530248e-05, "loss": 0.1038, "step": 9366 }, { "epoch": 4.3551836355183635, "grad_norm": 1.7041456699371338, "learning_rate": 1.1340141331643337e-05, "loss": 0.0849, "step": 9368 }, { "epoch": 4.356113435611343, "grad_norm": 1.2722928524017334, "learning_rate": 1.1274429550046773e-05, "loss": 0.0751, "step": 9370 }, { "epoch": 4.357043235704324, "grad_norm": 1.030063509941101, "learning_rate": 1.120885323428928e-05, "loss": 0.0926, "step": 9372 }, { "epoch": 4.357973035797303, "grad_norm": 1.5636767148971558, "learning_rate": 1.1143413031582685e-05, "loss": 0.0659, "step": 9374 }, { "epoch": 4.358902835890284, "grad_norm": 1.478726863861084, "learning_rate": 1.1078109587795378e-05, "loss": 0.1162, "step": 9376 }, { "epoch": 4.359832635983263, "grad_norm": 1.4730058908462524, "learning_rate": 1.1012943547445907e-05, "loss": 0.0913, "step": 9378 }, { "epoch": 4.360762436076244, "grad_norm": 1.3981608152389526, "learning_rate": 1.0947915553696784e-05, "loss": 0.102, "step": 9380 }, { "epoch": 4.361692236169223, "grad_norm": 1.2434172630310059, "learning_rate": 1.0883026248348126e-05, "loss": 0.0897, "step": 9382 }, { "epoch": 4.362622036262204, "grad_norm": 2.183781623840332, "learning_rate": 1.0818276271831144e-05, "loss": 0.108, "step": 9384 }, { "epoch": 4.3635518363551835, "grad_norm": 1.4673696756362915, "learning_rate": 1.075366626320193e-05, "loss": 0.0628, "step": 9386 }, { "epoch": 4.364481636448164, "grad_norm": 1.4785045385360718, "learning_rate": 1.0689196860135293e-05, "loss": 0.1319, "step": 9388 }, { "epoch": 4.365411436541144, "grad_norm": 1.0692551136016846, "learning_rate": 1.0624868698918103e-05, "loss": 0.096, "step": 9390 }, { "epoch": 4.366341236634124, "grad_norm": 1.1429011821746826, "learning_rate": 1.0560682414443346e-05, "loss": 0.0799, "step": 9392 }, { "epoch": 4.367271036727104, "grad_norm": 1.6440370082855225, "learning_rate": 1.0496638640203805e-05, "loss": 0.0901, "step": 9394 }, { "epoch": 4.368200836820083, "grad_norm": 0.9684098362922668, "learning_rate": 1.0432738008285642e-05, "loss": 0.058, "step": 9396 }, { "epoch": 4.369130636913064, "grad_norm": 2.0953245162963867, "learning_rate": 1.036898114936233e-05, "loss": 0.0899, "step": 9398 }, { "epoch": 4.370060437006043, "grad_norm": 1.157676100730896, "learning_rate": 1.0305368692688222e-05, "loss": 0.0972, "step": 9400 }, { "epoch": 4.370990237099024, "grad_norm": 1.5656546354293823, "learning_rate": 1.0241901266092689e-05, "loss": 0.0852, "step": 9402 }, { "epoch": 4.3719200371920035, "grad_norm": 1.7809864282608032, "learning_rate": 1.0178579495973576e-05, "loss": 0.1476, "step": 9404 }, { "epoch": 4.372849837284984, "grad_norm": 1.7558435201644897, "learning_rate": 1.0115404007291154e-05, "loss": 0.1137, "step": 9406 }, { "epoch": 4.373779637377964, "grad_norm": 1.8629506826400757, "learning_rate": 1.005237542356206e-05, "loss": 0.0923, "step": 9408 }, { "epoch": 4.374709437470944, "grad_norm": 1.7003018856048584, "learning_rate": 9.989494366852968e-06, "loss": 0.0697, "step": 9410 }, { "epoch": 4.375639237563924, "grad_norm": 1.2810795307159424, "learning_rate": 9.926761457774453e-06, "loss": 0.0973, "step": 9412 }, { "epoch": 4.376569037656903, "grad_norm": 1.4004093408584595, "learning_rate": 9.864177315475004e-06, "loss": 0.0853, "step": 9414 }, { "epoch": 4.377498837749884, "grad_norm": 2.1587703227996826, "learning_rate": 9.80174255763491e-06, "loss": 0.0847, "step": 9416 }, { "epoch": 4.378428637842863, "grad_norm": 1.9018367528915405, "learning_rate": 9.739457800459982e-06, "loss": 0.0862, "step": 9418 }, { "epoch": 4.379358437935844, "grad_norm": 1.245938777923584, "learning_rate": 9.677323658675636e-06, "loss": 0.0912, "step": 9420 }, { "epoch": 4.3802882380288235, "grad_norm": 2.2369399070739746, "learning_rate": 9.615340745520765e-06, "loss": 0.0826, "step": 9422 }, { "epoch": 4.381218038121804, "grad_norm": 1.5076128244400024, "learning_rate": 9.553509672741697e-06, "loss": 0.0878, "step": 9424 }, { "epoch": 4.382147838214784, "grad_norm": 1.634756088256836, "learning_rate": 9.491831050586125e-06, "loss": 0.1257, "step": 9426 }, { "epoch": 4.383077638307764, "grad_norm": 1.2703053951263428, "learning_rate": 9.430305487797217e-06, "loss": 0.0869, "step": 9428 }, { "epoch": 4.384007438400744, "grad_norm": 1.558892011642456, "learning_rate": 9.368933591607404e-06, "loss": 0.156, "step": 9430 }, { "epoch": 4.384937238493724, "grad_norm": 0.9523826837539673, "learning_rate": 9.307715967732557e-06, "loss": 0.0861, "step": 9432 }, { "epoch": 4.385867038586704, "grad_norm": 1.3929122686386108, "learning_rate": 9.246653220365813e-06, "loss": 0.0772, "step": 9434 }, { "epoch": 4.386796838679684, "grad_norm": 1.5782274007797241, "learning_rate": 9.185745952171934e-06, "loss": 0.1062, "step": 9436 }, { "epoch": 4.387726638772664, "grad_norm": 1.4980721473693848, "learning_rate": 9.12499476428103e-06, "loss": 0.0721, "step": 9438 }, { "epoch": 4.3886564388656435, "grad_norm": 1.0085678100585938, "learning_rate": 9.064400256282772e-06, "loss": 0.0596, "step": 9440 }, { "epoch": 4.389586238958624, "grad_norm": 1.466829538345337, "learning_rate": 9.003963026220592e-06, "loss": 0.1304, "step": 9442 }, { "epoch": 4.390516039051604, "grad_norm": 0.8998017907142639, "learning_rate": 8.943683670585448e-06, "loss": 0.072, "step": 9444 }, { "epoch": 4.391445839144584, "grad_norm": 1.368242859840393, "learning_rate": 8.883562784310276e-06, "loss": 0.1103, "step": 9446 }, { "epoch": 4.392375639237564, "grad_norm": 1.886487364768982, "learning_rate": 8.823600960763922e-06, "loss": 0.0839, "step": 9448 }, { "epoch": 4.393305439330544, "grad_norm": 1.3140952587127686, "learning_rate": 8.763798791745442e-06, "loss": 0.0762, "step": 9450 }, { "epoch": 4.394235239423524, "grad_norm": 1.334981918334961, "learning_rate": 8.704156867478068e-06, "loss": 0.0645, "step": 9452 }, { "epoch": 4.395165039516504, "grad_norm": 1.6473406553268433, "learning_rate": 8.644675776603512e-06, "loss": 0.0692, "step": 9454 }, { "epoch": 4.396094839609484, "grad_norm": 1.3900558948516846, "learning_rate": 8.585356106176133e-06, "loss": 0.0827, "step": 9456 }, { "epoch": 4.397024639702464, "grad_norm": 1.8715484142303467, "learning_rate": 8.526198441657122e-06, "loss": 0.1246, "step": 9458 }, { "epoch": 4.397954439795444, "grad_norm": 1.499255657196045, "learning_rate": 8.467203366908753e-06, "loss": 0.0988, "step": 9460 }, { "epoch": 4.398884239888424, "grad_norm": 1.5051559209823608, "learning_rate": 8.40837146418856e-06, "loss": 0.0991, "step": 9462 }, { "epoch": 4.399814039981404, "grad_norm": 1.2312617301940918, "learning_rate": 8.349703314143768e-06, "loss": 0.1189, "step": 9464 }, { "epoch": 4.400743840074384, "grad_norm": 1.6249033212661743, "learning_rate": 8.291199495805233e-06, "loss": 0.0913, "step": 9466 }, { "epoch": 4.401673640167364, "grad_norm": 1.4979948997497559, "learning_rate": 8.232860586582028e-06, "loss": 0.1102, "step": 9468 }, { "epoch": 4.402603440260344, "grad_norm": 1.7667640447616577, "learning_rate": 8.174687162255701e-06, "loss": 0.0852, "step": 9470 }, { "epoch": 4.403533240353324, "grad_norm": 1.403827428817749, "learning_rate": 8.116679796974429e-06, "loss": 0.0904, "step": 9472 }, { "epoch": 4.404463040446304, "grad_norm": 1.1164031028747559, "learning_rate": 8.058839063247455e-06, "loss": 0.0955, "step": 9474 }, { "epoch": 4.405392840539284, "grad_norm": 0.8948830366134644, "learning_rate": 8.001165531939566e-06, "loss": 0.0663, "step": 9476 }, { "epoch": 4.406322640632264, "grad_norm": 1.4157402515411377, "learning_rate": 7.943659772265138e-06, "loss": 0.0798, "step": 9478 }, { "epoch": 4.4072524407252445, "grad_norm": 2.1207549571990967, "learning_rate": 7.886322351782833e-06, "loss": 0.1365, "step": 9480 }, { "epoch": 4.408182240818224, "grad_norm": 1.5447384119033813, "learning_rate": 7.829153836389815e-06, "loss": 0.1096, "step": 9482 }, { "epoch": 4.409112040911204, "grad_norm": 2.410006046295166, "learning_rate": 7.772154790316311e-06, "loss": 0.1041, "step": 9484 }, { "epoch": 4.410041841004184, "grad_norm": 1.4189624786376953, "learning_rate": 7.715325776119902e-06, "loss": 0.1116, "step": 9486 }, { "epoch": 4.410971641097164, "grad_norm": 1.882644534111023, "learning_rate": 7.65866735467991e-06, "loss": 0.1218, "step": 9488 }, { "epoch": 4.411901441190144, "grad_norm": 2.047727346420288, "learning_rate": 7.6021800851921785e-06, "loss": 0.1111, "step": 9490 }, { "epoch": 4.412831241283124, "grad_norm": 0.9375013709068298, "learning_rate": 7.545864525163224e-06, "loss": 0.0821, "step": 9492 }, { "epoch": 4.413761041376104, "grad_norm": 1.3038376569747925, "learning_rate": 7.4897212304048874e-06, "loss": 0.0825, "step": 9494 }, { "epoch": 4.414690841469084, "grad_norm": 1.5657720565795898, "learning_rate": 7.433750755028786e-06, "loss": 0.0721, "step": 9496 }, { "epoch": 4.4156206415620645, "grad_norm": 1.5707577466964722, "learning_rate": 7.377953651441017e-06, "loss": 0.0785, "step": 9498 }, { "epoch": 4.416550441655044, "grad_norm": 1.3338830471038818, "learning_rate": 7.322330470336367e-06, "loss": 0.0603, "step": 9500 }, { "epoch": 4.417480241748025, "grad_norm": 1.516019344329834, "learning_rate": 7.2668817606931785e-06, "loss": 0.0904, "step": 9502 }, { "epoch": 4.418410041841004, "grad_norm": 1.7542157173156738, "learning_rate": 7.211608069767897e-06, "loss": 0.1154, "step": 9504 }, { "epoch": 4.419339841933985, "grad_norm": 1.5412955284118652, "learning_rate": 7.1565099430895e-06, "loss": 0.1125, "step": 9506 }, { "epoch": 4.420269642026964, "grad_norm": 2.528181791305542, "learning_rate": 7.101587924454289e-06, "loss": 0.0848, "step": 9508 }, { "epoch": 4.421199442119944, "grad_norm": 1.4169434309005737, "learning_rate": 7.0468425559203186e-06, "loss": 0.1189, "step": 9510 }, { "epoch": 4.422129242212924, "grad_norm": 0.9393842220306396, "learning_rate": 6.99227437780237e-06, "loss": 0.0789, "step": 9512 }, { "epoch": 4.423059042305904, "grad_norm": 1.36910080909729, "learning_rate": 6.937883928666298e-06, "loss": 0.0612, "step": 9514 }, { "epoch": 4.4239888423988845, "grad_norm": 1.726395845413208, "learning_rate": 6.883671745323851e-06, "loss": 0.0992, "step": 9516 }, { "epoch": 4.424918642491864, "grad_norm": 1.9422825574874878, "learning_rate": 6.829638362827449e-06, "loss": 0.1293, "step": 9518 }, { "epoch": 4.4258484425848446, "grad_norm": 1.8225440979003906, "learning_rate": 6.775784314464766e-06, "loss": 0.0958, "step": 9520 }, { "epoch": 4.426778242677824, "grad_norm": 1.33700692653656, "learning_rate": 6.722110131753455e-06, "loss": 0.0867, "step": 9522 }, { "epoch": 4.427708042770805, "grad_norm": 1.5832892656326294, "learning_rate": 6.668616344436031e-06, "loss": 0.1193, "step": 9524 }, { "epoch": 4.428637842863784, "grad_norm": 1.21122407913208, "learning_rate": 6.615303480474637e-06, "loss": 0.0583, "step": 9526 }, { "epoch": 4.429567642956764, "grad_norm": 1.1315863132476807, "learning_rate": 6.5621720660456895e-06, "loss": 0.077, "step": 9528 }, { "epoch": 4.430497443049744, "grad_norm": 1.1521533727645874, "learning_rate": 6.509222625534798e-06, "loss": 0.0445, "step": 9530 }, { "epoch": 4.431427243142724, "grad_norm": 1.4733864068984985, "learning_rate": 6.456455681531562e-06, "loss": 0.0881, "step": 9532 }, { "epoch": 4.4323570432357045, "grad_norm": 1.2208476066589355, "learning_rate": 6.403871754824419e-06, "loss": 0.0807, "step": 9534 }, { "epoch": 4.433286843328684, "grad_norm": 1.1415858268737793, "learning_rate": 6.351471364395465e-06, "loss": 0.0864, "step": 9536 }, { "epoch": 4.4342166434216645, "grad_norm": 1.290165901184082, "learning_rate": 6.299255027415458e-06, "loss": 0.0939, "step": 9538 }, { "epoch": 4.435146443514644, "grad_norm": 1.358205795288086, "learning_rate": 6.247223259238536e-06, "loss": 0.0832, "step": 9540 }, { "epoch": 4.436076243607625, "grad_norm": 1.5195430517196655, "learning_rate": 6.195376573397272e-06, "loss": 0.1179, "step": 9542 }, { "epoch": 4.437006043700604, "grad_norm": 1.0723932981491089, "learning_rate": 6.143715481597434e-06, "loss": 0.1129, "step": 9544 }, { "epoch": 4.437935843793585, "grad_norm": 1.1815840005874634, "learning_rate": 6.092240493713233e-06, "loss": 0.0564, "step": 9546 }, { "epoch": 4.438865643886564, "grad_norm": 1.6850388050079346, "learning_rate": 6.040952117781989e-06, "loss": 0.0443, "step": 9548 }, { "epoch": 4.439795443979545, "grad_norm": 0.6413979530334473, "learning_rate": 5.989850859999238e-06, "loss": 0.0651, "step": 9550 }, { "epoch": 4.4407252440725244, "grad_norm": 1.3081331253051758, "learning_rate": 5.938937224713844e-06, "loss": 0.0959, "step": 9552 }, { "epoch": 4.441655044165504, "grad_norm": 2.152467727661133, "learning_rate": 5.888211714422755e-06, "loss": 0.0942, "step": 9554 }, { "epoch": 4.4425848442584845, "grad_norm": 1.2280535697937012, "learning_rate": 5.837674829766301e-06, "loss": 0.0518, "step": 9556 }, { "epoch": 4.443514644351464, "grad_norm": 2.027757406234741, "learning_rate": 5.787327069523107e-06, "loss": 0.1294, "step": 9558 }, { "epoch": 4.444444444444445, "grad_norm": 0.8656994104385376, "learning_rate": 5.737168930605296e-06, "loss": 0.1005, "step": 9560 }, { "epoch": 4.445374244537424, "grad_norm": 1.503016471862793, "learning_rate": 5.687200908053419e-06, "loss": 0.0819, "step": 9562 }, { "epoch": 4.446304044630405, "grad_norm": 1.6108720302581787, "learning_rate": 5.6374234950316865e-06, "loss": 0.0711, "step": 9564 }, { "epoch": 4.447233844723384, "grad_norm": 1.8455811738967896, "learning_rate": 5.58783718282307e-06, "loss": 0.0759, "step": 9566 }, { "epoch": 4.448163644816365, "grad_norm": 1.087294340133667, "learning_rate": 5.538442460824451e-06, "loss": 0.0728, "step": 9568 }, { "epoch": 4.449093444909344, "grad_norm": 1.1875433921813965, "learning_rate": 5.489239816541767e-06, "loss": 0.1189, "step": 9570 }, { "epoch": 4.450023245002324, "grad_norm": 1.2562496662139893, "learning_rate": 5.440229735585312e-06, "loss": 0.1159, "step": 9572 }, { "epoch": 4.4509530450953045, "grad_norm": 2.452007293701172, "learning_rate": 5.391412701664794e-06, "loss": 0.0729, "step": 9574 }, { "epoch": 4.451882845188284, "grad_norm": 1.683897614479065, "learning_rate": 5.342789196584574e-06, "loss": 0.077, "step": 9576 }, { "epoch": 4.452812645281265, "grad_norm": 0.8585244417190552, "learning_rate": 5.29435970023902e-06, "loss": 0.0854, "step": 9578 }, { "epoch": 4.453742445374244, "grad_norm": 1.524370551109314, "learning_rate": 5.2461246906077665e-06, "loss": 0.0858, "step": 9580 }, { "epoch": 4.454672245467225, "grad_norm": 1.0827029943466187, "learning_rate": 5.198084643750851e-06, "loss": 0.0499, "step": 9582 }, { "epoch": 4.455602045560204, "grad_norm": 0.84294193983078, "learning_rate": 5.150240033804119e-06, "loss": 0.0837, "step": 9584 }, { "epoch": 4.456531845653185, "grad_norm": 1.5827172994613647, "learning_rate": 5.1025913329746355e-06, "loss": 0.1033, "step": 9586 }, { "epoch": 4.457461645746164, "grad_norm": 1.7508697509765625, "learning_rate": 5.055139011535759e-06, "loss": 0.0956, "step": 9588 }, { "epoch": 4.458391445839145, "grad_norm": 1.6733307838439941, "learning_rate": 5.007883537822775e-06, "loss": 0.0888, "step": 9590 }, { "epoch": 4.4593212459321245, "grad_norm": 1.5819863080978394, "learning_rate": 4.9608253782281e-06, "loss": 0.0813, "step": 9592 }, { "epoch": 4.460251046025105, "grad_norm": 1.9943430423736572, "learning_rate": 4.913964997196826e-06, "loss": 0.1114, "step": 9594 }, { "epoch": 4.461180846118085, "grad_norm": 1.088220477104187, "learning_rate": 4.867302857221997e-06, "loss": 0.1129, "step": 9596 }, { "epoch": 4.462110646211064, "grad_norm": 1.2587836980819702, "learning_rate": 4.820839418840014e-06, "loss": 0.0825, "step": 9598 }, { "epoch": 4.463040446304045, "grad_norm": 1.2720987796783447, "learning_rate": 4.774575140626338e-06, "loss": 0.0618, "step": 9600 }, { "epoch": 4.463970246397024, "grad_norm": 1.7874970436096191, "learning_rate": 4.728510479190691e-06, "loss": 0.0893, "step": 9602 }, { "epoch": 4.464900046490005, "grad_norm": 1.0658190250396729, "learning_rate": 4.682645889172681e-06, "loss": 0.058, "step": 9604 }, { "epoch": 4.465829846582984, "grad_norm": 1.6632353067398071, "learning_rate": 4.636981823237273e-06, "loss": 0.0716, "step": 9606 }, { "epoch": 4.466759646675965, "grad_norm": 1.0230793952941895, "learning_rate": 4.5915187320704365e-06, "loss": 0.0853, "step": 9608 }, { "epoch": 4.4676894467689445, "grad_norm": 1.6836304664611816, "learning_rate": 4.546257064374453e-06, "loss": 0.0853, "step": 9610 }, { "epoch": 4.468619246861925, "grad_norm": 2.6299400329589844, "learning_rate": 4.501197266863706e-06, "loss": 0.1343, "step": 9612 }, { "epoch": 4.469549046954905, "grad_norm": 1.977663516998291, "learning_rate": 4.456339784260262e-06, "loss": 0.0953, "step": 9614 }, { "epoch": 4.470478847047885, "grad_norm": 1.2798960208892822, "learning_rate": 4.411685059289334e-06, "loss": 0.0779, "step": 9616 }, { "epoch": 4.471408647140865, "grad_norm": 1.4751040935516357, "learning_rate": 4.3672335326750585e-06, "loss": 0.074, "step": 9618 }, { "epoch": 4.472338447233844, "grad_norm": 1.1418123245239258, "learning_rate": 4.32298564313598e-06, "loss": 0.0539, "step": 9620 }, { "epoch": 4.473268247326825, "grad_norm": 1.1665270328521729, "learning_rate": 4.27894182738098e-06, "loss": 0.07, "step": 9622 }, { "epoch": 4.474198047419804, "grad_norm": 0.9098102450370789, "learning_rate": 4.235102520104714e-06, "loss": 0.0596, "step": 9624 }, { "epoch": 4.475127847512785, "grad_norm": 1.2840076684951782, "learning_rate": 4.191468153983422e-06, "loss": 0.0816, "step": 9626 }, { "epoch": 4.4760576476057645, "grad_norm": 1.644608974456787, "learning_rate": 4.14803915967073e-06, "loss": 0.092, "step": 9628 }, { "epoch": 4.476987447698745, "grad_norm": 1.6773964166641235, "learning_rate": 4.104815965793282e-06, "loss": 0.0922, "step": 9630 }, { "epoch": 4.477917247791725, "grad_norm": 1.3729380369186401, "learning_rate": 4.061798998946473e-06, "loss": 0.0943, "step": 9632 }, { "epoch": 4.478847047884705, "grad_norm": 0.8804832100868225, "learning_rate": 4.018988683690478e-06, "loss": 0.0892, "step": 9634 }, { "epoch": 4.479776847977685, "grad_norm": 1.5134036540985107, "learning_rate": 3.976385442545793e-06, "loss": 0.0643, "step": 9636 }, { "epoch": 4.480706648070665, "grad_norm": 1.2066324949264526, "learning_rate": 3.933989695989218e-06, "loss": 0.0732, "step": 9638 }, { "epoch": 4.481636448163645, "grad_norm": 1.483345627784729, "learning_rate": 3.891801862449652e-06, "loss": 0.116, "step": 9640 }, { "epoch": 4.482566248256624, "grad_norm": 1.531400203704834, "learning_rate": 3.8498223583039755e-06, "loss": 0.0854, "step": 9642 }, { "epoch": 4.483496048349605, "grad_norm": 1.1915887594223022, "learning_rate": 3.808051597872949e-06, "loss": 0.1019, "step": 9644 }, { "epoch": 4.4844258484425845, "grad_norm": 1.5278739929199219, "learning_rate": 3.7664899934170954e-06, "loss": 0.0886, "step": 9646 }, { "epoch": 4.485355648535565, "grad_norm": 0.9714561700820923, "learning_rate": 3.7251379551327168e-06, "loss": 0.0535, "step": 9648 }, { "epoch": 4.486285448628545, "grad_norm": 1.1696851253509521, "learning_rate": 3.6839958911477084e-06, "loss": 0.0699, "step": 9650 }, { "epoch": 4.487215248721525, "grad_norm": 0.8624780774116516, "learning_rate": 3.6430642075176614e-06, "loss": 0.0561, "step": 9652 }, { "epoch": 4.488145048814505, "grad_norm": 1.3105947971343994, "learning_rate": 3.6023433082216933e-06, "loss": 0.1326, "step": 9654 }, { "epoch": 4.489074848907485, "grad_norm": 2.166456699371338, "learning_rate": 3.5618335951587153e-06, "loss": 0.1169, "step": 9656 }, { "epoch": 4.490004649000465, "grad_norm": 1.087326169013977, "learning_rate": 3.5215354681432146e-06, "loss": 0.059, "step": 9658 }, { "epoch": 4.490934449093445, "grad_norm": 1.1378189325332642, "learning_rate": 3.4814493249014125e-06, "loss": 0.0644, "step": 9660 }, { "epoch": 4.491864249186425, "grad_norm": 1.5042082071304321, "learning_rate": 3.441575561067429e-06, "loss": 0.1076, "step": 9662 }, { "epoch": 4.492794049279405, "grad_norm": 1.6666792631149292, "learning_rate": 3.4019145701791463e-06, "loss": 0.1045, "step": 9664 }, { "epoch": 4.493723849372385, "grad_norm": 1.0480409860610962, "learning_rate": 3.362466743674558e-06, "loss": 0.0613, "step": 9666 }, { "epoch": 4.494653649465365, "grad_norm": 1.2447770833969116, "learning_rate": 3.32323247088776e-06, "loss": 0.0833, "step": 9668 }, { "epoch": 4.495583449558345, "grad_norm": 2.0616519451141357, "learning_rate": 3.2842121390452336e-06, "loss": 0.0943, "step": 9670 }, { "epoch": 4.496513249651325, "grad_norm": 0.9936814904212952, "learning_rate": 3.2454061332618767e-06, "loss": 0.0531, "step": 9672 }, { "epoch": 4.497443049744305, "grad_norm": 1.7335423231124878, "learning_rate": 3.206814836537297e-06, "loss": 0.0712, "step": 9674 }, { "epoch": 4.498372849837285, "grad_norm": 1.8310233354568481, "learning_rate": 3.168438629752018e-06, "loss": 0.0888, "step": 9676 }, { "epoch": 4.499302649930265, "grad_norm": 1.3353545665740967, "learning_rate": 3.1302778916637044e-06, "loss": 0.0612, "step": 9678 }, { "epoch": 4.500232450023245, "grad_norm": 1.3720858097076416, "learning_rate": 3.0923329989034154e-06, "loss": 0.0855, "step": 9680 }, { "epoch": 4.501162250116225, "grad_norm": 1.079142689704895, "learning_rate": 3.054604325971965e-06, "loss": 0.0819, "step": 9682 }, { "epoch": 4.502092050209205, "grad_norm": 1.4063178300857544, "learning_rate": 3.0170922452361244e-06, "loss": 0.0474, "step": 9684 }, { "epoch": 4.503021850302185, "grad_norm": 2.7392921447753906, "learning_rate": 2.979797126924943e-06, "loss": 0.1387, "step": 9686 }, { "epoch": 4.503951650395165, "grad_norm": 1.6900469064712524, "learning_rate": 2.9427193391261814e-06, "loss": 0.0757, "step": 9688 }, { "epoch": 4.504881450488145, "grad_norm": 1.3289343118667603, "learning_rate": 2.9058592477826822e-06, "loss": 0.0731, "step": 9690 }, { "epoch": 4.505811250581125, "grad_norm": 2.2616021633148193, "learning_rate": 2.8692172166886375e-06, "loss": 0.0893, "step": 9692 }, { "epoch": 4.506741050674105, "grad_norm": 1.3043015003204346, "learning_rate": 2.832793607486089e-06, "loss": 0.0767, "step": 9694 }, { "epoch": 4.507670850767085, "grad_norm": 1.1043853759765625, "learning_rate": 2.79658877966141e-06, "loss": 0.0787, "step": 9696 }, { "epoch": 4.508600650860065, "grad_norm": 1.2527625560760498, "learning_rate": 2.760603090541577e-06, "loss": 0.0919, "step": 9698 }, { "epoch": 4.509530450953045, "grad_norm": 1.0304163694381714, "learning_rate": 2.72483689529083e-06, "loss": 0.0651, "step": 9700 }, { "epoch": 4.510460251046025, "grad_norm": 1.5783592462539673, "learning_rate": 2.6892905469070605e-06, "loss": 0.0677, "step": 9702 }, { "epoch": 4.5113900511390055, "grad_norm": 1.4144093990325928, "learning_rate": 2.6539643962184163e-06, "loss": 0.0781, "step": 9704 }, { "epoch": 4.512319851231985, "grad_norm": 0.9751781225204468, "learning_rate": 2.6188587918797434e-06, "loss": 0.0428, "step": 9706 }, { "epoch": 4.513249651324966, "grad_norm": 1.401387333869934, "learning_rate": 2.583974080369119e-06, "loss": 0.0744, "step": 9708 }, { "epoch": 4.514179451417945, "grad_norm": 1.5523817539215088, "learning_rate": 2.5493106059846272e-06, "loss": 0.0976, "step": 9710 }, { "epoch": 4.515109251510925, "grad_norm": 1.5795130729675293, "learning_rate": 2.5148687108407438e-06, "loss": 0.094, "step": 9712 }, { "epoch": 4.516039051603905, "grad_norm": 1.4061871767044067, "learning_rate": 2.4806487348650497e-06, "loss": 0.0799, "step": 9714 }, { "epoch": 4.516968851696885, "grad_norm": 1.6776823997497559, "learning_rate": 2.446651015794933e-06, "loss": 0.092, "step": 9716 }, { "epoch": 4.517898651789865, "grad_norm": 1.2543243169784546, "learning_rate": 2.412875889174155e-06, "loss": 0.0773, "step": 9718 }, { "epoch": 4.518828451882845, "grad_norm": 1.2847959995269775, "learning_rate": 2.3793236883495385e-06, "loss": 0.0988, "step": 9720 }, { "epoch": 4.5197582519758255, "grad_norm": 1.5950731039047241, "learning_rate": 2.345994744467764e-06, "loss": 0.0797, "step": 9722 }, { "epoch": 4.520688052068805, "grad_norm": 1.6074390411376953, "learning_rate": 2.3128893864720893e-06, "loss": 0.0671, "step": 9724 }, { "epoch": 4.521617852161786, "grad_norm": 1.4902364015579224, "learning_rate": 2.2800079410990106e-06, "loss": 0.0732, "step": 9726 }, { "epoch": 4.522547652254765, "grad_norm": 0.9927710890769958, "learning_rate": 2.2473507328751225e-06, "loss": 0.0532, "step": 9728 }, { "epoch": 4.523477452347745, "grad_norm": 1.4497416019439697, "learning_rate": 2.214918084113887e-06, "loss": 0.1069, "step": 9730 }, { "epoch": 4.524407252440725, "grad_norm": 0.9233128428459167, "learning_rate": 2.1827103149124474e-06, "loss": 0.0705, "step": 9732 }, { "epoch": 4.525337052533705, "grad_norm": 2.061769723892212, "learning_rate": 2.1507277431484916e-06, "loss": 0.1024, "step": 9734 }, { "epoch": 4.526266852626685, "grad_norm": 0.6574268937110901, "learning_rate": 2.1189706844770667e-06, "loss": 0.0625, "step": 9736 }, { "epoch": 4.527196652719665, "grad_norm": 1.4207185506820679, "learning_rate": 2.0874394523275575e-06, "loss": 0.0859, "step": 9738 }, { "epoch": 4.5281264528126455, "grad_norm": 1.5577261447906494, "learning_rate": 2.0561343579004986e-06, "loss": 0.0786, "step": 9740 }, { "epoch": 4.529056252905625, "grad_norm": 1.6594295501708984, "learning_rate": 2.0250557101644772e-06, "loss": 0.1029, "step": 9742 }, { "epoch": 4.529986052998606, "grad_norm": 1.2784063816070557, "learning_rate": 1.9942038158532534e-06, "loss": 0.0781, "step": 9744 }, { "epoch": 4.530915853091585, "grad_norm": 1.166426181793213, "learning_rate": 1.963578979462554e-06, "loss": 0.0931, "step": 9746 }, { "epoch": 4.531845653184566, "grad_norm": 1.5640226602554321, "learning_rate": 1.9331815032471438e-06, "loss": 0.1308, "step": 9748 }, { "epoch": 4.532775453277545, "grad_norm": 1.7410671710968018, "learning_rate": 1.903011687217831e-06, "loss": 0.0515, "step": 9750 }, { "epoch": 4.533705253370526, "grad_norm": 1.4662057161331177, "learning_rate": 1.8730698291385676e-06, "loss": 0.0692, "step": 9752 }, { "epoch": 4.534635053463505, "grad_norm": 0.9554816484451294, "learning_rate": 1.8433562245233535e-06, "loss": 0.0642, "step": 9754 }, { "epoch": 4.535564853556485, "grad_norm": 1.789206862449646, "learning_rate": 1.8138711666334732e-06, "loss": 0.1047, "step": 9756 }, { "epoch": 4.5364946536494655, "grad_norm": 1.1458922624588013, "learning_rate": 1.784614946474574e-06, "loss": 0.1325, "step": 9758 }, { "epoch": 4.537424453742445, "grad_norm": 1.3826743364334106, "learning_rate": 1.7555878527937238e-06, "loss": 0.0736, "step": 9760 }, { "epoch": 4.538354253835426, "grad_norm": 1.235148310661316, "learning_rate": 1.7267901720766358e-06, "loss": 0.0671, "step": 9762 }, { "epoch": 4.539284053928405, "grad_norm": 1.1997483968734741, "learning_rate": 1.6982221885447395e-06, "loss": 0.0651, "step": 9764 }, { "epoch": 4.540213854021386, "grad_norm": 1.0022221803665161, "learning_rate": 1.66988418415255e-06, "loss": 0.0576, "step": 9766 }, { "epoch": 4.541143654114365, "grad_norm": 3.038173198699951, "learning_rate": 1.641776438584715e-06, "loss": 0.1324, "step": 9768 }, { "epoch": 4.542073454207346, "grad_norm": 1.5199517011642456, "learning_rate": 1.6138992292533198e-06, "loss": 0.0626, "step": 9770 }, { "epoch": 4.543003254300325, "grad_norm": 1.3615814447402954, "learning_rate": 1.5862528312951942e-06, "loss": 0.0871, "step": 9772 }, { "epoch": 4.543933054393305, "grad_norm": 0.9987950921058655, "learning_rate": 1.5588375175691293e-06, "loss": 0.0673, "step": 9774 }, { "epoch": 4.5448628544862855, "grad_norm": 1.233931303024292, "learning_rate": 1.5316535586531547e-06, "loss": 0.0757, "step": 9776 }, { "epoch": 4.545792654579266, "grad_norm": 1.602816104888916, "learning_rate": 1.5047012228420178e-06, "loss": 0.0702, "step": 9778 }, { "epoch": 4.546722454672246, "grad_norm": 1.1163880825042725, "learning_rate": 1.4779807761443728e-06, "loss": 0.0381, "step": 9780 }, { "epoch": 4.547652254765225, "grad_norm": 1.3250819444656372, "learning_rate": 1.4514924822802455e-06, "loss": 0.0688, "step": 9782 }, { "epoch": 4.548582054858206, "grad_norm": 1.4963914155960083, "learning_rate": 1.4252366026784013e-06, "loss": 0.0627, "step": 9784 }, { "epoch": 4.549511854951185, "grad_norm": 1.2599036693572998, "learning_rate": 1.3992133964737699e-06, "loss": 0.0713, "step": 9786 }, { "epoch": 4.550441655044166, "grad_norm": 1.3909556865692139, "learning_rate": 1.3734231205048937e-06, "loss": 0.0926, "step": 9788 }, { "epoch": 4.551371455137145, "grad_norm": 1.7940722703933716, "learning_rate": 1.3478660293113703e-06, "loss": 0.0976, "step": 9790 }, { "epoch": 4.552301255230126, "grad_norm": 1.5406320095062256, "learning_rate": 1.322542375131397e-06, "loss": 0.0938, "step": 9792 }, { "epoch": 4.5532310553231055, "grad_norm": 2.1973612308502197, "learning_rate": 1.297452407899205e-06, "loss": 0.1144, "step": 9794 }, { "epoch": 4.554160855416086, "grad_norm": 1.4950363636016846, "learning_rate": 1.2725963752426571e-06, "loss": 0.0918, "step": 9796 }, { "epoch": 4.5550906555090656, "grad_norm": 1.2297629117965698, "learning_rate": 1.2479745224807158e-06, "loss": 0.1045, "step": 9798 }, { "epoch": 4.556020455602045, "grad_norm": 1.675352931022644, "learning_rate": 1.2235870926211697e-06, "loss": 0.0692, "step": 9800 }, { "epoch": 4.556950255695026, "grad_norm": 1.6223082542419434, "learning_rate": 1.199434326358095e-06, "loss": 0.1459, "step": 9802 }, { "epoch": 4.557880055788005, "grad_norm": 1.619726538658142, "learning_rate": 1.175516462069528e-06, "loss": 0.0867, "step": 9804 }, { "epoch": 4.558809855880986, "grad_norm": 1.7090150117874146, "learning_rate": 1.1518337358151741e-06, "loss": 0.1224, "step": 9806 }, { "epoch": 4.559739655973965, "grad_norm": 0.7824251651763916, "learning_rate": 1.1283863813339422e-06, "loss": 0.0531, "step": 9808 }, { "epoch": 4.560669456066946, "grad_norm": 2.0378170013427734, "learning_rate": 1.105174630041763e-06, "loss": 0.1038, "step": 9810 }, { "epoch": 4.5615992561599255, "grad_norm": 2.5732834339141846, "learning_rate": 1.0821987110292439e-06, "loss": 0.1399, "step": 9812 }, { "epoch": 4.562529056252906, "grad_norm": 1.022570252418518, "learning_rate": 1.0594588510594491e-06, "loss": 0.0586, "step": 9814 }, { "epoch": 4.5634588563458856, "grad_norm": 2.1113452911376953, "learning_rate": 1.0369552745656115e-06, "loss": 0.0847, "step": 9816 }, { "epoch": 4.564388656438865, "grad_norm": 2.4296977519989014, "learning_rate": 1.014688203648938e-06, "loss": 0.0999, "step": 9818 }, { "epoch": 4.565318456531846, "grad_norm": 1.5335438251495361, "learning_rate": 9.926578580764333e-07, "loss": 0.0756, "step": 9820 }, { "epoch": 4.566248256624826, "grad_norm": 0.9887020587921143, "learning_rate": 9.708644552787126e-07, "loss": 0.0422, "step": 9822 }, { "epoch": 4.567178056717806, "grad_norm": 1.57457435131073, "learning_rate": 9.493082103478531e-07, "loss": 0.1141, "step": 9824 }, { "epoch": 4.568107856810785, "grad_norm": 1.828744888305664, "learning_rate": 9.279893360353106e-07, "loss": 0.1198, "step": 9826 }, { "epoch": 4.569037656903766, "grad_norm": 1.5493601560592651, "learning_rate": 9.069080427497725e-07, "loss": 0.0938, "step": 9828 }, { "epoch": 4.5699674569967454, "grad_norm": 1.1560444831848145, "learning_rate": 8.860645385550632e-07, "loss": 0.0601, "step": 9830 }, { "epoch": 4.570897257089726, "grad_norm": 0.7878835201263428, "learning_rate": 8.654590291681569e-07, "loss": 0.0742, "step": 9832 }, { "epoch": 4.5718270571827055, "grad_norm": 1.4632859230041504, "learning_rate": 8.450917179571371e-07, "loss": 0.0844, "step": 9834 }, { "epoch": 4.572756857275686, "grad_norm": 2.2661290168762207, "learning_rate": 8.249628059391316e-07, "loss": 0.1296, "step": 9836 }, { "epoch": 4.573686657368666, "grad_norm": 0.924258828163147, "learning_rate": 8.050724917783617e-07, "loss": 0.0705, "step": 9838 }, { "epoch": 4.574616457461646, "grad_norm": 1.733229160308838, "learning_rate": 7.854209717842323e-07, "loss": 0.1098, "step": 9840 }, { "epoch": 4.575546257554626, "grad_norm": 1.8916281461715698, "learning_rate": 7.660084399092779e-07, "loss": 0.0765, "step": 9842 }, { "epoch": 4.576476057647605, "grad_norm": 1.2317783832550049, "learning_rate": 7.468350877473669e-07, "loss": 0.0711, "step": 9844 }, { "epoch": 4.577405857740586, "grad_norm": 1.6018142700195312, "learning_rate": 7.279011045317258e-07, "loss": 0.1027, "step": 9846 }, { "epoch": 4.578335657833565, "grad_norm": 1.0899640321731567, "learning_rate": 7.09206677133154e-07, "loss": 0.0551, "step": 9848 }, { "epoch": 4.579265457926546, "grad_norm": 1.2509325742721558, "learning_rate": 6.907519900581005e-07, "loss": 0.076, "step": 9850 }, { "epoch": 4.5801952580195255, "grad_norm": 1.415245771408081, "learning_rate": 6.725372254468403e-07, "loss": 0.0827, "step": 9852 }, { "epoch": 4.581125058112506, "grad_norm": 1.258665680885315, "learning_rate": 6.545625630717843e-07, "loss": 0.0934, "step": 9854 }, { "epoch": 4.582054858205486, "grad_norm": 2.085360527038574, "learning_rate": 6.368281803355778e-07, "loss": 0.1433, "step": 9856 }, { "epoch": 4.582984658298466, "grad_norm": 1.391481637954712, "learning_rate": 6.193342522694131e-07, "loss": 0.0936, "step": 9858 }, { "epoch": 4.583914458391446, "grad_norm": 1.423584222793579, "learning_rate": 6.020809515313143e-07, "loss": 0.1116, "step": 9860 }, { "epoch": 4.584844258484426, "grad_norm": 1.6494166851043701, "learning_rate": 5.850684484043967e-07, "loss": 0.0704, "step": 9862 }, { "epoch": 4.585774058577406, "grad_norm": 1.1511698961257935, "learning_rate": 5.682969107951741e-07, "loss": 0.0685, "step": 9864 }, { "epoch": 4.586703858670386, "grad_norm": 1.487305998802185, "learning_rate": 5.517665042319569e-07, "loss": 0.0828, "step": 9866 }, { "epoch": 4.587633658763366, "grad_norm": 1.156471848487854, "learning_rate": 5.354773918632009e-07, "loss": 0.0762, "step": 9868 }, { "epoch": 4.5885634588563455, "grad_norm": 2.0750138759613037, "learning_rate": 5.194297344558589e-07, "loss": 0.0837, "step": 9870 }, { "epoch": 4.589493258949326, "grad_norm": 1.0420153141021729, "learning_rate": 5.036236903938338e-07, "loss": 0.0639, "step": 9872 }, { "epoch": 4.590423059042306, "grad_norm": 1.328913688659668, "learning_rate": 4.880594156763949e-07, "loss": 0.0935, "step": 9874 }, { "epoch": 4.591352859135286, "grad_norm": 1.1534656286239624, "learning_rate": 4.7273706391665585e-07, "loss": 0.0466, "step": 9876 }, { "epoch": 4.592282659228266, "grad_norm": 1.9059916734695435, "learning_rate": 4.576567863400351e-07, "loss": 0.1337, "step": 9878 }, { "epoch": 4.593212459321246, "grad_norm": 1.012990117073059, "learning_rate": 4.4281873178278433e-07, "loss": 0.0893, "step": 9880 }, { "epoch": 4.594142259414226, "grad_norm": 1.364102840423584, "learning_rate": 4.282230466905202e-07, "loss": 0.0883, "step": 9882 }, { "epoch": 4.595072059507206, "grad_norm": 2.1059677600860596, "learning_rate": 4.138698751167674e-07, "loss": 0.1034, "step": 9884 }, { "epoch": 4.596001859600186, "grad_norm": 0.9101712703704834, "learning_rate": 3.997593587215097e-07, "loss": 0.0585, "step": 9886 }, { "epoch": 4.5969316596931655, "grad_norm": 1.502005696296692, "learning_rate": 3.858916367698687e-07, "loss": 0.0806, "step": 9888 }, { "epoch": 4.597861459786146, "grad_norm": 1.6315875053405762, "learning_rate": 3.7226684613065523e-07, "loss": 0.0751, "step": 9890 }, { "epoch": 4.598791259879126, "grad_norm": 1.493014931678772, "learning_rate": 3.5888512127505345e-07, "loss": 0.0737, "step": 9892 }, { "epoch": 4.599721059972106, "grad_norm": 1.3123685121536255, "learning_rate": 3.457465942752831e-07, "loss": 0.093, "step": 9894 }, { "epoch": 4.600650860065086, "grad_norm": 1.5223602056503296, "learning_rate": 3.3285139480330354e-07, "loss": 0.1147, "step": 9896 }, { "epoch": 4.601580660158066, "grad_norm": 1.2751762866973877, "learning_rate": 3.2019965012952565e-07, "loss": 0.0742, "step": 9898 }, { "epoch": 4.602510460251046, "grad_norm": 0.7941957116127014, "learning_rate": 3.077914851215573e-07, "loss": 0.0675, "step": 9900 }, { "epoch": 4.603440260344026, "grad_norm": 1.7325323820114136, "learning_rate": 2.956270222429878e-07, "loss": 0.1017, "step": 9902 }, { "epoch": 4.604370060437006, "grad_norm": 0.9534595608711243, "learning_rate": 2.8370638155215266e-07, "loss": 0.079, "step": 9904 }, { "epoch": 4.605299860529986, "grad_norm": 2.0844361782073975, "learning_rate": 2.72029680700965e-07, "loss": 0.1098, "step": 9906 }, { "epoch": 4.606229660622966, "grad_norm": 1.6481866836547852, "learning_rate": 2.605970349337279e-07, "loss": 0.1049, "step": 9908 }, { "epoch": 4.6071594607159465, "grad_norm": 1.6442292928695679, "learning_rate": 2.4940855708606555e-07, "loss": 0.0645, "step": 9910 }, { "epoch": 4.608089260808926, "grad_norm": 1.0619252920150757, "learning_rate": 2.384643575837242e-07, "loss": 0.0444, "step": 9912 }, { "epoch": 4.609019060901906, "grad_norm": 0.7864127159118652, "learning_rate": 2.2776454444153156e-07, "loss": 0.0637, "step": 9914 }, { "epoch": 4.609948860994886, "grad_norm": 1.7801432609558105, "learning_rate": 2.1730922326234182e-07, "loss": 0.0887, "step": 9916 }, { "epoch": 4.610878661087866, "grad_norm": 1.498134732246399, "learning_rate": 2.0709849723593396e-07, "loss": 0.0842, "step": 9918 }, { "epoch": 4.611808461180846, "grad_norm": 1.110042929649353, "learning_rate": 1.9713246713805403e-07, "loss": 0.1167, "step": 9920 }, { "epoch": 4.612738261273826, "grad_norm": 2.1407055854797363, "learning_rate": 1.8741123132940773e-07, "loss": 0.1185, "step": 9922 }, { "epoch": 4.613668061366806, "grad_norm": 0.9538351893424988, "learning_rate": 1.7793488575466116e-07, "loss": 0.0567, "step": 9924 }, { "epoch": 4.614597861459786, "grad_norm": 2.2358429431915283, "learning_rate": 1.6870352394151658e-07, "loss": 0.0763, "step": 9926 }, { "epoch": 4.6155276615527665, "grad_norm": 1.410854697227478, "learning_rate": 1.5971723699979365e-07, "loss": 0.0735, "step": 9928 }, { "epoch": 4.616457461645746, "grad_norm": 1.0383957624435425, "learning_rate": 1.5097611362051082e-07, "loss": 0.0531, "step": 9930 }, { "epoch": 4.617387261738726, "grad_norm": 1.6477042436599731, "learning_rate": 1.4248024007503035e-07, "loss": 0.0743, "step": 9932 }, { "epoch": 4.618317061831706, "grad_norm": 1.5358082056045532, "learning_rate": 1.3422970021418965e-07, "loss": 0.0896, "step": 9934 }, { "epoch": 4.619246861924686, "grad_norm": 1.5238046646118164, "learning_rate": 1.2622457546749625e-07, "loss": 0.0769, "step": 9936 }, { "epoch": 4.620176662017666, "grad_norm": 1.3935869932174683, "learning_rate": 1.1846494484229532e-07, "loss": 0.0958, "step": 9938 }, { "epoch": 4.621106462110646, "grad_norm": 1.0747686624526978, "learning_rate": 1.1095088492300341e-07, "loss": 0.0592, "step": 9940 }, { "epoch": 4.622036262203626, "grad_norm": 0.765288233757019, "learning_rate": 1.0368246987035918e-07, "loss": 0.0345, "step": 9942 }, { "epoch": 4.622966062296606, "grad_norm": 0.9143588542938232, "learning_rate": 9.665977142069062e-08, "loss": 0.0579, "step": 9944 }, { "epoch": 4.6238958623895865, "grad_norm": 1.8870759010314941, "learning_rate": 8.988285888519064e-08, "loss": 0.09, "step": 9946 }, { "epoch": 4.624825662482566, "grad_norm": 1.867397665977478, "learning_rate": 8.335179914925091e-08, "loss": 0.0914, "step": 9948 }, { "epoch": 4.625755462575547, "grad_norm": 0.900800883769989, "learning_rate": 7.706665667180407e-08, "loss": 0.0468, "step": 9950 }, { "epoch": 4.626685262668526, "grad_norm": 1.4241893291473389, "learning_rate": 7.102749348465479e-08, "loss": 0.1099, "step": 9952 }, { "epoch": 4.627615062761507, "grad_norm": 0.9740573167800903, "learning_rate": 6.523436919191084e-08, "loss": 0.0689, "step": 9954 }, { "epoch": 4.628544862854486, "grad_norm": 1.380014419555664, "learning_rate": 5.968734096936965e-08, "loss": 0.062, "step": 9956 }, { "epoch": 4.629474662947466, "grad_norm": 1.3265957832336426, "learning_rate": 5.4386463563963205e-08, "loss": 0.1166, "step": 9958 }, { "epoch": 4.630404463040446, "grad_norm": 1.3451951742172241, "learning_rate": 4.933178929321405e-08, "loss": 0.0892, "step": 9960 }, { "epoch": 4.631334263133426, "grad_norm": 0.9932853579521179, "learning_rate": 4.452336804470514e-08, "loss": 0.038, "step": 9962 }, { "epoch": 4.6322640632264065, "grad_norm": 1.0170948505401611, "learning_rate": 3.9961247275624646e-08, "loss": 0.0545, "step": 9964 }, { "epoch": 4.633193863319386, "grad_norm": 0.8916456699371338, "learning_rate": 3.564547201225805e-08, "loss": 0.0648, "step": 9966 }, { "epoch": 4.634123663412367, "grad_norm": 1.0806301832199097, "learning_rate": 3.1576084849560697e-08, "loss": 0.0679, "step": 9968 }, { "epoch": 4.635053463505346, "grad_norm": 1.2849698066711426, "learning_rate": 2.7753125950749776e-08, "loss": 0.0681, "step": 9970 }, { "epoch": 4.635983263598327, "grad_norm": 0.8310216665267944, "learning_rate": 2.4176633046882454e-08, "loss": 0.0658, "step": 9972 }, { "epoch": 4.636913063691306, "grad_norm": 1.3196799755096436, "learning_rate": 2.084664143649783e-08, "loss": 0.061, "step": 9974 }, { "epoch": 4.637842863784286, "grad_norm": 1.2703583240509033, "learning_rate": 1.776318398526997e-08, "loss": 0.0701, "step": 9976 }, { "epoch": 4.638772663877266, "grad_norm": 1.3110756874084473, "learning_rate": 1.4926291125677634e-08, "loss": 0.1039, "step": 9978 }, { "epoch": 4.639702463970247, "grad_norm": 2.2721807956695557, "learning_rate": 1.2335990856712837e-08, "loss": 0.1201, "step": 9980 }, { "epoch": 4.6406322640632265, "grad_norm": 1.8474149703979492, "learning_rate": 9.992308743586636e-09, "loss": 0.0813, "step": 9982 }, { "epoch": 4.641562064156206, "grad_norm": 1.0631083250045776, "learning_rate": 7.895267917501544e-09, "loss": 0.0825, "step": 9984 }, { "epoch": 4.642491864249187, "grad_norm": 1.394451379776001, "learning_rate": 6.044889075398938e-09, "loss": 0.0783, "step": 9986 }, { "epoch": 4.643421664342166, "grad_norm": 1.2568907737731934, "learning_rate": 4.4411904797758925e-09, "loss": 0.0787, "step": 9988 }, { "epoch": 4.644351464435147, "grad_norm": 0.9923886656761169, "learning_rate": 3.084187958485323e-09, "loss": 0.0594, "step": 9990 }, { "epoch": 4.645281264528126, "grad_norm": 1.83738374710083, "learning_rate": 1.9738949045972167e-09, "loss": 0.0685, "step": 9992 }, { "epoch": 4.646211064621107, "grad_norm": 1.0400084257125854, "learning_rate": 1.1103222762543e-09, "loss": 0.063, "step": 9994 }, { "epoch": 4.647140864714086, "grad_norm": 1.3662570714950562, "learning_rate": 4.934785965721192e-10, "loss": 0.0896, "step": 9996 }, { "epoch": 4.648070664807067, "grad_norm": 1.6833701133728027, "learning_rate": 1.2336995354467256e-10, "loss": 0.0714, "step": 9998 }, { "epoch": 4.6490004649000465, "grad_norm": 1.0532835721969604, "learning_rate": 0.0, "loss": 0.0814, "step": 10000 }, { "epoch": 4.6490004649000465, "eval_cer": 0.15753823084579688, "eval_loss": 0.226575568318367, "eval_runtime": 393.1643, "eval_samples_per_second": 32.287, "eval_steps_per_second": 1.01, "step": 10000 }, { "epoch": 4.649930264993026, "grad_norm": 1.4832113981246948, "learning_rate": 1.2336995354467197e-10, "loss": 0.0742, "step": 10002 }, { "epoch": 4.650860065086007, "grad_norm": 1.2918769121170044, "learning_rate": 4.934785965721167e-10, "loss": 0.0681, "step": 10004 }, { "epoch": 4.651789865178986, "grad_norm": 1.2782461643218994, "learning_rate": 1.1103222762542941e-09, "loss": 0.0519, "step": 10006 }, { "epoch": 4.652719665271967, "grad_norm": 0.9362637400627136, "learning_rate": 1.973894904597207e-09, "loss": 0.0749, "step": 10008 }, { "epoch": 4.653649465364946, "grad_norm": 2.0953824520111084, "learning_rate": 3.0841879584853073e-09, "loss": 0.0959, "step": 10010 }, { "epoch": 4.654579265457927, "grad_norm": 1.0107920169830322, "learning_rate": 4.441190479775869e-09, "loss": 0.0899, "step": 10012 }, { "epoch": 4.655509065550906, "grad_norm": 1.5154882669448853, "learning_rate": 6.0448890753989065e-09, "loss": 0.0754, "step": 10014 }, { "epoch": 4.656438865643887, "grad_norm": 2.1608870029449463, "learning_rate": 7.895267917501502e-09, "loss": 0.0919, "step": 10016 }, { "epoch": 4.6573686657368665, "grad_norm": 1.1562899351119995, "learning_rate": 9.992308743586585e-09, "loss": 0.0805, "step": 10018 }, { "epoch": 4.658298465829847, "grad_norm": 1.6055142879486084, "learning_rate": 1.2335990856709996e-08, "loss": 0.0947, "step": 10020 }, { "epoch": 4.659228265922827, "grad_norm": 1.711699366569519, "learning_rate": 1.4926291125677558e-08, "loss": 0.1032, "step": 10022 }, { "epoch": 4.660158066015807, "grad_norm": 1.2476134300231934, "learning_rate": 1.776318398526988e-08, "loss": 0.0755, "step": 10024 }, { "epoch": 4.661087866108787, "grad_norm": 1.112786889076233, "learning_rate": 2.0846641436494943e-08, "loss": 0.0738, "step": 10026 }, { "epoch": 4.662017666201766, "grad_norm": 1.790185809135437, "learning_rate": 2.4176633046879555e-08, "loss": 0.1562, "step": 10028 }, { "epoch": 4.662947466294747, "grad_norm": 1.388174057006836, "learning_rate": 2.7753125950749634e-08, "loss": 0.094, "step": 10030 }, { "epoch": 4.663877266387726, "grad_norm": 1.5390956401824951, "learning_rate": 3.157608484956331e-08, "loss": 0.093, "step": 10032 }, { "epoch": 4.664807066480707, "grad_norm": 0.7963013648986816, "learning_rate": 3.5645472012255095e-08, "loss": 0.0605, "step": 10034 }, { "epoch": 4.6657368665736865, "grad_norm": 1.3623672723770142, "learning_rate": 3.996124727562445e-08, "loss": 0.0813, "step": 10036 }, { "epoch": 4.666666666666667, "grad_norm": 1.9209089279174805, "learning_rate": 4.4523368044704916e-08, "loss": 0.1161, "step": 10038 }, { "epoch": 4.667596466759647, "grad_norm": 1.962255835533142, "learning_rate": 4.9331789293208254e-08, "loss": 0.1203, "step": 10040 }, { "epoch": 4.668526266852627, "grad_norm": 1.3392102718353271, "learning_rate": 5.438646356396294e-08, "loss": 0.0791, "step": 10042 }, { "epoch": 4.669456066945607, "grad_norm": 1.156941294670105, "learning_rate": 5.968734096936936e-08, "loss": 0.0852, "step": 10044 }, { "epoch": 4.670385867038586, "grad_norm": 1.051546573638916, "learning_rate": 6.523436919191054e-08, "loss": 0.0786, "step": 10046 }, { "epoch": 4.671315667131567, "grad_norm": 1.236020803451538, "learning_rate": 7.10274934846489e-08, "loss": 0.0893, "step": 10048 }, { "epoch": 4.672245467224546, "grad_norm": 1.6180431842803955, "learning_rate": 7.706665667179814e-08, "loss": 0.071, "step": 10050 }, { "epoch": 4.673175267317527, "grad_norm": 1.6200193166732788, "learning_rate": 8.33517991492533e-08, "loss": 0.1034, "step": 10052 }, { "epoch": 4.6741050674105065, "grad_norm": 1.1058170795440674, "learning_rate": 8.988285888518745e-08, "loss": 0.0862, "step": 10054 }, { "epoch": 4.675034867503487, "grad_norm": 2.0400681495666504, "learning_rate": 9.665977142068739e-08, "loss": 0.155, "step": 10056 }, { "epoch": 4.675964667596467, "grad_norm": 1.8675481081008911, "learning_rate": 1.0368246987035871e-07, "loss": 0.0776, "step": 10058 }, { "epoch": 4.676894467689447, "grad_norm": 0.9386976957321167, "learning_rate": 1.1095088492299736e-07, "loss": 0.0496, "step": 10060 }, { "epoch": 4.677824267782427, "grad_norm": 1.5633740425109863, "learning_rate": 1.1846494484228925e-07, "loss": 0.0924, "step": 10062 }, { "epoch": 4.678754067875407, "grad_norm": 0.855400800704956, "learning_rate": 1.2622457546749572e-07, "loss": 0.0505, "step": 10064 }, { "epoch": 4.679683867968387, "grad_norm": 2.3111658096313477, "learning_rate": 1.3422970021419184e-07, "loss": 0.0883, "step": 10066 }, { "epoch": 4.680613668061367, "grad_norm": 1.290824294090271, "learning_rate": 1.42480240075027e-07, "loss": 0.0846, "step": 10068 }, { "epoch": 4.681543468154347, "grad_norm": 1.7041199207305908, "learning_rate": 1.5097611362050187e-07, "loss": 0.1222, "step": 10070 }, { "epoch": 4.6824732682473265, "grad_norm": 1.1775106191635132, "learning_rate": 1.5971723699978746e-07, "loss": 0.0581, "step": 10072 }, { "epoch": 4.683403068340307, "grad_norm": 1.3921239376068115, "learning_rate": 1.6870352394151314e-07, "loss": 0.0787, "step": 10074 }, { "epoch": 4.684332868433287, "grad_norm": 1.9331352710723877, "learning_rate": 1.7793488575465492e-07, "loss": 0.1095, "step": 10076 }, { "epoch": 4.685262668526267, "grad_norm": 1.78963303565979, "learning_rate": 1.8741123132940976e-07, "loss": 0.1112, "step": 10078 }, { "epoch": 4.686192468619247, "grad_norm": 1.3081421852111816, "learning_rate": 1.97132467138056e-07, "loss": 0.0912, "step": 10080 }, { "epoch": 4.687122268712227, "grad_norm": 0.982524573802948, "learning_rate": 2.0709849723592477e-07, "loss": 0.0884, "step": 10082 }, { "epoch": 4.688052068805207, "grad_norm": 1.0345120429992676, "learning_rate": 2.173092232623298e-07, "loss": 0.0843, "step": 10084 }, { "epoch": 4.688981868898187, "grad_norm": 1.1876717805862427, "learning_rate": 2.2776454444153336e-07, "loss": 0.0689, "step": 10086 }, { "epoch": 4.689911668991167, "grad_norm": 1.7273870706558228, "learning_rate": 2.384643575837176e-07, "loss": 0.0806, "step": 10088 }, { "epoch": 4.6908414690841465, "grad_norm": 1.678489327430725, "learning_rate": 2.494085570860589e-07, "loss": 0.0762, "step": 10090 }, { "epoch": 4.691771269177127, "grad_norm": 1.7696075439453125, "learning_rate": 2.60597034933724e-07, "loss": 0.0699, "step": 10092 }, { "epoch": 4.6927010692701066, "grad_norm": 1.3133859634399414, "learning_rate": 2.7202968070095273e-07, "loss": 0.0687, "step": 10094 }, { "epoch": 4.693630869363087, "grad_norm": 1.2971642017364502, "learning_rate": 2.8370638155214583e-07, "loss": 0.0845, "step": 10096 }, { "epoch": 4.694560669456067, "grad_norm": 1.3530523777008057, "learning_rate": 2.956270222429837e-07, "loss": 0.0761, "step": 10098 }, { "epoch": 4.695490469549047, "grad_norm": 1.2110105752944946, "learning_rate": 3.077914851215586e-07, "loss": 0.0701, "step": 10100 }, { "epoch": 4.696420269642027, "grad_norm": 1.1819078922271729, "learning_rate": 3.201996501295214e-07, "loss": 0.1013, "step": 10102 }, { "epoch": 4.697350069735007, "grad_norm": 1.5074821710586548, "learning_rate": 3.3285139480329094e-07, "loss": 0.1065, "step": 10104 }, { "epoch": 4.698279869827987, "grad_norm": 1.0121428966522217, "learning_rate": 3.457465942752788e-07, "loss": 0.1023, "step": 10106 }, { "epoch": 4.699209669920967, "grad_norm": 1.0896471738815308, "learning_rate": 3.5888512127504636e-07, "loss": 0.0903, "step": 10108 }, { "epoch": 4.700139470013947, "grad_norm": 1.47075355052948, "learning_rate": 3.722668461306481e-07, "loss": 0.1213, "step": 10110 }, { "epoch": 4.701069270106927, "grad_norm": 1.719409465789795, "learning_rate": 3.8589163676986976e-07, "loss": 0.0849, "step": 10112 }, { "epoch": 4.701999070199907, "grad_norm": 1.3082845211029053, "learning_rate": 3.9975935872150227e-07, "loss": 0.0807, "step": 10114 }, { "epoch": 4.702928870292887, "grad_norm": 1.4725843667984009, "learning_rate": 4.1386987511675164e-07, "loss": 0.1112, "step": 10116 }, { "epoch": 4.703858670385867, "grad_norm": 1.0917723178863525, "learning_rate": 4.2822304669051263e-07, "loss": 0.0762, "step": 10118 }, { "epoch": 4.704788470478847, "grad_norm": 1.0763918161392212, "learning_rate": 4.4281873178278496e-07, "loss": 0.0651, "step": 10120 }, { "epoch": 4.705718270571827, "grad_norm": 0.6449530124664307, "learning_rate": 4.576567863400273e-07, "loss": 0.0671, "step": 10122 }, { "epoch": 4.706648070664807, "grad_norm": 1.070594072341919, "learning_rate": 4.7273706391664526e-07, "loss": 0.0819, "step": 10124 }, { "epoch": 4.707577870757787, "grad_norm": 1.2808902263641357, "learning_rate": 4.88059415676387e-07, "loss": 0.0701, "step": 10126 }, { "epoch": 4.708507670850767, "grad_norm": 1.02010977268219, "learning_rate": 5.036236903938261e-07, "loss": 0.068, "step": 10128 }, { "epoch": 4.709437470943747, "grad_norm": 0.962847113609314, "learning_rate": 5.194297344558511e-07, "loss": 0.0648, "step": 10130 }, { "epoch": 4.710367271036727, "grad_norm": 0.6844775676727295, "learning_rate": 5.354773918631932e-07, "loss": 0.0531, "step": 10132 }, { "epoch": 4.711297071129707, "grad_norm": 0.9298563599586487, "learning_rate": 5.517665042319574e-07, "loss": 0.0519, "step": 10134 }, { "epoch": 4.712226871222687, "grad_norm": 1.6843488216400146, "learning_rate": 5.682969107951552e-07, "loss": 0.0904, "step": 10136 }, { "epoch": 4.713156671315668, "grad_norm": 1.0882751941680908, "learning_rate": 5.850684484043778e-07, "loss": 0.0618, "step": 10138 }, { "epoch": 4.714086471408647, "grad_norm": 0.8333344459533691, "learning_rate": 6.020809515313147e-07, "loss": 0.0991, "step": 10140 }, { "epoch": 4.715016271501627, "grad_norm": 1.0231436491012573, "learning_rate": 6.193342522694133e-07, "loss": 0.0747, "step": 10142 }, { "epoch": 4.715946071594607, "grad_norm": 1.8301037549972534, "learning_rate": 6.368281803355695e-07, "loss": 0.1134, "step": 10144 }, { "epoch": 4.716875871687587, "grad_norm": 1.233171820640564, "learning_rate": 6.545625630717841e-07, "loss": 0.0635, "step": 10146 }, { "epoch": 4.717805671780567, "grad_norm": 1.3537906408309937, "learning_rate": 6.725372254468319e-07, "loss": 0.0771, "step": 10148 }, { "epoch": 4.718735471873547, "grad_norm": 1.5481637716293335, "learning_rate": 6.907519900580781e-07, "loss": 0.0885, "step": 10150 }, { "epoch": 4.7196652719665275, "grad_norm": 1.4911386966705322, "learning_rate": 7.092066771331454e-07, "loss": 0.1224, "step": 10152 }, { "epoch": 4.720595072059507, "grad_norm": 1.0691442489624023, "learning_rate": 7.279011045317282e-07, "loss": 0.0615, "step": 10154 }, { "epoch": 4.721524872152488, "grad_norm": 1.3612043857574463, "learning_rate": 7.468350877473555e-07, "loss": 0.1032, "step": 10156 }, { "epoch": 4.722454672245467, "grad_norm": 1.114565372467041, "learning_rate": 7.660084399092581e-07, "loss": 0.0734, "step": 10158 }, { "epoch": 4.723384472338447, "grad_norm": 1.3573925495147705, "learning_rate": 7.854209717842209e-07, "loss": 0.0858, "step": 10160 }, { "epoch": 4.724314272431427, "grad_norm": 1.7935236692428589, "learning_rate": 8.050724917783613e-07, "loss": 0.0667, "step": 10162 }, { "epoch": 4.725244072524407, "grad_norm": 2.3662168979644775, "learning_rate": 8.249628059391202e-07, "loss": 0.0746, "step": 10164 }, { "epoch": 4.726173872617387, "grad_norm": 1.3964946269989014, "learning_rate": 8.450917179571283e-07, "loss": 0.0868, "step": 10166 }, { "epoch": 4.727103672710367, "grad_norm": 1.1902151107788086, "learning_rate": 8.654590291681591e-07, "loss": 0.09, "step": 10168 }, { "epoch": 4.7280334728033475, "grad_norm": 1.055928349494934, "learning_rate": 8.860645385550403e-07, "loss": 0.062, "step": 10170 }, { "epoch": 4.728963272896327, "grad_norm": 1.595266580581665, "learning_rate": 9.069080427497493e-07, "loss": 0.0807, "step": 10172 }, { "epoch": 4.729893072989308, "grad_norm": 1.3677793741226196, "learning_rate": 9.279893360353124e-07, "loss": 0.1119, "step": 10174 }, { "epoch": 4.730822873082287, "grad_norm": 1.2172508239746094, "learning_rate": 9.493082103478521e-07, "loss": 0.0746, "step": 10176 }, { "epoch": 4.731752673175267, "grad_norm": 1.2761157751083374, "learning_rate": 9.708644552787003e-07, "loss": 0.0699, "step": 10178 }, { "epoch": 4.732682473268247, "grad_norm": 1.2101826667785645, "learning_rate": 9.926578580764073e-07, "loss": 0.0632, "step": 10180 }, { "epoch": 4.733612273361228, "grad_norm": 1.4786204099655151, "learning_rate": 1.0146882036489258e-06, "loss": 0.0895, "step": 10182 }, { "epoch": 4.734542073454207, "grad_norm": 1.5057793855667114, "learning_rate": 1.0369552745655962e-06, "loss": 0.0845, "step": 10184 }, { "epoch": 4.735471873547187, "grad_norm": 1.2484323978424072, "learning_rate": 1.0594588510594504e-06, "loss": 0.0577, "step": 10186 }, { "epoch": 4.7364016736401675, "grad_norm": 1.2883540391921997, "learning_rate": 1.0821987110292284e-06, "loss": 0.0834, "step": 10188 }, { "epoch": 4.737331473733147, "grad_norm": 1.1708238124847412, "learning_rate": 1.1051746300417502e-06, "loss": 0.1049, "step": 10190 }, { "epoch": 4.738261273826128, "grad_norm": 2.1424951553344727, "learning_rate": 1.1283863813339155e-06, "loss": 0.0844, "step": 10192 }, { "epoch": 4.739191073919107, "grad_norm": 1.6615924835205078, "learning_rate": 1.1518337358151612e-06, "loss": 0.0983, "step": 10194 }, { "epoch": 4.740120874012088, "grad_norm": 1.280787467956543, "learning_rate": 1.175516462069515e-06, "loss": 0.0724, "step": 10196 }, { "epoch": 4.741050674105067, "grad_norm": 1.840606927871704, "learning_rate": 1.1994343263580818e-06, "loss": 0.0666, "step": 10198 }, { "epoch": 4.741980474198048, "grad_norm": 1.0359681844711304, "learning_rate": 1.2235870926211701e-06, "loss": 0.0767, "step": 10200 }, { "epoch": 4.742910274291027, "grad_norm": 1.1216896772384644, "learning_rate": 1.2479745224806993e-06, "loss": 0.0678, "step": 10202 }, { "epoch": 4.743840074384007, "grad_norm": 1.1779941320419312, "learning_rate": 1.2725963752426127e-06, "loss": 0.0774, "step": 10204 }, { "epoch": 4.7447698744769875, "grad_norm": 0.9597591757774353, "learning_rate": 1.297452407899191e-06, "loss": 0.0732, "step": 10206 }, { "epoch": 4.745699674569967, "grad_norm": 1.368155598640442, "learning_rate": 1.3225423751313967e-06, "loss": 0.0909, "step": 10208 }, { "epoch": 4.746629474662948, "grad_norm": 1.3259602785110474, "learning_rate": 1.3478660293113538e-06, "loss": 0.0913, "step": 10210 }, { "epoch": 4.747559274755927, "grad_norm": 1.3567256927490234, "learning_rate": 1.3734231205048798e-06, "loss": 0.0997, "step": 10212 }, { "epoch": 4.748489074848908, "grad_norm": 1.3158137798309326, "learning_rate": 1.3992133964737392e-06, "loss": 0.0723, "step": 10214 }, { "epoch": 4.749418874941887, "grad_norm": 1.1166596412658691, "learning_rate": 1.4252366026783844e-06, "loss": 0.0623, "step": 10216 }, { "epoch": 4.750348675034868, "grad_norm": 1.4125547409057617, "learning_rate": 1.4514924822802144e-06, "loss": 0.0676, "step": 10218 }, { "epoch": 4.751278475127847, "grad_norm": 0.8963711857795715, "learning_rate": 1.4779807761443552e-06, "loss": 0.0696, "step": 10220 }, { "epoch": 4.752208275220828, "grad_norm": 1.3324004411697388, "learning_rate": 1.5047012228420142e-06, "loss": 0.0839, "step": 10222 }, { "epoch": 4.7531380753138075, "grad_norm": 1.3223248720169067, "learning_rate": 1.5316535586531371e-06, "loss": 0.0919, "step": 10224 }, { "epoch": 4.754067875406788, "grad_norm": 1.5069656372070312, "learning_rate": 1.5588375175691113e-06, "loss": 0.0738, "step": 10226 }, { "epoch": 4.754997675499768, "grad_norm": 0.9712395668029785, "learning_rate": 1.586252831295176e-06, "loss": 0.0931, "step": 10228 }, { "epoch": 4.755927475592747, "grad_norm": 1.332391619682312, "learning_rate": 1.6138992292533153e-06, "loss": 0.1071, "step": 10230 }, { "epoch": 4.756857275685728, "grad_norm": 1.3061268329620361, "learning_rate": 1.6417764385847132e-06, "loss": 0.0442, "step": 10232 }, { "epoch": 4.757787075778707, "grad_norm": 1.133480429649353, "learning_rate": 1.669884184152531e-06, "loss": 0.0641, "step": 10234 }, { "epoch": 4.758716875871688, "grad_norm": 1.47034752368927, "learning_rate": 1.698222188544704e-06, "loss": 0.0857, "step": 10236 }, { "epoch": 4.759646675964667, "grad_norm": 1.3951983451843262, "learning_rate": 1.7267901720766004e-06, "loss": 0.075, "step": 10238 }, { "epoch": 4.760576476057648, "grad_norm": 1.636591911315918, "learning_rate": 1.7555878527937215e-06, "loss": 0.0821, "step": 10240 }, { "epoch": 4.7615062761506275, "grad_norm": 1.8825777769088745, "learning_rate": 1.784614946474555e-06, "loss": 0.0865, "step": 10242 }, { "epoch": 4.762436076243608, "grad_norm": 1.5661083459854126, "learning_rate": 1.8138711666334709e-06, "loss": 0.0578, "step": 10244 }, { "epoch": 4.763365876336588, "grad_norm": 1.6867209672927856, "learning_rate": 1.843356224523318e-06, "loss": 0.0893, "step": 10246 }, { "epoch": 4.764295676429567, "grad_norm": 1.598695158958435, "learning_rate": 1.8730698291385488e-06, "loss": 0.0958, "step": 10248 }, { "epoch": 4.765225476522548, "grad_norm": 1.3466211557388306, "learning_rate": 1.9030116872178122e-06, "loss": 0.0716, "step": 10250 }, { "epoch": 4.766155276615527, "grad_norm": 1.716845989227295, "learning_rate": 1.9331815032471247e-06, "loss": 0.0872, "step": 10252 }, { "epoch": 4.767085076708508, "grad_norm": 1.100034236907959, "learning_rate": 1.9635789794625492e-06, "loss": 0.0693, "step": 10254 }, { "epoch": 4.768014876801487, "grad_norm": 1.495779275894165, "learning_rate": 1.9942038158532352e-06, "loss": 0.0833, "step": 10256 }, { "epoch": 4.768944676894468, "grad_norm": 0.984935998916626, "learning_rate": 2.025055710164439e-06, "loss": 0.0523, "step": 10258 }, { "epoch": 4.7698744769874475, "grad_norm": 2.1143808364868164, "learning_rate": 2.0561343579004605e-06, "loss": 0.0744, "step": 10260 }, { "epoch": 4.770804277080428, "grad_norm": 1.715173363685608, "learning_rate": 2.0874394523275554e-06, "loss": 0.0918, "step": 10262 }, { "epoch": 4.771734077173408, "grad_norm": 1.5820279121398926, "learning_rate": 2.1189706844770455e-06, "loss": 0.1062, "step": 10264 }, { "epoch": 4.772663877266388, "grad_norm": 1.3895467519760132, "learning_rate": 2.150727743148471e-06, "loss": 0.0991, "step": 10266 }, { "epoch": 4.773593677359368, "grad_norm": 1.5767277479171753, "learning_rate": 2.1827103149124097e-06, "loss": 0.0783, "step": 10268 }, { "epoch": 4.774523477452348, "grad_norm": 1.5592186450958252, "learning_rate": 2.2149180841138655e-06, "loss": 0.1015, "step": 10270 }, { "epoch": 4.775453277545328, "grad_norm": 1.4612256288528442, "learning_rate": 2.2473507328750844e-06, "loss": 0.0839, "step": 10272 }, { "epoch": 4.776383077638307, "grad_norm": 1.9756066799163818, "learning_rate": 2.280007941098992e-06, "loss": 0.102, "step": 10274 }, { "epoch": 4.777312877731288, "grad_norm": 1.4238847494125366, "learning_rate": 2.3128893864720868e-06, "loss": 0.0593, "step": 10276 }, { "epoch": 4.7782426778242675, "grad_norm": 0.8743839859962463, "learning_rate": 2.3459947444677422e-06, "loss": 0.0548, "step": 10278 }, { "epoch": 4.779172477917248, "grad_norm": 1.665838360786438, "learning_rate": 2.3793236883495173e-06, "loss": 0.0785, "step": 10280 }, { "epoch": 4.780102278010228, "grad_norm": 1.1714600324630737, "learning_rate": 2.412875889174114e-06, "loss": 0.0601, "step": 10282 }, { "epoch": 4.781032078103208, "grad_norm": 1.504372000694275, "learning_rate": 2.4466510157949303e-06, "loss": 0.0866, "step": 10284 }, { "epoch": 4.781961878196188, "grad_norm": 1.5648397207260132, "learning_rate": 2.4806487348650273e-06, "loss": 0.0844, "step": 10286 }, { "epoch": 4.782891678289168, "grad_norm": 1.5103235244750977, "learning_rate": 2.5148687108407213e-06, "loss": 0.0706, "step": 10288 }, { "epoch": 4.783821478382148, "grad_norm": 1.2555932998657227, "learning_rate": 2.5493106059845845e-06, "loss": 0.0764, "step": 10290 }, { "epoch": 4.784751278475127, "grad_norm": 1.713771104812622, "learning_rate": 2.583974080369095e-06, "loss": 0.1009, "step": 10292 }, { "epoch": 4.785681078568108, "grad_norm": 2.5289084911346436, "learning_rate": 2.6188587918797197e-06, "loss": 0.1245, "step": 10294 }, { "epoch": 4.786610878661088, "grad_norm": 1.5585154294967651, "learning_rate": 2.653964396218392e-06, "loss": 0.0639, "step": 10296 }, { "epoch": 4.787540678754068, "grad_norm": 2.3487167358398438, "learning_rate": 2.6892905469070584e-06, "loss": 0.1134, "step": 10298 }, { "epoch": 4.788470478847048, "grad_norm": 1.4393227100372314, "learning_rate": 2.724836895290786e-06, "loss": 0.095, "step": 10300 }, { "epoch": 4.789400278940028, "grad_norm": 1.5341770648956299, "learning_rate": 2.760603090541552e-06, "loss": 0.0954, "step": 10302 }, { "epoch": 4.790330079033008, "grad_norm": 1.9289863109588623, "learning_rate": 2.7965887796613655e-06, "loss": 0.11, "step": 10304 }, { "epoch": 4.791259879125988, "grad_norm": 1.3867520093917847, "learning_rate": 2.832793607486084e-06, "loss": 0.075, "step": 10306 }, { "epoch": 4.792189679218968, "grad_norm": 1.7377142906188965, "learning_rate": 2.869217216688636e-06, "loss": 0.0665, "step": 10308 }, { "epoch": 4.793119479311948, "grad_norm": 1.7269774675369263, "learning_rate": 2.9058592477826556e-06, "loss": 0.1175, "step": 10310 }, { "epoch": 4.794049279404928, "grad_norm": 1.4077142477035522, "learning_rate": 2.942719339126138e-06, "loss": 0.0767, "step": 10312 }, { "epoch": 4.794979079497908, "grad_norm": 1.4966390132904053, "learning_rate": 2.979797126924897e-06, "loss": 0.0624, "step": 10314 }, { "epoch": 4.795908879590888, "grad_norm": 1.0637574195861816, "learning_rate": 3.0170922452361003e-06, "loss": 0.0789, "step": 10316 }, { "epoch": 4.796838679683868, "grad_norm": 1.5518721342086792, "learning_rate": 3.0546043259719418e-06, "loss": 0.0789, "step": 10318 }, { "epoch": 4.797768479776848, "grad_norm": 1.398267388343811, "learning_rate": 3.092332998903411e-06, "loss": 0.0646, "step": 10320 }, { "epoch": 4.798698279869828, "grad_norm": 1.156738519668579, "learning_rate": 3.130277891663703e-06, "loss": 0.0652, "step": 10322 }, { "epoch": 4.799628079962808, "grad_norm": 1.2885563373565674, "learning_rate": 3.1684386297519944e-06, "loss": 0.1025, "step": 10324 }, { "epoch": 4.800557880055788, "grad_norm": 1.6372218132019043, "learning_rate": 3.2068148365372514e-06, "loss": 0.1181, "step": 10326 }, { "epoch": 4.801487680148768, "grad_norm": 1.3146613836288452, "learning_rate": 3.245406133261851e-06, "loss": 0.0806, "step": 10328 }, { "epoch": 4.802417480241748, "grad_norm": 1.3144171237945557, "learning_rate": 3.28421213904523e-06, "loss": 0.0881, "step": 10330 }, { "epoch": 4.803347280334728, "grad_norm": 1.122820496559143, "learning_rate": 3.3232324708877376e-06, "loss": 0.0792, "step": 10332 }, { "epoch": 4.804277080427708, "grad_norm": 1.1748305559158325, "learning_rate": 3.3624667436744887e-06, "loss": 0.0591, "step": 10334 }, { "epoch": 4.805206880520688, "grad_norm": 1.2349666357040405, "learning_rate": 3.4019145701791014e-06, "loss": 0.0417, "step": 10336 }, { "epoch": 4.806136680613668, "grad_norm": 1.0421677827835083, "learning_rate": 3.441575561067406e-06, "loss": 0.0995, "step": 10338 }, { "epoch": 4.8070664807066485, "grad_norm": 1.9544591903686523, "learning_rate": 3.4814493249013866e-06, "loss": 0.0951, "step": 10340 }, { "epoch": 4.807996280799628, "grad_norm": 1.4532266855239868, "learning_rate": 3.5215354681431884e-06, "loss": 0.0922, "step": 10342 }, { "epoch": 4.808926080892608, "grad_norm": 1.3679553270339966, "learning_rate": 3.561833595158711e-06, "loss": 0.0497, "step": 10344 }, { "epoch": 4.809855880985588, "grad_norm": 1.2070510387420654, "learning_rate": 3.6023433082216696e-06, "loss": 0.0587, "step": 10346 }, { "epoch": 4.810785681078568, "grad_norm": 2.4262187480926514, "learning_rate": 3.6430642075175873e-06, "loss": 0.1377, "step": 10348 }, { "epoch": 4.811715481171548, "grad_norm": 1.4000191688537598, "learning_rate": 3.6839958911476834e-06, "loss": 0.1002, "step": 10350 }, { "epoch": 4.812645281264528, "grad_norm": 1.2299065589904785, "learning_rate": 3.7251379551327113e-06, "loss": 0.0708, "step": 10352 }, { "epoch": 4.8135750813575084, "grad_norm": 2.115764617919922, "learning_rate": 3.7664899934170683e-06, "loss": 0.1345, "step": 10354 }, { "epoch": 4.814504881450488, "grad_norm": 1.916088342666626, "learning_rate": 3.808051597872924e-06, "loss": 0.1152, "step": 10356 }, { "epoch": 4.8154346815434685, "grad_norm": 1.1731781959533691, "learning_rate": 3.849822358303922e-06, "loss": 0.0887, "step": 10358 }, { "epoch": 4.816364481636448, "grad_norm": 1.6071748733520508, "learning_rate": 3.891801862449623e-06, "loss": 0.0694, "step": 10360 }, { "epoch": 4.817294281729428, "grad_norm": 1.169023871421814, "learning_rate": 3.9339896959891654e-06, "loss": 0.0859, "step": 10362 }, { "epoch": 4.818224081822408, "grad_norm": 1.2230957746505737, "learning_rate": 3.976385442545766e-06, "loss": 0.0796, "step": 10364 }, { "epoch": 4.819153881915388, "grad_norm": 1.6057041883468628, "learning_rate": 4.018988683690473e-06, "loss": 0.0654, "step": 10366 }, { "epoch": 4.820083682008368, "grad_norm": 1.0979729890823364, "learning_rate": 4.061798998946446e-06, "loss": 0.0489, "step": 10368 }, { "epoch": 4.821013482101348, "grad_norm": 1.4533686637878418, "learning_rate": 4.104815965793255e-06, "loss": 0.081, "step": 10370 }, { "epoch": 4.821943282194328, "grad_norm": 1.1927108764648438, "learning_rate": 4.148039159670704e-06, "loss": 0.0777, "step": 10372 }, { "epoch": 4.822873082287308, "grad_norm": 1.0800485610961914, "learning_rate": 4.19146815398342e-06, "loss": 0.0413, "step": 10374 }, { "epoch": 4.8238028823802885, "grad_norm": 1.6261851787567139, "learning_rate": 4.235102520104712e-06, "loss": 0.1175, "step": 10376 }, { "epoch": 4.824732682473268, "grad_norm": 0.7650971412658691, "learning_rate": 4.278941827380951e-06, "loss": 0.0531, "step": 10378 }, { "epoch": 4.825662482566249, "grad_norm": 0.948656439781189, "learning_rate": 4.322985643135929e-06, "loss": 0.0752, "step": 10380 }, { "epoch": 4.826592282659228, "grad_norm": 1.032982349395752, "learning_rate": 4.367233532675007e-06, "loss": 0.0713, "step": 10382 }, { "epoch": 4.827522082752209, "grad_norm": 1.2745039463043213, "learning_rate": 4.411685059289333e-06, "loss": 0.0835, "step": 10384 }, { "epoch": 4.828451882845188, "grad_norm": 0.4688706696033478, "learning_rate": 4.456339784260236e-06, "loss": 0.054, "step": 10386 }, { "epoch": 4.829381682938168, "grad_norm": 1.0281339883804321, "learning_rate": 4.501197266863705e-06, "loss": 0.0578, "step": 10388 }, { "epoch": 4.830311483031148, "grad_norm": 1.2990009784698486, "learning_rate": 4.5462570643744e-06, "loss": 0.0879, "step": 10390 }, { "epoch": 4.831241283124128, "grad_norm": 1.5872259140014648, "learning_rate": 4.591518732070411e-06, "loss": 0.1051, "step": 10392 }, { "epoch": 4.8321710832171085, "grad_norm": 1.5075445175170898, "learning_rate": 4.636981823237245e-06, "loss": 0.086, "step": 10394 }, { "epoch": 4.833100883310088, "grad_norm": 1.5093629360198975, "learning_rate": 4.682645889172656e-06, "loss": 0.0706, "step": 10396 }, { "epoch": 4.834030683403069, "grad_norm": 1.6567991971969604, "learning_rate": 4.728510479190692e-06, "loss": 0.079, "step": 10398 }, { "epoch": 4.834960483496048, "grad_norm": 1.4129399061203003, "learning_rate": 4.774575140626314e-06, "loss": 0.0887, "step": 10400 }, { "epoch": 4.835890283589029, "grad_norm": 0.9730110764503479, "learning_rate": 4.820839418839962e-06, "loss": 0.0718, "step": 10402 }, { "epoch": 4.836820083682008, "grad_norm": 1.6360864639282227, "learning_rate": 4.867302857221945e-06, "loss": 0.1071, "step": 10404 }, { "epoch": 4.837749883774988, "grad_norm": 1.446438193321228, "learning_rate": 4.9139649971968274e-06, "loss": 0.1103, "step": 10406 }, { "epoch": 4.838679683867968, "grad_norm": 2.1516292095184326, "learning_rate": 4.960825378228074e-06, "loss": 0.0769, "step": 10408 }, { "epoch": 4.839609483960948, "grad_norm": 0.7172218561172485, "learning_rate": 5.007883537822748e-06, "loss": 0.0561, "step": 10410 }, { "epoch": 4.8405392840539285, "grad_norm": 1.0078840255737305, "learning_rate": 5.055139011535708e-06, "loss": 0.0566, "step": 10412 }, { "epoch": 4.841469084146908, "grad_norm": 1.6113215684890747, "learning_rate": 5.102591332974613e-06, "loss": 0.0859, "step": 10414 }, { "epoch": 4.842398884239889, "grad_norm": 1.341173768043518, "learning_rate": 5.150240033804093e-06, "loss": 0.0677, "step": 10416 }, { "epoch": 4.843328684332868, "grad_norm": 1.3848614692687988, "learning_rate": 5.198084643750825e-06, "loss": 0.081, "step": 10418 }, { "epoch": 4.844258484425849, "grad_norm": 2.2091643810272217, "learning_rate": 5.246124690607767e-06, "loss": 0.128, "step": 10420 }, { "epoch": 4.845188284518828, "grad_norm": 1.2456965446472168, "learning_rate": 5.294359700238995e-06, "loss": 0.0632, "step": 10422 }, { "epoch": 4.846118084611809, "grad_norm": 0.936846911907196, "learning_rate": 5.342789196584546e-06, "loss": 0.0758, "step": 10424 }, { "epoch": 4.847047884704788, "grad_norm": 1.578140377998352, "learning_rate": 5.391412701664738e-06, "loss": 0.0681, "step": 10426 }, { "epoch": 4.847977684797769, "grad_norm": 1.9946905374526978, "learning_rate": 5.440229735585311e-06, "loss": 0.0951, "step": 10428 }, { "epoch": 4.8489074848907485, "grad_norm": 1.859833836555481, "learning_rate": 5.489239816541738e-06, "loss": 0.0966, "step": 10430 }, { "epoch": 4.849837284983728, "grad_norm": 1.8291791677474976, "learning_rate": 5.538442460824422e-06, "loss": 0.0717, "step": 10432 }, { "epoch": 4.850767085076709, "grad_norm": 1.427720308303833, "learning_rate": 5.587837182823069e-06, "loss": 0.1145, "step": 10434 }, { "epoch": 4.851696885169688, "grad_norm": 1.1481969356536865, "learning_rate": 5.637423495031658e-06, "loss": 0.0581, "step": 10436 }, { "epoch": 4.852626685262669, "grad_norm": 1.252558946609497, "learning_rate": 5.687200908053416e-06, "loss": 0.0779, "step": 10438 }, { "epoch": 4.853556485355648, "grad_norm": 1.709113359451294, "learning_rate": 5.737168930605264e-06, "loss": 0.1303, "step": 10440 }, { "epoch": 4.854486285448629, "grad_norm": 2.2490293979644775, "learning_rate": 5.78732706952305e-06, "loss": 0.1157, "step": 10442 }, { "epoch": 4.855416085541608, "grad_norm": 1.0818889141082764, "learning_rate": 5.837674829766244e-06, "loss": 0.0466, "step": 10444 }, { "epoch": 4.856345885634589, "grad_norm": 1.7636911869049072, "learning_rate": 5.888211714422727e-06, "loss": 0.0838, "step": 10446 }, { "epoch": 4.8572756857275685, "grad_norm": 1.8468148708343506, "learning_rate": 5.938937224713846e-06, "loss": 0.1087, "step": 10448 }, { "epoch": 4.858205485820548, "grad_norm": 1.447888731956482, "learning_rate": 5.989850859999237e-06, "loss": 0.0603, "step": 10450 }, { "epoch": 4.859135285913529, "grad_norm": 1.5595952272415161, "learning_rate": 6.0409521177819335e-06, "loss": 0.1007, "step": 10452 }, { "epoch": 4.860065086006508, "grad_norm": 0.9600377082824707, "learning_rate": 6.092240493713205e-06, "loss": 0.0637, "step": 10454 }, { "epoch": 4.860994886099489, "grad_norm": 1.4171404838562012, "learning_rate": 6.143715481597376e-06, "loss": 0.1255, "step": 10456 }, { "epoch": 4.861924686192468, "grad_norm": 1.357022762298584, "learning_rate": 6.195376573397272e-06, "loss": 0.0746, "step": 10458 }, { "epoch": 4.862854486285449, "grad_norm": 1.9014962911605835, "learning_rate": 6.2472232592385345e-06, "loss": 0.1119, "step": 10460 }, { "epoch": 4.863784286378428, "grad_norm": 1.0665113925933838, "learning_rate": 6.299255027415429e-06, "loss": 0.0509, "step": 10462 }, { "epoch": 4.864714086471409, "grad_norm": 1.9426823854446411, "learning_rate": 6.351471364395463e-06, "loss": 0.1098, "step": 10464 }, { "epoch": 4.8656438865643885, "grad_norm": 1.6426761150360107, "learning_rate": 6.4038717548243575e-06, "loss": 0.1082, "step": 10466 }, { "epoch": 4.866573686657369, "grad_norm": 1.3181664943695068, "learning_rate": 6.4564556815315305e-06, "loss": 0.0749, "step": 10468 }, { "epoch": 4.867503486750349, "grad_norm": 2.0795702934265137, "learning_rate": 6.509222625534735e-06, "loss": 0.1053, "step": 10470 }, { "epoch": 4.868433286843329, "grad_norm": 0.8834363222122192, "learning_rate": 6.5621720660456565e-06, "loss": 0.1065, "step": 10472 }, { "epoch": 4.869363086936309, "grad_norm": 1.4994748830795288, "learning_rate": 6.615303480474633e-06, "loss": 0.0953, "step": 10474 }, { "epoch": 4.870292887029288, "grad_norm": 1.227318525314331, "learning_rate": 6.668616344435994e-06, "loss": 0.0546, "step": 10476 }, { "epoch": 4.871222687122269, "grad_norm": 1.4421656131744385, "learning_rate": 6.722110131753422e-06, "loss": 0.0938, "step": 10478 }, { "epoch": 4.872152487215248, "grad_norm": 1.5669265985488892, "learning_rate": 6.775784314464702e-06, "loss": 0.096, "step": 10480 }, { "epoch": 4.873082287308229, "grad_norm": 1.3813056945800781, "learning_rate": 6.829638362827446e-06, "loss": 0.072, "step": 10482 }, { "epoch": 4.8740120874012085, "grad_norm": 1.1649268865585327, "learning_rate": 6.8836717453238175e-06, "loss": 0.0528, "step": 10484 }, { "epoch": 4.874941887494189, "grad_norm": 1.4899111986160278, "learning_rate": 6.9378839286662645e-06, "loss": 0.1017, "step": 10486 }, { "epoch": 4.875871687587169, "grad_norm": 1.0713146924972534, "learning_rate": 6.992274377802364e-06, "loss": 0.0375, "step": 10488 }, { "epoch": 4.876801487680149, "grad_norm": 1.3530281782150269, "learning_rate": 7.0468425559202805e-06, "loss": 0.0777, "step": 10490 }, { "epoch": 4.877731287773129, "grad_norm": 1.2925240993499756, "learning_rate": 7.101587924454251e-06, "loss": 0.0672, "step": 10492 }, { "epoch": 4.878661087866108, "grad_norm": 1.1036229133605957, "learning_rate": 7.156509943089461e-06, "loss": 0.0738, "step": 10494 }, { "epoch": 4.879590887959089, "grad_norm": 0.8460735082626343, "learning_rate": 7.211608069767828e-06, "loss": 0.062, "step": 10496 }, { "epoch": 4.880520688052069, "grad_norm": 1.8389581441879272, "learning_rate": 7.266881760693144e-06, "loss": 0.0713, "step": 10498 }, { "epoch": 4.881450488145049, "grad_norm": 1.2642865180969238, "learning_rate": 7.3223304703363305e-06, "loss": 0.0717, "step": 10500 }, { "epoch": 4.8823802882380285, "grad_norm": 1.2638412714004517, "learning_rate": 7.377953651441011e-06, "loss": 0.0827, "step": 10502 }, { "epoch": 4.883310088331009, "grad_norm": 1.4444843530654907, "learning_rate": 7.433750755028782e-06, "loss": 0.0549, "step": 10504 }, { "epoch": 4.884239888423989, "grad_norm": 0.99676114320755, "learning_rate": 7.489721230404819e-06, "loss": 0.0319, "step": 10506 }, { "epoch": 4.885169688516969, "grad_norm": 1.1287050247192383, "learning_rate": 7.545864525163189e-06, "loss": 0.0772, "step": 10508 }, { "epoch": 4.886099488609949, "grad_norm": 1.369383454322815, "learning_rate": 7.602180085192114e-06, "loss": 0.0988, "step": 10510 }, { "epoch": 4.887029288702929, "grad_norm": 0.6510347127914429, "learning_rate": 7.658667354679876e-06, "loss": 0.0495, "step": 10512 }, { "epoch": 4.887959088795909, "grad_norm": 1.0386924743652344, "learning_rate": 7.715325776119868e-06, "loss": 0.0884, "step": 10514 }, { "epoch": 4.888888888888889, "grad_norm": 1.5761016607284546, "learning_rate": 7.772154790316279e-06, "loss": 0.0952, "step": 10516 }, { "epoch": 4.889818688981869, "grad_norm": 1.5356781482696533, "learning_rate": 7.829153836389815e-06, "loss": 0.0703, "step": 10518 }, { "epoch": 4.8907484890748485, "grad_norm": 1.7232391834259033, "learning_rate": 7.886322351782767e-06, "loss": 0.1132, "step": 10520 }, { "epoch": 4.891678289167829, "grad_norm": 1.8515410423278809, "learning_rate": 7.943659772265106e-06, "loss": 0.0613, "step": 10522 }, { "epoch": 4.892608089260809, "grad_norm": 2.0902016162872314, "learning_rate": 8.0011655319395e-06, "loss": 0.0975, "step": 10524 }, { "epoch": 4.893537889353789, "grad_norm": 1.2587970495224, "learning_rate": 8.058839063247452e-06, "loss": 0.1041, "step": 10526 }, { "epoch": 4.894467689446769, "grad_norm": 1.6638431549072266, "learning_rate": 8.116679796974429e-06, "loss": 0.0915, "step": 10528 }, { "epoch": 4.895397489539749, "grad_norm": 1.448391318321228, "learning_rate": 8.174687162255674e-06, "loss": 0.089, "step": 10530 }, { "epoch": 4.896327289632729, "grad_norm": 0.9627419710159302, "learning_rate": 8.232860586582033e-06, "loss": 0.0766, "step": 10532 }, { "epoch": 4.897257089725709, "grad_norm": 1.0117768049240112, "learning_rate": 8.29119949580517e-06, "loss": 0.0663, "step": 10534 }, { "epoch": 4.898186889818689, "grad_norm": 1.2229477167129517, "learning_rate": 8.349703314143735e-06, "loss": 0.0762, "step": 10536 }, { "epoch": 4.899116689911669, "grad_norm": 1.6320070028305054, "learning_rate": 8.408371464188527e-06, "loss": 0.0905, "step": 10538 }, { "epoch": 4.900046490004649, "grad_norm": 1.4457217454910278, "learning_rate": 8.467203366908658e-06, "loss": 0.0829, "step": 10540 }, { "epoch": 4.9009762900976295, "grad_norm": 1.356744647026062, "learning_rate": 8.526198441657127e-06, "loss": 0.0902, "step": 10542 }, { "epoch": 4.901906090190609, "grad_norm": 1.2560880184173584, "learning_rate": 8.585356106176102e-06, "loss": 0.0828, "step": 10544 }, { "epoch": 4.902835890283589, "grad_norm": 1.0386697053909302, "learning_rate": 8.644675776603517e-06, "loss": 0.0906, "step": 10546 }, { "epoch": 4.903765690376569, "grad_norm": 1.452987790107727, "learning_rate": 8.704156867478036e-06, "loss": 0.0856, "step": 10548 }, { "epoch": 4.904695490469549, "grad_norm": 1.255620002746582, "learning_rate": 8.763798791745376e-06, "loss": 0.0643, "step": 10550 }, { "epoch": 4.905625290562529, "grad_norm": 1.4075191020965576, "learning_rate": 8.82360096076389e-06, "loss": 0.1433, "step": 10552 }, { "epoch": 4.906555090655509, "grad_norm": 1.67631196975708, "learning_rate": 8.883562784310176e-06, "loss": 0.0976, "step": 10554 }, { "epoch": 4.907484890748489, "grad_norm": 1.765733242034912, "learning_rate": 8.943683670585448e-06, "loss": 0.0716, "step": 10556 }, { "epoch": 4.908414690841469, "grad_norm": 1.507331132888794, "learning_rate": 9.003963026220558e-06, "loss": 0.0944, "step": 10558 }, { "epoch": 4.9093444909344495, "grad_norm": 1.2145267724990845, "learning_rate": 9.064400256282736e-06, "loss": 0.0793, "step": 10560 }, { "epoch": 4.910274291027429, "grad_norm": 1.678959608078003, "learning_rate": 9.124994764280996e-06, "loss": 0.079, "step": 10562 }, { "epoch": 4.911204091120409, "grad_norm": 1.6477391719818115, "learning_rate": 9.185745952171863e-06, "loss": 0.0585, "step": 10564 }, { "epoch": 4.912133891213389, "grad_norm": 1.1641191244125366, "learning_rate": 9.246653220365776e-06, "loss": 0.082, "step": 10566 }, { "epoch": 4.913063691306369, "grad_norm": 1.6716172695159912, "learning_rate": 9.307715967732523e-06, "loss": 0.0902, "step": 10568 }, { "epoch": 4.913993491399349, "grad_norm": 1.4088958501815796, "learning_rate": 9.368933591607436e-06, "loss": 0.1333, "step": 10570 }, { "epoch": 4.914923291492329, "grad_norm": 1.7856649160385132, "learning_rate": 9.430305487797215e-06, "loss": 0.0624, "step": 10572 }, { "epoch": 4.915853091585309, "grad_norm": 0.9629393219947815, "learning_rate": 9.491831050586088e-06, "loss": 0.0709, "step": 10574 }, { "epoch": 4.916782891678289, "grad_norm": 1.1676836013793945, "learning_rate": 9.553509672741662e-06, "loss": 0.0612, "step": 10576 }, { "epoch": 4.9177126917712695, "grad_norm": 1.5499244928359985, "learning_rate": 9.615340745520694e-06, "loss": 0.0707, "step": 10578 }, { "epoch": 4.918642491864249, "grad_norm": 0.9325110912322998, "learning_rate": 9.6773236586756e-06, "loss": 0.0864, "step": 10580 }, { "epoch": 4.91957229195723, "grad_norm": 2.104841470718384, "learning_rate": 9.739457800459979e-06, "loss": 0.0691, "step": 10582 }, { "epoch": 4.920502092050209, "grad_norm": 1.2941107749938965, "learning_rate": 9.801742557634869e-06, "loss": 0.0849, "step": 10584 }, { "epoch": 4.92143189214319, "grad_norm": 1.6992027759552002, "learning_rate": 9.864177315474997e-06, "loss": 0.0851, "step": 10586 }, { "epoch": 4.922361692236169, "grad_norm": 1.0506742000579834, "learning_rate": 9.926761457774377e-06, "loss": 0.0781, "step": 10588 }, { "epoch": 4.923291492329149, "grad_norm": 1.4826622009277344, "learning_rate": 9.989494366852924e-06, "loss": 0.0929, "step": 10590 }, { "epoch": 4.924221292422129, "grad_norm": 1.4648953676223755, "learning_rate": 1.0052375423562014e-05, "loss": 0.0748, "step": 10592 }, { "epoch": 4.925151092515109, "grad_norm": 2.123533248901367, "learning_rate": 1.0115404007291072e-05, "loss": 0.0976, "step": 10594 }, { "epoch": 4.9260808926080895, "grad_norm": 1.9419779777526855, "learning_rate": 1.0178579495973566e-05, "loss": 0.1108, "step": 10596 }, { "epoch": 4.927010692701069, "grad_norm": 2.149592399597168, "learning_rate": 1.0241901266092641e-05, "loss": 0.0973, "step": 10598 }, { "epoch": 4.9279404927940496, "grad_norm": 1.6808990240097046, "learning_rate": 1.0305368692688208e-05, "loss": 0.0997, "step": 10600 }, { "epoch": 4.928870292887029, "grad_norm": 2.041966199874878, "learning_rate": 1.0368981149362246e-05, "loss": 0.0882, "step": 10602 }, { "epoch": 4.92980009298001, "grad_norm": 1.41558837890625, "learning_rate": 1.0432738008285558e-05, "loss": 0.0586, "step": 10604 }, { "epoch": 4.930729893072989, "grad_norm": 1.075947642326355, "learning_rate": 1.0496638640203755e-05, "loss": 0.1031, "step": 10606 }, { "epoch": 4.931659693165969, "grad_norm": 1.3150171041488647, "learning_rate": 1.056068241444326e-05, "loss": 0.0696, "step": 10608 }, { "epoch": 4.932589493258949, "grad_norm": 1.6794102191925049, "learning_rate": 1.0624868698918089e-05, "loss": 0.0626, "step": 10610 }, { "epoch": 4.933519293351929, "grad_norm": 1.4375172853469849, "learning_rate": 1.0689196860135242e-05, "loss": 0.0775, "step": 10612 }, { "epoch": 4.9344490934449095, "grad_norm": 1.470690369606018, "learning_rate": 1.075366626320195e-05, "loss": 0.0668, "step": 10614 }, { "epoch": 4.935378893537889, "grad_norm": 1.5768470764160156, "learning_rate": 1.0818276271831093e-05, "loss": 0.0992, "step": 10616 }, { "epoch": 4.9363086936308695, "grad_norm": 1.2624262571334839, "learning_rate": 1.0883026248348038e-05, "loss": 0.0673, "step": 10618 }, { "epoch": 4.937238493723849, "grad_norm": 1.6734384298324585, "learning_rate": 1.094791555369673e-05, "loss": 0.1236, "step": 10620 }, { "epoch": 4.93816829381683, "grad_norm": 1.9353188276290894, "learning_rate": 1.101294354744578e-05, "loss": 0.0775, "step": 10622 }, { "epoch": 4.939098093909809, "grad_norm": 1.8327268362045288, "learning_rate": 1.1078109587795362e-05, "loss": 0.1045, "step": 10624 }, { "epoch": 4.94002789400279, "grad_norm": 1.2721298933029175, "learning_rate": 1.114341303158267e-05, "loss": 0.0738, "step": 10626 }, { "epoch": 4.940957694095769, "grad_norm": 2.008549451828003, "learning_rate": 1.1208853234289225e-05, "loss": 0.1146, "step": 10628 }, { "epoch": 4.94188749418875, "grad_norm": 1.3593685626983643, "learning_rate": 1.127442955004672e-05, "loss": 0.0811, "step": 10630 }, { "epoch": 4.9428172942817294, "grad_norm": 1.090069055557251, "learning_rate": 1.1340141331643245e-05, "loss": 0.0664, "step": 10632 }, { "epoch": 4.943747094374709, "grad_norm": 0.992962658405304, "learning_rate": 1.1405987930530194e-05, "loss": 0.1084, "step": 10634 }, { "epoch": 4.9446768944676895, "grad_norm": 1.7771581411361694, "learning_rate": 1.147196869682807e-05, "loss": 0.0786, "step": 10636 }, { "epoch": 4.945606694560669, "grad_norm": 1.2287153005599976, "learning_rate": 1.1538082979333495e-05, "loss": 0.0725, "step": 10638 }, { "epoch": 4.94653649465365, "grad_norm": 1.3041534423828125, "learning_rate": 1.1604330125525116e-05, "loss": 0.0815, "step": 10640 }, { "epoch": 4.947466294746629, "grad_norm": 1.0089099407196045, "learning_rate": 1.1670709481570278e-05, "loss": 0.1034, "step": 10642 }, { "epoch": 4.94839609483961, "grad_norm": 1.1482346057891846, "learning_rate": 1.1737220392331676e-05, "loss": 0.0868, "step": 10644 }, { "epoch": 4.949325894932589, "grad_norm": 1.2572206258773804, "learning_rate": 1.1803862201373325e-05, "loss": 0.0874, "step": 10646 }, { "epoch": 4.95025569502557, "grad_norm": 1.4876114130020142, "learning_rate": 1.1870634250967552e-05, "loss": 0.0651, "step": 10648 }, { "epoch": 4.951185495118549, "grad_norm": 1.2647569179534912, "learning_rate": 1.1937535882101256e-05, "loss": 0.0871, "step": 10650 }, { "epoch": 4.952115295211529, "grad_norm": 1.7334457635879517, "learning_rate": 1.2004566434482278e-05, "loss": 0.0801, "step": 10652 }, { "epoch": 4.9530450953045095, "grad_norm": 1.248849868774414, "learning_rate": 1.2071725246546119e-05, "loss": 0.0816, "step": 10654 }, { "epoch": 4.95397489539749, "grad_norm": 1.2751007080078125, "learning_rate": 1.2139011655462347e-05, "loss": 0.1529, "step": 10656 }, { "epoch": 4.95490469549047, "grad_norm": 1.3207019567489624, "learning_rate": 1.2206424997141417e-05, "loss": 0.0865, "step": 10658 }, { "epoch": 4.955834495583449, "grad_norm": 1.4472508430480957, "learning_rate": 1.2273964606240718e-05, "loss": 0.0678, "step": 10660 }, { "epoch": 4.95676429567643, "grad_norm": 1.4366477727890015, "learning_rate": 1.2341629816171642e-05, "loss": 0.074, "step": 10662 }, { "epoch": 4.957694095769409, "grad_norm": 1.9882205724716187, "learning_rate": 1.2409419959106045e-05, "loss": 0.0904, "step": 10664 }, { "epoch": 4.95862389586239, "grad_norm": 2.36133074760437, "learning_rate": 1.247733436598228e-05, "loss": 0.0902, "step": 10666 }, { "epoch": 4.959553695955369, "grad_norm": 1.7178056240081787, "learning_rate": 1.2545372366512791e-05, "loss": 0.1008, "step": 10668 }, { "epoch": 4.96048349604835, "grad_norm": 1.1588070392608643, "learning_rate": 1.261353328918983e-05, "loss": 0.0629, "step": 10670 }, { "epoch": 4.9614132961413295, "grad_norm": 1.3395750522613525, "learning_rate": 1.2681816461292688e-05, "loss": 0.0656, "step": 10672 }, { "epoch": 4.96234309623431, "grad_norm": 1.7347582578659058, "learning_rate": 1.2750221208894096e-05, "loss": 0.09, "step": 10674 }, { "epoch": 4.96327289632729, "grad_norm": 1.58704674243927, "learning_rate": 1.2818746856866658e-05, "loss": 0.1096, "step": 10676 }, { "epoch": 4.964202696420269, "grad_norm": 1.4709559679031372, "learning_rate": 1.2887392728890131e-05, "loss": 0.076, "step": 10678 }, { "epoch": 4.96513249651325, "grad_norm": 1.0777533054351807, "learning_rate": 1.2956158147457155e-05, "loss": 0.1064, "step": 10680 }, { "epoch": 4.966062296606229, "grad_norm": 1.0802059173583984, "learning_rate": 1.302504243388097e-05, "loss": 0.0973, "step": 10682 }, { "epoch": 4.96699209669921, "grad_norm": 1.509121298789978, "learning_rate": 1.3094044908301552e-05, "loss": 0.0881, "step": 10684 }, { "epoch": 4.967921896792189, "grad_norm": 1.7771835327148438, "learning_rate": 1.3163164889692184e-05, "loss": 0.1194, "step": 10686 }, { "epoch": 4.96885169688517, "grad_norm": 0.8710341453552246, "learning_rate": 1.3232401695866709e-05, "loss": 0.0558, "step": 10688 }, { "epoch": 4.9697814969781495, "grad_norm": 1.2193742990493774, "learning_rate": 1.3301754643485644e-05, "loss": 0.0704, "step": 10690 }, { "epoch": 4.97071129707113, "grad_norm": 0.9982830286026001, "learning_rate": 1.3371223048063554e-05, "loss": 0.0501, "step": 10692 }, { "epoch": 4.97164109716411, "grad_norm": 1.7036782503128052, "learning_rate": 1.3440806223975254e-05, "loss": 0.0703, "step": 10694 }, { "epoch": 4.972570897257089, "grad_norm": 0.9894372224807739, "learning_rate": 1.3510503484462805e-05, "loss": 0.0572, "step": 10696 }, { "epoch": 4.97350069735007, "grad_norm": 1.8105050325393677, "learning_rate": 1.3580314141642551e-05, "loss": 0.0591, "step": 10698 }, { "epoch": 4.97443049744305, "grad_norm": 1.2168759107589722, "learning_rate": 1.3650237506511323e-05, "loss": 0.0713, "step": 10700 }, { "epoch": 4.97536029753603, "grad_norm": 1.5412770509719849, "learning_rate": 1.37202728889539e-05, "loss": 0.0793, "step": 10702 }, { "epoch": 4.976290097629009, "grad_norm": 1.778908610343933, "learning_rate": 1.3790419597749175e-05, "loss": 0.0938, "step": 10704 }, { "epoch": 4.97721989772199, "grad_norm": 0.907158374786377, "learning_rate": 1.3860676940577611e-05, "loss": 0.0647, "step": 10706 }, { "epoch": 4.9781496978149695, "grad_norm": 1.3486312627792358, "learning_rate": 1.3931044224027517e-05, "loss": 0.0691, "step": 10708 }, { "epoch": 4.97907949790795, "grad_norm": 1.591888427734375, "learning_rate": 1.4001520753602133e-05, "loss": 0.0936, "step": 10710 }, { "epoch": 4.98000929800093, "grad_norm": 1.3494086265563965, "learning_rate": 1.4072105833726724e-05, "loss": 0.0805, "step": 10712 }, { "epoch": 4.98093909809391, "grad_norm": 1.6457390785217285, "learning_rate": 1.4142798767754893e-05, "loss": 0.1021, "step": 10714 }, { "epoch": 4.98186889818689, "grad_norm": 1.294028639793396, "learning_rate": 1.421359885797599e-05, "loss": 0.0621, "step": 10716 }, { "epoch": 4.98279869827987, "grad_norm": 1.7375147342681885, "learning_rate": 1.4284505405621792e-05, "loss": 0.0807, "step": 10718 }, { "epoch": 4.98372849837285, "grad_norm": 1.547577142715454, "learning_rate": 1.4355517710873219e-05, "loss": 0.0624, "step": 10720 }, { "epoch": 4.984658298465829, "grad_norm": 1.081154704093933, "learning_rate": 1.4426635072867492e-05, "loss": 0.0717, "step": 10722 }, { "epoch": 4.98558809855881, "grad_norm": 2.3876123428344727, "learning_rate": 1.4497856789704875e-05, "loss": 0.1298, "step": 10724 }, { "epoch": 4.9865178986517895, "grad_norm": 0.8641354441642761, "learning_rate": 1.4569182158455853e-05, "loss": 0.0511, "step": 10726 }, { "epoch": 4.98744769874477, "grad_norm": 1.5279072523117065, "learning_rate": 1.4640610475167917e-05, "loss": 0.1091, "step": 10728 }, { "epoch": 4.98837749883775, "grad_norm": 1.4522676467895508, "learning_rate": 1.4712141034872256e-05, "loss": 0.0956, "step": 10730 }, { "epoch": 4.98930729893073, "grad_norm": 1.4195867776870728, "learning_rate": 1.4783773131591288e-05, "loss": 0.1037, "step": 10732 }, { "epoch": 4.99023709902371, "grad_norm": 1.6175800561904907, "learning_rate": 1.4855506058345052e-05, "loss": 0.077, "step": 10734 }, { "epoch": 4.99116689911669, "grad_norm": 1.6370899677276611, "learning_rate": 1.492733910715844e-05, "loss": 0.1036, "step": 10736 }, { "epoch": 4.99209669920967, "grad_norm": 1.0597280263900757, "learning_rate": 1.4999271569068426e-05, "loss": 0.0616, "step": 10738 }, { "epoch": 4.99302649930265, "grad_norm": 2.0037453174591064, "learning_rate": 1.5071302734130487e-05, "loss": 0.0893, "step": 10740 }, { "epoch": 4.99395629939563, "grad_norm": 0.7946469783782959, "learning_rate": 1.5143431891426262e-05, "loss": 0.0441, "step": 10742 }, { "epoch": 4.99488609948861, "grad_norm": 1.2230485677719116, "learning_rate": 1.5215658329069929e-05, "loss": 0.0632, "step": 10744 }, { "epoch": 4.99581589958159, "grad_norm": 1.1877840757369995, "learning_rate": 1.5287981334215956e-05, "loss": 0.065, "step": 10746 }, { "epoch": 4.99674569967457, "grad_norm": 1.9741896390914917, "learning_rate": 1.5360400193065148e-05, "loss": 0.1061, "step": 10748 }, { "epoch": 4.99767549976755, "grad_norm": 1.467490792274475, "learning_rate": 1.5432914190872777e-05, "loss": 0.069, "step": 10750 }, { "epoch": 4.99860529986053, "grad_norm": 1.4954689741134644, "learning_rate": 1.550552261195503e-05, "loss": 0.0739, "step": 10752 }, { "epoch": 4.99953509995351, "grad_norm": 1.013161301612854, "learning_rate": 1.5578224739695948e-05, "loss": 0.0766, "step": 10754 }, { "epoch": 5.00046490004649, "grad_norm": 0.7970132231712341, "learning_rate": 1.5651019856555043e-05, "loss": 0.0408, "step": 10756 }, { "epoch": 5.00139470013947, "grad_norm": 1.568413257598877, "learning_rate": 1.572390724407373e-05, "loss": 0.0988, "step": 10758 }, { "epoch": 5.00232450023245, "grad_norm": 1.9965211153030396, "learning_rate": 1.5796886182883093e-05, "loss": 0.0757, "step": 10760 }, { "epoch": 5.00325430032543, "grad_norm": 0.6749058961868286, "learning_rate": 1.5869955952710383e-05, "loss": 0.08, "step": 10762 }, { "epoch": 5.00418410041841, "grad_norm": 0.9779415726661682, "learning_rate": 1.5943115832386386e-05, "loss": 0.0595, "step": 10764 }, { "epoch": 5.0051139005113905, "grad_norm": 0.848933219909668, "learning_rate": 1.60163650998528e-05, "loss": 0.0512, "step": 10766 }, { "epoch": 5.00604370060437, "grad_norm": 1.155746579170227, "learning_rate": 1.6089703032168754e-05, "loss": 0.0539, "step": 10768 }, { "epoch": 5.00697350069735, "grad_norm": 1.7646892070770264, "learning_rate": 1.616312890551855e-05, "loss": 0.0652, "step": 10770 }, { "epoch": 5.00790330079033, "grad_norm": 1.172414779663086, "learning_rate": 1.6236641995218538e-05, "loss": 0.0627, "step": 10772 }, { "epoch": 5.00883310088331, "grad_norm": 1.2840142250061035, "learning_rate": 1.6310241575724127e-05, "loss": 0.0734, "step": 10774 }, { "epoch": 5.00976290097629, "grad_norm": 0.8571532964706421, "learning_rate": 1.638392692063716e-05, "loss": 0.0461, "step": 10776 }, { "epoch": 5.01069270106927, "grad_norm": 1.4188227653503418, "learning_rate": 1.645769730271296e-05, "loss": 0.1086, "step": 10778 }, { "epoch": 5.01162250116225, "grad_norm": 1.1636364459991455, "learning_rate": 1.653155199386771e-05, "loss": 0.0923, "step": 10780 }, { "epoch": 5.01255230125523, "grad_norm": 1.6049036979675293, "learning_rate": 1.6605490265185516e-05, "loss": 0.0471, "step": 10782 }, { "epoch": 5.0134821013482105, "grad_norm": 1.1182575225830078, "learning_rate": 1.667951138692532e-05, "loss": 0.0776, "step": 10784 }, { "epoch": 5.01441190144119, "grad_norm": 0.873047411441803, "learning_rate": 1.6753614628528703e-05, "loss": 0.0429, "step": 10786 }, { "epoch": 5.015341701534171, "grad_norm": 1.070116400718689, "learning_rate": 1.68277992586265e-05, "loss": 0.0645, "step": 10788 }, { "epoch": 5.01627150162715, "grad_norm": 0.9851587414741516, "learning_rate": 1.6902064545046366e-05, "loss": 0.0427, "step": 10790 }, { "epoch": 5.01720130172013, "grad_norm": 1.7221943140029907, "learning_rate": 1.6976409754819818e-05, "loss": 0.0859, "step": 10792 }, { "epoch": 5.01813110181311, "grad_norm": 1.0389313697814941, "learning_rate": 1.705083415418973e-05, "loss": 0.0706, "step": 10794 }, { "epoch": 5.01906090190609, "grad_norm": 1.201953649520874, "learning_rate": 1.7125337008617423e-05, "loss": 0.0963, "step": 10796 }, { "epoch": 5.01999070199907, "grad_norm": 1.0658096075057983, "learning_rate": 1.7199917582789623e-05, "loss": 0.0579, "step": 10798 }, { "epoch": 5.02092050209205, "grad_norm": 0.6056813597679138, "learning_rate": 1.7274575140626345e-05, "loss": 0.0378, "step": 10800 }, { "epoch": 5.0218503021850305, "grad_norm": 1.4814337491989136, "learning_rate": 1.734930894528755e-05, "loss": 0.0706, "step": 10802 }, { "epoch": 5.02278010227801, "grad_norm": 0.887216329574585, "learning_rate": 1.742411825918067e-05, "loss": 0.0701, "step": 10804 }, { "epoch": 5.023709902370991, "grad_norm": 1.5089613199234009, "learning_rate": 1.749900234396815e-05, "loss": 0.0655, "step": 10806 }, { "epoch": 5.02463970246397, "grad_norm": 1.2838698625564575, "learning_rate": 1.7573960460574133e-05, "loss": 0.0471, "step": 10808 }, { "epoch": 5.025569502556951, "grad_norm": 1.145018219947815, "learning_rate": 1.7648991869192442e-05, "loss": 0.0634, "step": 10810 }, { "epoch": 5.02649930264993, "grad_norm": 1.0549525022506714, "learning_rate": 1.77240958292932e-05, "loss": 0.0593, "step": 10812 }, { "epoch": 5.02742910274291, "grad_norm": 0.6820663809776306, "learning_rate": 1.7799271599630715e-05, "loss": 0.0402, "step": 10814 }, { "epoch": 5.02835890283589, "grad_norm": 1.2271950244903564, "learning_rate": 1.7874518438250665e-05, "loss": 0.073, "step": 10816 }, { "epoch": 5.02928870292887, "grad_norm": 1.5578179359436035, "learning_rate": 1.7949835602496783e-05, "loss": 0.0634, "step": 10818 }, { "epoch": 5.0302185030218505, "grad_norm": 1.0786514282226562, "learning_rate": 1.802522234901933e-05, "loss": 0.0353, "step": 10820 }, { "epoch": 5.03114830311483, "grad_norm": 1.7055717706680298, "learning_rate": 1.8100677933781367e-05, "loss": 0.0494, "step": 10822 }, { "epoch": 5.032078103207811, "grad_norm": 1.1131478548049927, "learning_rate": 1.817620161206683e-05, "loss": 0.0516, "step": 10824 }, { "epoch": 5.03300790330079, "grad_norm": 1.378394365310669, "learning_rate": 1.8251792638487593e-05, "loss": 0.0975, "step": 10826 }, { "epoch": 5.033937703393771, "grad_norm": 1.0096086263656616, "learning_rate": 1.832745026699057e-05, "loss": 0.072, "step": 10828 }, { "epoch": 5.03486750348675, "grad_norm": 1.2666897773742676, "learning_rate": 1.8403173750865756e-05, "loss": 0.0835, "step": 10830 }, { "epoch": 5.035797303579731, "grad_norm": 1.1747487783432007, "learning_rate": 1.8478962342752604e-05, "loss": 0.0681, "step": 10832 }, { "epoch": 5.03672710367271, "grad_norm": 1.2397509813308716, "learning_rate": 1.8554815294648568e-05, "loss": 0.068, "step": 10834 }, { "epoch": 5.03765690376569, "grad_norm": 2.0104563236236572, "learning_rate": 1.8630731857915463e-05, "loss": 0.0803, "step": 10836 }, { "epoch": 5.0385867038586705, "grad_norm": 1.1833728551864624, "learning_rate": 1.8706711283287542e-05, "loss": 0.0568, "step": 10838 }, { "epoch": 5.03951650395165, "grad_norm": 1.2269152402877808, "learning_rate": 1.8782752820878638e-05, "loss": 0.0727, "step": 10840 }, { "epoch": 5.040446304044631, "grad_norm": 0.8452809453010559, "learning_rate": 1.88588557201893e-05, "loss": 0.0488, "step": 10842 }, { "epoch": 5.04137610413761, "grad_norm": 1.0067319869995117, "learning_rate": 1.8935019230114897e-05, "loss": 0.0824, "step": 10844 }, { "epoch": 5.042305904230591, "grad_norm": 1.8662631511688232, "learning_rate": 1.9011242598951986e-05, "loss": 0.069, "step": 10846 }, { "epoch": 5.04323570432357, "grad_norm": 1.4577773809432983, "learning_rate": 1.9087525074406865e-05, "loss": 0.0732, "step": 10848 }, { "epoch": 5.044165504416551, "grad_norm": 2.0202178955078125, "learning_rate": 1.9163865903602384e-05, "loss": 0.0846, "step": 10850 }, { "epoch": 5.04509530450953, "grad_norm": 1.0990694761276245, "learning_rate": 1.9240264333085204e-05, "loss": 0.0673, "step": 10852 }, { "epoch": 5.046025104602511, "grad_norm": 1.2217682600021362, "learning_rate": 1.9316719608833845e-05, "loss": 0.0672, "step": 10854 }, { "epoch": 5.0469549046954905, "grad_norm": 1.0296355485916138, "learning_rate": 1.9393230976265514e-05, "loss": 0.054, "step": 10856 }, { "epoch": 5.04788470478847, "grad_norm": 1.4041845798492432, "learning_rate": 1.946979768024382e-05, "loss": 0.045, "step": 10858 }, { "epoch": 5.048814504881451, "grad_norm": 1.4366117715835571, "learning_rate": 1.9546418965086473e-05, "loss": 0.0619, "step": 10860 }, { "epoch": 5.04974430497443, "grad_norm": 2.1986260414123535, "learning_rate": 1.962309407457215e-05, "loss": 0.0636, "step": 10862 }, { "epoch": 5.050674105067411, "grad_norm": 1.097172737121582, "learning_rate": 1.969982225194866e-05, "loss": 0.0715, "step": 10864 }, { "epoch": 5.05160390516039, "grad_norm": 0.9033523201942444, "learning_rate": 1.9776602739939684e-05, "loss": 0.0558, "step": 10866 }, { "epoch": 5.052533705253371, "grad_norm": 2.2103185653686523, "learning_rate": 1.9853434780752895e-05, "loss": 0.0684, "step": 10868 }, { "epoch": 5.05346350534635, "grad_norm": 1.8277872800827026, "learning_rate": 1.9930317616087237e-05, "loss": 0.0981, "step": 10870 }, { "epoch": 5.054393305439331, "grad_norm": 1.4388090372085571, "learning_rate": 2.0007250487139826e-05, "loss": 0.081, "step": 10872 }, { "epoch": 5.0553231055323105, "grad_norm": 1.5256789922714233, "learning_rate": 2.0084232634614537e-05, "loss": 0.1006, "step": 10874 }, { "epoch": 5.056252905625291, "grad_norm": 1.6897873878479004, "learning_rate": 2.016126329872848e-05, "loss": 0.0594, "step": 10876 }, { "epoch": 5.0571827057182706, "grad_norm": 1.1458923816680908, "learning_rate": 2.023834171922028e-05, "loss": 0.087, "step": 10878 }, { "epoch": 5.05811250581125, "grad_norm": 1.4550811052322388, "learning_rate": 2.0315467135356863e-05, "loss": 0.0576, "step": 10880 }, { "epoch": 5.059042305904231, "grad_norm": 0.8442602157592773, "learning_rate": 2.0392638785941596e-05, "loss": 0.0594, "step": 10882 }, { "epoch": 5.05997210599721, "grad_norm": 0.8410201668739319, "learning_rate": 2.046985590932162e-05, "loss": 0.0612, "step": 10884 }, { "epoch": 5.060901906090191, "grad_norm": 0.8100524544715881, "learning_rate": 2.0547117743394754e-05, "loss": 0.0877, "step": 10886 }, { "epoch": 5.06183170618317, "grad_norm": 0.878171980381012, "learning_rate": 2.0624423525618145e-05, "loss": 0.077, "step": 10888 }, { "epoch": 5.062761506276151, "grad_norm": 1.149806022644043, "learning_rate": 2.070177249301476e-05, "loss": 0.0646, "step": 10890 }, { "epoch": 5.0636913063691305, "grad_norm": 0.7475777268409729, "learning_rate": 2.0779163882181604e-05, "loss": 0.039, "step": 10892 }, { "epoch": 5.064621106462111, "grad_norm": 1.13507080078125, "learning_rate": 2.0856596929296996e-05, "loss": 0.0405, "step": 10894 }, { "epoch": 5.0655509065550905, "grad_norm": 1.2661365270614624, "learning_rate": 2.093407087012785e-05, "loss": 0.1034, "step": 10896 }, { "epoch": 5.066480706648071, "grad_norm": 1.1550068855285645, "learning_rate": 2.101158494003791e-05, "loss": 0.0674, "step": 10898 }, { "epoch": 5.067410506741051, "grad_norm": 0.9092123508453369, "learning_rate": 2.1089138373994247e-05, "loss": 0.0688, "step": 10900 }, { "epoch": 5.06834030683403, "grad_norm": 1.428299069404602, "learning_rate": 2.116673040657595e-05, "loss": 0.0958, "step": 10902 }, { "epoch": 5.069270106927011, "grad_norm": 0.4893101155757904, "learning_rate": 2.1244360271981087e-05, "loss": 0.0568, "step": 10904 }, { "epoch": 5.07019990701999, "grad_norm": 1.174504280090332, "learning_rate": 2.132202720403403e-05, "loss": 0.0562, "step": 10906 }, { "epoch": 5.071129707112971, "grad_norm": 1.6554592847824097, "learning_rate": 2.1399730436193704e-05, "loss": 0.08, "step": 10908 }, { "epoch": 5.0720595072059504, "grad_norm": 1.2946306467056274, "learning_rate": 2.147746920156039e-05, "loss": 0.0901, "step": 10910 }, { "epoch": 5.072989307298931, "grad_norm": 1.6123777627944946, "learning_rate": 2.1555242732884044e-05, "loss": 0.093, "step": 10912 }, { "epoch": 5.0739191073919105, "grad_norm": 1.3118947744369507, "learning_rate": 2.1633050262571223e-05, "loss": 0.0896, "step": 10914 }, { "epoch": 5.074848907484891, "grad_norm": 1.0857356786727905, "learning_rate": 2.1710891022692925e-05, "loss": 0.0516, "step": 10916 }, { "epoch": 5.075778707577871, "grad_norm": 1.7532364130020142, "learning_rate": 2.1788764244992457e-05, "loss": 0.0768, "step": 10918 }, { "epoch": 5.076708507670851, "grad_norm": 1.2671988010406494, "learning_rate": 2.186666916089237e-05, "loss": 0.0615, "step": 10920 }, { "epoch": 5.077638307763831, "grad_norm": 1.0737037658691406, "learning_rate": 2.1944605001502782e-05, "loss": 0.0611, "step": 10922 }, { "epoch": 5.07856810785681, "grad_norm": 2.5819907188415527, "learning_rate": 2.2022570997628222e-05, "loss": 0.0733, "step": 10924 }, { "epoch": 5.079497907949791, "grad_norm": 2.648963212966919, "learning_rate": 2.210056637977597e-05, "loss": 0.103, "step": 10926 }, { "epoch": 5.08042770804277, "grad_norm": 0.9894536733627319, "learning_rate": 2.2178590378163003e-05, "loss": 0.0521, "step": 10928 }, { "epoch": 5.081357508135751, "grad_norm": 1.8477691411972046, "learning_rate": 2.2256642222723866e-05, "loss": 0.0865, "step": 10930 }, { "epoch": 5.0822873082287305, "grad_norm": 1.2638821601867676, "learning_rate": 2.233472114311854e-05, "loss": 0.0462, "step": 10932 }, { "epoch": 5.083217108321711, "grad_norm": 1.379410982131958, "learning_rate": 2.2412826368739423e-05, "loss": 0.0601, "step": 10934 }, { "epoch": 5.084146908414691, "grad_norm": 0.8403978943824768, "learning_rate": 2.2490957128719566e-05, "loss": 0.0675, "step": 10936 }, { "epoch": 5.085076708507671, "grad_norm": 1.4724233150482178, "learning_rate": 2.256911265193999e-05, "loss": 0.1061, "step": 10938 }, { "epoch": 5.086006508600651, "grad_norm": 1.8275896310806274, "learning_rate": 2.2647292167037154e-05, "loss": 0.1233, "step": 10940 }, { "epoch": 5.086936308693631, "grad_norm": 1.2040122747421265, "learning_rate": 2.27254949024108e-05, "loss": 0.076, "step": 10942 }, { "epoch": 5.087866108786611, "grad_norm": 1.2138248682022095, "learning_rate": 2.280372008623142e-05, "loss": 0.0468, "step": 10944 }, { "epoch": 5.088795908879591, "grad_norm": 1.3812378644943237, "learning_rate": 2.2881966946448112e-05, "loss": 0.0734, "step": 10946 }, { "epoch": 5.089725708972571, "grad_norm": 1.368330717086792, "learning_rate": 2.296023471079605e-05, "loss": 0.0821, "step": 10948 }, { "epoch": 5.0906555090655505, "grad_norm": 1.3312082290649414, "learning_rate": 2.3038522606803815e-05, "loss": 0.08, "step": 10950 }, { "epoch": 5.091585309158531, "grad_norm": 1.399098515510559, "learning_rate": 2.3116829861801747e-05, "loss": 0.0589, "step": 10952 }, { "epoch": 5.092515109251511, "grad_norm": 1.9711779356002808, "learning_rate": 2.3195155702928497e-05, "loss": 0.0739, "step": 10954 }, { "epoch": 5.093444909344491, "grad_norm": 1.5985890626907349, "learning_rate": 2.3273499357139818e-05, "loss": 0.0629, "step": 10956 }, { "epoch": 5.094374709437471, "grad_norm": 0.7068646550178528, "learning_rate": 2.3351860051215556e-05, "loss": 0.0553, "step": 10958 }, { "epoch": 5.095304509530451, "grad_norm": 0.8555617332458496, "learning_rate": 2.3430237011767116e-05, "loss": 0.0413, "step": 10960 }, { "epoch": 5.096234309623431, "grad_norm": 0.7853547930717468, "learning_rate": 2.3508629465245728e-05, "loss": 0.0828, "step": 10962 }, { "epoch": 5.097164109716411, "grad_norm": 0.8844277262687683, "learning_rate": 2.3587036637949327e-05, "loss": 0.0751, "step": 10964 }, { "epoch": 5.098093909809391, "grad_norm": 1.400120496749878, "learning_rate": 2.366545775603106e-05, "loss": 0.0994, "step": 10966 }, { "epoch": 5.099023709902371, "grad_norm": 1.1698178052902222, "learning_rate": 2.3743892045505777e-05, "loss": 0.046, "step": 10968 }, { "epoch": 5.099953509995351, "grad_norm": 1.7013192176818848, "learning_rate": 2.38223387322589e-05, "loss": 0.0634, "step": 10970 }, { "epoch": 5.100883310088331, "grad_norm": 1.1706453561782837, "learning_rate": 2.390079704205338e-05, "loss": 0.0469, "step": 10972 }, { "epoch": 5.101813110181311, "grad_norm": 1.6695013046264648, "learning_rate": 2.3979266200537205e-05, "loss": 0.1059, "step": 10974 }, { "epoch": 5.102742910274291, "grad_norm": 1.2515122890472412, "learning_rate": 2.4057745433251628e-05, "loss": 0.0783, "step": 10976 }, { "epoch": 5.103672710367271, "grad_norm": 1.2814478874206543, "learning_rate": 2.4136233965638137e-05, "loss": 0.0984, "step": 10978 }, { "epoch": 5.104602510460251, "grad_norm": 1.4301273822784424, "learning_rate": 2.4214731023046776e-05, "loss": 0.0529, "step": 10980 }, { "epoch": 5.105532310553231, "grad_norm": 1.409045696258545, "learning_rate": 2.4293235830743188e-05, "loss": 0.0929, "step": 10982 }, { "epoch": 5.106462110646211, "grad_norm": 1.390425205230713, "learning_rate": 2.4371747613916535e-05, "loss": 0.07, "step": 10984 }, { "epoch": 5.107391910739191, "grad_norm": 2.0132012367248535, "learning_rate": 2.4450265597687382e-05, "loss": 0.067, "step": 10986 }, { "epoch": 5.108321710832171, "grad_norm": 1.4662468433380127, "learning_rate": 2.4528789007114756e-05, "loss": 0.0565, "step": 10988 }, { "epoch": 5.1092515109251515, "grad_norm": 1.1352580785751343, "learning_rate": 2.4607317067204394e-05, "loss": 0.0563, "step": 10990 }, { "epoch": 5.110181311018131, "grad_norm": 1.3244822025299072, "learning_rate": 2.4685849002916135e-05, "loss": 0.0694, "step": 10992 }, { "epoch": 5.111111111111111, "grad_norm": 1.3275954723358154, "learning_rate": 2.4764384039171387e-05, "loss": 0.0609, "step": 10994 }, { "epoch": 5.112040911204091, "grad_norm": 0.8566855192184448, "learning_rate": 2.484292140086105e-05, "loss": 0.0602, "step": 10996 }, { "epoch": 5.112970711297071, "grad_norm": 1.1139239072799683, "learning_rate": 2.492146031285298e-05, "loss": 0.0833, "step": 10998 }, { "epoch": 5.113900511390051, "grad_norm": 2.1492762565612793, "learning_rate": 2.499999999999992e-05, "loss": 0.1161, "step": 11000 }, { "epoch": 5.113900511390051, "eval_cer": 0.16955333900358258, "eval_loss": 0.24899624288082123, "eval_runtime": 394.0738, "eval_samples_per_second": 32.212, "eval_steps_per_second": 1.007, "step": 11000 }, { "epoch": 5.114830311483031, "grad_norm": 1.5630639791488647, "learning_rate": 2.5078539687146948e-05, "loss": 0.0768, "step": 11002 }, { "epoch": 5.115760111576011, "grad_norm": 1.5762748718261719, "learning_rate": 2.5157078599138886e-05, "loss": 0.1077, "step": 11004 }, { "epoch": 5.116689911668991, "grad_norm": 0.9956851601600647, "learning_rate": 2.5235615960828544e-05, "loss": 0.0671, "step": 11006 }, { "epoch": 5.1176197117619715, "grad_norm": 1.9722270965576172, "learning_rate": 2.5314150997083803e-05, "loss": 0.0593, "step": 11008 }, { "epoch": 5.118549511854951, "grad_norm": 1.5124605894088745, "learning_rate": 2.5392682932795537e-05, "loss": 0.0816, "step": 11010 }, { "epoch": 5.119479311947932, "grad_norm": 1.3476002216339111, "learning_rate": 2.547121099288518e-05, "loss": 0.0745, "step": 11012 }, { "epoch": 5.120409112040911, "grad_norm": 0.9191723465919495, "learning_rate": 2.554973440231255e-05, "loss": 0.1023, "step": 11014 }, { "epoch": 5.121338912133891, "grad_norm": 1.2574830055236816, "learning_rate": 2.5628252386083403e-05, "loss": 0.0809, "step": 11016 }, { "epoch": 5.122268712226871, "grad_norm": 1.539140224456787, "learning_rate": 2.5706764169256743e-05, "loss": 0.0891, "step": 11018 }, { "epoch": 5.123198512319851, "grad_norm": 1.373398780822754, "learning_rate": 2.578526897695316e-05, "loss": 0.0726, "step": 11020 }, { "epoch": 5.124128312412831, "grad_norm": 1.5852015018463135, "learning_rate": 2.5863766034361797e-05, "loss": 0.0773, "step": 11022 }, { "epoch": 5.125058112505811, "grad_norm": 1.0527437925338745, "learning_rate": 2.594225456674831e-05, "loss": 0.059, "step": 11024 }, { "epoch": 5.1259879125987915, "grad_norm": 1.3821097612380981, "learning_rate": 2.6020733799462737e-05, "loss": 0.0691, "step": 11026 }, { "epoch": 5.126917712691771, "grad_norm": 1.0179837942123413, "learning_rate": 2.609920295794656e-05, "loss": 0.0738, "step": 11028 }, { "epoch": 5.127847512784752, "grad_norm": 1.7908450365066528, "learning_rate": 2.6177661267741038e-05, "loss": 0.0956, "step": 11030 }, { "epoch": 5.128777312877731, "grad_norm": 1.1716934442520142, "learning_rate": 2.625610795449416e-05, "loss": 0.0645, "step": 11032 }, { "epoch": 5.129707112970712, "grad_norm": 1.5297298431396484, "learning_rate": 2.6334542243968882e-05, "loss": 0.0728, "step": 11034 }, { "epoch": 5.130636913063691, "grad_norm": 1.6794601678848267, "learning_rate": 2.641296336205061e-05, "loss": 0.0738, "step": 11036 }, { "epoch": 5.131566713156671, "grad_norm": 1.2551597356796265, "learning_rate": 2.6491370534754217e-05, "loss": 0.0668, "step": 11038 }, { "epoch": 5.132496513249651, "grad_norm": 0.8580448627471924, "learning_rate": 2.6569762988232825e-05, "loss": 0.0375, "step": 11040 }, { "epoch": 5.133426313342631, "grad_norm": 0.8109052181243896, "learning_rate": 2.664813994878439e-05, "loss": 0.056, "step": 11042 }, { "epoch": 5.1343561134356115, "grad_norm": 1.9807177782058716, "learning_rate": 2.672650064286004e-05, "loss": 0.1092, "step": 11044 }, { "epoch": 5.135285913528591, "grad_norm": 2.173588275909424, "learning_rate": 2.6804844297071448e-05, "loss": 0.0925, "step": 11046 }, { "epoch": 5.136215713621572, "grad_norm": 1.381929874420166, "learning_rate": 2.688317013819829e-05, "loss": 0.059, "step": 11048 }, { "epoch": 5.137145513714551, "grad_norm": 1.6762280464172363, "learning_rate": 2.6961477393196146e-05, "loss": 0.0883, "step": 11050 }, { "epoch": 5.138075313807532, "grad_norm": 2.0811376571655273, "learning_rate": 2.7039765289203905e-05, "loss": 0.0844, "step": 11052 }, { "epoch": 5.139005113900511, "grad_norm": 2.0426993370056152, "learning_rate": 2.711803305355184e-05, "loss": 0.0698, "step": 11054 }, { "epoch": 5.139934913993492, "grad_norm": 1.5511572360992432, "learning_rate": 2.7196279913768537e-05, "loss": 0.0683, "step": 11056 }, { "epoch": 5.140864714086471, "grad_norm": 1.465307593345642, "learning_rate": 2.7274505097589154e-05, "loss": 0.1061, "step": 11058 }, { "epoch": 5.141794514179451, "grad_norm": 1.296337366104126, "learning_rate": 2.7352707832962807e-05, "loss": 0.0675, "step": 11060 }, { "epoch": 5.1427243142724315, "grad_norm": 1.4766134023666382, "learning_rate": 2.7430887348059966e-05, "loss": 0.0772, "step": 11062 }, { "epoch": 5.143654114365411, "grad_norm": 2.0222489833831787, "learning_rate": 2.7509042871280396e-05, "loss": 0.1247, "step": 11064 }, { "epoch": 5.144583914458392, "grad_norm": 1.3428126573562622, "learning_rate": 2.7587173631260535e-05, "loss": 0.0661, "step": 11066 }, { "epoch": 5.145513714551371, "grad_norm": 1.8053919076919556, "learning_rate": 2.7665278856881417e-05, "loss": 0.0765, "step": 11068 }, { "epoch": 5.146443514644352, "grad_norm": 2.4176998138427734, "learning_rate": 2.7743357777276086e-05, "loss": 0.1613, "step": 11070 }, { "epoch": 5.147373314737331, "grad_norm": 1.6899892091751099, "learning_rate": 2.7821409621836945e-05, "loss": 0.1134, "step": 11072 }, { "epoch": 5.148303114830312, "grad_norm": 1.7302422523498535, "learning_rate": 2.789943362022397e-05, "loss": 0.1081, "step": 11074 }, { "epoch": 5.149232914923291, "grad_norm": 1.0689784288406372, "learning_rate": 2.797742900237172e-05, "loss": 0.067, "step": 11076 }, { "epoch": 5.150162715016272, "grad_norm": 2.2317512035369873, "learning_rate": 2.8055394998497166e-05, "loss": 0.0813, "step": 11078 }, { "epoch": 5.1510925151092515, "grad_norm": 2.1485395431518555, "learning_rate": 2.8133330839107574e-05, "loss": 0.1378, "step": 11080 }, { "epoch": 5.152022315202231, "grad_norm": 1.6457802057266235, "learning_rate": 2.8211235755007484e-05, "loss": 0.1002, "step": 11082 }, { "epoch": 5.152952115295212, "grad_norm": 1.3326839208602905, "learning_rate": 2.8289108977307016e-05, "loss": 0.0983, "step": 11084 }, { "epoch": 5.153881915388191, "grad_norm": 1.8945016860961914, "learning_rate": 2.836694973742872e-05, "loss": 0.1279, "step": 11086 }, { "epoch": 5.154811715481172, "grad_norm": 1.8803046941757202, "learning_rate": 2.844475726711581e-05, "loss": 0.0847, "step": 11088 }, { "epoch": 5.155741515574151, "grad_norm": 1.313645362854004, "learning_rate": 2.8522530798439547e-05, "loss": 0.0746, "step": 11090 }, { "epoch": 5.156671315667132, "grad_norm": 1.5353357791900635, "learning_rate": 2.8600269563806238e-05, "loss": 0.0935, "step": 11092 }, { "epoch": 5.157601115760111, "grad_norm": 1.6016924381256104, "learning_rate": 2.8677972795965906e-05, "loss": 0.0999, "step": 11094 }, { "epoch": 5.158530915853092, "grad_norm": 2.97111439704895, "learning_rate": 2.875563972801885e-05, "loss": 0.0784, "step": 11096 }, { "epoch": 5.1594607159460715, "grad_norm": 1.6186655759811401, "learning_rate": 2.883326959342398e-05, "loss": 0.1141, "step": 11098 }, { "epoch": 5.160390516039052, "grad_norm": 1.2135052680969238, "learning_rate": 2.8910861626005688e-05, "loss": 0.0535, "step": 11100 }, { "epoch": 5.161320316132032, "grad_norm": 0.846235990524292, "learning_rate": 2.8988415059962024e-05, "loss": 0.0484, "step": 11102 }, { "epoch": 5.162250116225012, "grad_norm": 2.115225076675415, "learning_rate": 2.9065929129872084e-05, "loss": 0.0675, "step": 11104 }, { "epoch": 5.163179916317992, "grad_norm": 1.7884937524795532, "learning_rate": 2.914340307070294e-05, "loss": 0.0681, "step": 11106 }, { "epoch": 5.164109716410971, "grad_norm": 1.9819371700286865, "learning_rate": 2.9220836117818337e-05, "loss": 0.077, "step": 11108 }, { "epoch": 5.165039516503952, "grad_norm": 1.9104548692703247, "learning_rate": 2.9298227506985178e-05, "loss": 0.109, "step": 11110 }, { "epoch": 5.165969316596931, "grad_norm": 0.979775607585907, "learning_rate": 2.9375576474381793e-05, "loss": 0.0806, "step": 11112 }, { "epoch": 5.166899116689912, "grad_norm": 1.0888532400131226, "learning_rate": 2.9452882256605184e-05, "loss": 0.0644, "step": 11114 }, { "epoch": 5.1678289167828915, "grad_norm": 1.4200685024261475, "learning_rate": 2.9530144090678313e-05, "loss": 0.0676, "step": 11116 }, { "epoch": 5.168758716875872, "grad_norm": 2.3193681240081787, "learning_rate": 2.9607361214058335e-05, "loss": 0.1221, "step": 11118 }, { "epoch": 5.169688516968852, "grad_norm": 1.1054084300994873, "learning_rate": 2.968453286464307e-05, "loss": 0.0774, "step": 11120 }, { "epoch": 5.170618317061832, "grad_norm": 1.872146487236023, "learning_rate": 2.9761658280779664e-05, "loss": 0.0853, "step": 11122 }, { "epoch": 5.171548117154812, "grad_norm": 1.7935988903045654, "learning_rate": 2.9838736701271457e-05, "loss": 0.0727, "step": 11124 }, { "epoch": 5.172477917247791, "grad_norm": 2.529811143875122, "learning_rate": 2.9915767365385404e-05, "loss": 0.0816, "step": 11126 }, { "epoch": 5.173407717340772, "grad_norm": 1.9408584833145142, "learning_rate": 2.9992749512860122e-05, "loss": 0.1127, "step": 11128 }, { "epoch": 5.174337517433751, "grad_norm": 1.477112889289856, "learning_rate": 3.0069682383912708e-05, "loss": 0.0734, "step": 11130 }, { "epoch": 5.175267317526732, "grad_norm": 2.4046640396118164, "learning_rate": 3.014656521924697e-05, "loss": 0.0981, "step": 11132 }, { "epoch": 5.1761971176197115, "grad_norm": 1.4917956590652466, "learning_rate": 3.0223397260060265e-05, "loss": 0.0671, "step": 11134 }, { "epoch": 5.177126917712692, "grad_norm": 1.4354100227355957, "learning_rate": 3.0300177748051295e-05, "loss": 0.064, "step": 11136 }, { "epoch": 5.178056717805672, "grad_norm": 1.5973511934280396, "learning_rate": 3.03769059254278e-05, "loss": 0.0884, "step": 11138 }, { "epoch": 5.178986517898652, "grad_norm": 2.0413267612457275, "learning_rate": 3.0453581034913486e-05, "loss": 0.0788, "step": 11140 }, { "epoch": 5.179916317991632, "grad_norm": 2.7149460315704346, "learning_rate": 3.0530202319756144e-05, "loss": 0.0797, "step": 11142 }, { "epoch": 5.180846118084612, "grad_norm": 2.0406789779663086, "learning_rate": 3.060676902373453e-05, "loss": 0.1174, "step": 11144 }, { "epoch": 5.181775918177592, "grad_norm": 1.3339744806289673, "learning_rate": 3.068328039116611e-05, "loss": 0.0736, "step": 11146 }, { "epoch": 5.182705718270572, "grad_norm": 1.5914502143859863, "learning_rate": 3.0759735666914744e-05, "loss": 0.0831, "step": 11148 }, { "epoch": 5.183635518363552, "grad_norm": 1.2917993068695068, "learning_rate": 3.083613409639757e-05, "loss": 0.0663, "step": 11150 }, { "epoch": 5.1845653184565315, "grad_norm": 1.360071063041687, "learning_rate": 3.091247492559308e-05, "loss": 0.0866, "step": 11152 }, { "epoch": 5.185495118549512, "grad_norm": 2.4450371265411377, "learning_rate": 3.0988757401047955e-05, "loss": 0.0939, "step": 11154 }, { "epoch": 5.1864249186424916, "grad_norm": 2.175072193145752, "learning_rate": 3.106498076988505e-05, "loss": 0.1027, "step": 11156 }, { "epoch": 5.187354718735472, "grad_norm": 1.4904019832611084, "learning_rate": 3.1141144279810645e-05, "loss": 0.0803, "step": 11158 }, { "epoch": 5.188284518828452, "grad_norm": 1.4917701482772827, "learning_rate": 3.121724717912131e-05, "loss": 0.0794, "step": 11160 }, { "epoch": 5.189214318921432, "grad_norm": 1.9966301918029785, "learning_rate": 3.129328871671241e-05, "loss": 0.0775, "step": 11162 }, { "epoch": 5.190144119014412, "grad_norm": 1.4901390075683594, "learning_rate": 3.136926814208449e-05, "loss": 0.0976, "step": 11164 }, { "epoch": 5.191073919107392, "grad_norm": 1.2645851373672485, "learning_rate": 3.144518470535138e-05, "loss": 0.0999, "step": 11166 }, { "epoch": 5.192003719200372, "grad_norm": 1.299222469329834, "learning_rate": 3.152103765724734e-05, "loss": 0.0678, "step": 11168 }, { "epoch": 5.192933519293352, "grad_norm": 2.0527114868164062, "learning_rate": 3.159682624913419e-05, "loss": 0.0929, "step": 11170 }, { "epoch": 5.193863319386332, "grad_norm": 1.956254243850708, "learning_rate": 3.1672549733009376e-05, "loss": 0.0711, "step": 11172 }, { "epoch": 5.1947931194793115, "grad_norm": 1.1194896697998047, "learning_rate": 3.174820736151235e-05, "loss": 0.0765, "step": 11174 }, { "epoch": 5.195722919572292, "grad_norm": 2.3592116832733154, "learning_rate": 3.1823798387933026e-05, "loss": 0.1345, "step": 11176 }, { "epoch": 5.196652719665272, "grad_norm": 2.1487016677856445, "learning_rate": 3.1899322066218584e-05, "loss": 0.1015, "step": 11178 }, { "epoch": 5.197582519758252, "grad_norm": 1.729973316192627, "learning_rate": 3.197477765098062e-05, "loss": 0.0972, "step": 11180 }, { "epoch": 5.198512319851232, "grad_norm": 2.1152920722961426, "learning_rate": 3.205016439750316e-05, "loss": 0.0904, "step": 11182 }, { "epoch": 5.199442119944212, "grad_norm": 1.8803492784500122, "learning_rate": 3.212548156174928e-05, "loss": 0.0818, "step": 11184 }, { "epoch": 5.200371920037192, "grad_norm": 1.7058197259902954, "learning_rate": 3.2200728400369226e-05, "loss": 0.063, "step": 11186 }, { "epoch": 5.201301720130172, "grad_norm": 1.574267864227295, "learning_rate": 3.227590417070675e-05, "loss": 0.0892, "step": 11188 }, { "epoch": 5.202231520223152, "grad_norm": 1.6701850891113281, "learning_rate": 3.23510081308075e-05, "loss": 0.0988, "step": 11190 }, { "epoch": 5.203161320316132, "grad_norm": 1.479265809059143, "learning_rate": 3.2426039539425815e-05, "loss": 0.0728, "step": 11192 }, { "epoch": 5.204091120409112, "grad_norm": 1.8405462503433228, "learning_rate": 3.250099765603179e-05, "loss": 0.0945, "step": 11194 }, { "epoch": 5.205020920502092, "grad_norm": 1.4174997806549072, "learning_rate": 3.2575881740819273e-05, "loss": 0.1118, "step": 11196 }, { "epoch": 5.205950720595072, "grad_norm": 1.4903433322906494, "learning_rate": 3.26506910547124e-05, "loss": 0.0942, "step": 11198 }, { "epoch": 5.206880520688052, "grad_norm": 1.4177409410476685, "learning_rate": 3.2725424859373596e-05, "loss": 0.0868, "step": 11200 }, { "epoch": 5.207810320781032, "grad_norm": 2.3973066806793213, "learning_rate": 3.2800082417210315e-05, "loss": 0.1422, "step": 11202 }, { "epoch": 5.208740120874012, "grad_norm": 2.583517551422119, "learning_rate": 3.287466299138252e-05, "loss": 0.1062, "step": 11204 }, { "epoch": 5.209669920966992, "grad_norm": 1.1928949356079102, "learning_rate": 3.2949165845810215e-05, "loss": 0.0703, "step": 11206 }, { "epoch": 5.210599721059972, "grad_norm": 1.2239540815353394, "learning_rate": 3.302359024518013e-05, "loss": 0.107, "step": 11208 }, { "epoch": 5.211529521152952, "grad_norm": 2.33571457862854, "learning_rate": 3.309793545495358e-05, "loss": 0.1012, "step": 11210 }, { "epoch": 5.212459321245932, "grad_norm": 1.2043864727020264, "learning_rate": 3.317220074137344e-05, "loss": 0.1138, "step": 11212 }, { "epoch": 5.2133891213389125, "grad_norm": 1.8296688795089722, "learning_rate": 3.324638537147123e-05, "loss": 0.1016, "step": 11214 }, { "epoch": 5.214318921431892, "grad_norm": 1.440307855606079, "learning_rate": 3.332048861307461e-05, "loss": 0.0847, "step": 11216 }, { "epoch": 5.215248721524872, "grad_norm": 1.7288883924484253, "learning_rate": 3.339450973481442e-05, "loss": 0.1243, "step": 11218 }, { "epoch": 5.216178521617852, "grad_norm": 1.8922189474105835, "learning_rate": 3.346844800613214e-05, "loss": 0.1328, "step": 11220 }, { "epoch": 5.217108321710832, "grad_norm": 1.9221770763397217, "learning_rate": 3.354230269728697e-05, "loss": 0.0846, "step": 11222 }, { "epoch": 5.218038121803812, "grad_norm": 2.197085380554199, "learning_rate": 3.361607307936277e-05, "loss": 0.1277, "step": 11224 }, { "epoch": 5.218967921896792, "grad_norm": 1.7911341190338135, "learning_rate": 3.368975842427581e-05, "loss": 0.0885, "step": 11226 }, { "epoch": 5.219897721989772, "grad_norm": 1.5096545219421387, "learning_rate": 3.3763358004781394e-05, "loss": 0.0839, "step": 11228 }, { "epoch": 5.220827522082752, "grad_norm": 1.8931463956832886, "learning_rate": 3.3836871094481384e-05, "loss": 0.0961, "step": 11230 }, { "epoch": 5.2217573221757325, "grad_norm": 2.107968330383301, "learning_rate": 3.391029696783118e-05, "loss": 0.1123, "step": 11232 }, { "epoch": 5.222687122268712, "grad_norm": 1.6391505002975464, "learning_rate": 3.398363490014713e-05, "loss": 0.0741, "step": 11234 }, { "epoch": 5.223616922361693, "grad_norm": 2.2727880477905273, "learning_rate": 3.405688416761354e-05, "loss": 0.0942, "step": 11236 }, { "epoch": 5.224546722454672, "grad_norm": 1.599691390991211, "learning_rate": 3.413004404728955e-05, "loss": 0.0838, "step": 11238 }, { "epoch": 5.225476522547652, "grad_norm": 1.5289621353149414, "learning_rate": 3.4203113817116936e-05, "loss": 0.0724, "step": 11240 }, { "epoch": 5.226406322640632, "grad_norm": 1.5364100933074951, "learning_rate": 3.427609275592622e-05, "loss": 0.1012, "step": 11242 }, { "epoch": 5.227336122733612, "grad_norm": 1.8897290229797363, "learning_rate": 3.4348980143444906e-05, "loss": 0.1168, "step": 11244 }, { "epoch": 5.228265922826592, "grad_norm": 2.297315835952759, "learning_rate": 3.4421775260303993e-05, "loss": 0.0851, "step": 11246 }, { "epoch": 5.229195722919572, "grad_norm": 1.3774179220199585, "learning_rate": 3.4494477388044906e-05, "loss": 0.0706, "step": 11248 }, { "epoch": 5.2301255230125525, "grad_norm": 1.6587481498718262, "learning_rate": 3.456708580912716e-05, "loss": 0.0985, "step": 11250 }, { "epoch": 5.231055323105532, "grad_norm": 1.5946284532546997, "learning_rate": 3.463959980693479e-05, "loss": 0.1076, "step": 11252 }, { "epoch": 5.231985123198513, "grad_norm": 2.298661708831787, "learning_rate": 3.471201866578407e-05, "loss": 0.1016, "step": 11254 }, { "epoch": 5.232914923291492, "grad_norm": 1.2447930574417114, "learning_rate": 3.478434167093001e-05, "loss": 0.0781, "step": 11256 }, { "epoch": 5.233844723384473, "grad_norm": 1.9317702054977417, "learning_rate": 3.485656810857368e-05, "loss": 0.1135, "step": 11258 }, { "epoch": 5.234774523477452, "grad_norm": 1.8892072439193726, "learning_rate": 3.4928697265869454e-05, "loss": 0.0839, "step": 11260 }, { "epoch": 5.235704323570433, "grad_norm": 1.8717846870422363, "learning_rate": 3.500072843093151e-05, "loss": 0.0885, "step": 11262 }, { "epoch": 5.236634123663412, "grad_norm": 1.3314261436462402, "learning_rate": 3.507266089284141e-05, "loss": 0.0644, "step": 11264 }, { "epoch": 5.237563923756392, "grad_norm": 1.0393916368484497, "learning_rate": 3.5144493941654886e-05, "loss": 0.0769, "step": 11266 }, { "epoch": 5.2384937238493725, "grad_norm": 2.0877010822296143, "learning_rate": 3.5216226868408645e-05, "loss": 0.1018, "step": 11268 }, { "epoch": 5.239423523942352, "grad_norm": 1.7318999767303467, "learning_rate": 3.528785896512768e-05, "loss": 0.0752, "step": 11270 }, { "epoch": 5.240353324035333, "grad_norm": 1.8167376518249512, "learning_rate": 3.535938952483203e-05, "loss": 0.0861, "step": 11272 }, { "epoch": 5.241283124128312, "grad_norm": 1.8807436227798462, "learning_rate": 3.54308178415441e-05, "loss": 0.0951, "step": 11274 }, { "epoch": 5.242212924221293, "grad_norm": 2.0322265625, "learning_rate": 3.5502143210295077e-05, "loss": 0.097, "step": 11276 }, { "epoch": 5.243142724314272, "grad_norm": 2.1281898021698, "learning_rate": 3.557336492713245e-05, "loss": 0.1449, "step": 11278 }, { "epoch": 5.244072524407253, "grad_norm": 0.9990894198417664, "learning_rate": 3.5644482289126726e-05, "loss": 0.0785, "step": 11280 }, { "epoch": 5.245002324500232, "grad_norm": 1.8806668519973755, "learning_rate": 3.5715494594378155e-05, "loss": 0.1168, "step": 11282 }, { "epoch": 5.245932124593212, "grad_norm": 2.3310134410858154, "learning_rate": 3.578640114202396e-05, "loss": 0.1326, "step": 11284 }, { "epoch": 5.2468619246861925, "grad_norm": 1.0236868858337402, "learning_rate": 3.585720123224506e-05, "loss": 0.0518, "step": 11286 }, { "epoch": 5.247791724779172, "grad_norm": 2.51246976852417, "learning_rate": 3.592789416627322e-05, "loss": 0.0906, "step": 11288 }, { "epoch": 5.248721524872153, "grad_norm": 1.7576264142990112, "learning_rate": 3.599847924639781e-05, "loss": 0.1152, "step": 11290 }, { "epoch": 5.249651324965132, "grad_norm": 0.9916730523109436, "learning_rate": 3.6068955775972426e-05, "loss": 0.0457, "step": 11292 }, { "epoch": 5.250581125058113, "grad_norm": 2.0876212120056152, "learning_rate": 3.6139323059422334e-05, "loss": 0.1102, "step": 11294 }, { "epoch": 5.251510925151092, "grad_norm": 2.152745485305786, "learning_rate": 3.6209580402250775e-05, "loss": 0.0873, "step": 11296 }, { "epoch": 5.252440725244073, "grad_norm": 1.9583994150161743, "learning_rate": 3.627972711104606e-05, "loss": 0.0931, "step": 11298 }, { "epoch": 5.253370525337052, "grad_norm": 1.8593000173568726, "learning_rate": 3.634976249348864e-05, "loss": 0.0785, "step": 11300 }, { "epoch": 5.254300325430033, "grad_norm": 1.3899953365325928, "learning_rate": 3.641968585835742e-05, "loss": 0.0861, "step": 11302 }, { "epoch": 5.2552301255230125, "grad_norm": 2.4593896865844727, "learning_rate": 3.6489496515537156e-05, "loss": 0.1095, "step": 11304 }, { "epoch": 5.256159925615993, "grad_norm": 2.284862518310547, "learning_rate": 3.6559193776024706e-05, "loss": 0.1295, "step": 11306 }, { "epoch": 5.257089725708973, "grad_norm": 1.877328872680664, "learning_rate": 3.662877695193641e-05, "loss": 0.0984, "step": 11308 }, { "epoch": 5.258019525801952, "grad_norm": 1.6364229917526245, "learning_rate": 3.669824535651432e-05, "loss": 0.0943, "step": 11310 }, { "epoch": 5.258949325894933, "grad_norm": 1.7444112300872803, "learning_rate": 3.6767598304133256e-05, "loss": 0.0555, "step": 11312 }, { "epoch": 5.259879125987912, "grad_norm": 1.7216237783432007, "learning_rate": 3.683683511030778e-05, "loss": 0.1338, "step": 11314 }, { "epoch": 5.260808926080893, "grad_norm": 1.4406601190567017, "learning_rate": 3.690595509169842e-05, "loss": 0.0778, "step": 11316 }, { "epoch": 5.261738726173872, "grad_norm": 1.4580053091049194, "learning_rate": 3.6974957566119e-05, "loss": 0.097, "step": 11318 }, { "epoch": 5.262668526266853, "grad_norm": 2.465404510498047, "learning_rate": 3.704384185254281e-05, "loss": 0.1312, "step": 11320 }, { "epoch": 5.2635983263598325, "grad_norm": 2.486724853515625, "learning_rate": 3.7112607271109836e-05, "loss": 0.12, "step": 11322 }, { "epoch": 5.264528126452813, "grad_norm": 2.3581063747406006, "learning_rate": 3.7181253143133315e-05, "loss": 0.1003, "step": 11324 }, { "epoch": 5.265457926545793, "grad_norm": 1.9905922412872314, "learning_rate": 3.7249778791105886e-05, "loss": 0.0797, "step": 11326 }, { "epoch": 5.266387726638772, "grad_norm": 2.3152265548706055, "learning_rate": 3.7318183538707294e-05, "loss": 0.0759, "step": 11328 }, { "epoch": 5.267317526731753, "grad_norm": 2.456657886505127, "learning_rate": 3.738646671081015e-05, "loss": 0.1054, "step": 11330 }, { "epoch": 5.268247326824732, "grad_norm": 2.517679214477539, "learning_rate": 3.745462763348719e-05, "loss": 0.1148, "step": 11332 }, { "epoch": 5.269177126917713, "grad_norm": 1.8364942073822021, "learning_rate": 3.7522665634017697e-05, "loss": 0.1149, "step": 11334 }, { "epoch": 5.270106927010692, "grad_norm": 1.4396495819091797, "learning_rate": 3.7590580040894e-05, "loss": 0.0899, "step": 11336 }, { "epoch": 5.271036727103673, "grad_norm": 2.2425150871276855, "learning_rate": 3.7658370183828325e-05, "loss": 0.1255, "step": 11338 }, { "epoch": 5.2719665271966525, "grad_norm": 1.2200160026550293, "learning_rate": 3.7726035393759264e-05, "loss": 0.0657, "step": 11340 }, { "epoch": 5.272896327289633, "grad_norm": 2.1287434101104736, "learning_rate": 3.779357500285856e-05, "loss": 0.091, "step": 11342 }, { "epoch": 5.273826127382613, "grad_norm": 3.3361425399780273, "learning_rate": 3.786098834453763e-05, "loss": 0.1309, "step": 11344 }, { "epoch": 5.274755927475593, "grad_norm": 1.4539604187011719, "learning_rate": 3.7928274753453855e-05, "loss": 0.0952, "step": 11346 }, { "epoch": 5.275685727568573, "grad_norm": 1.4602144956588745, "learning_rate": 3.79954335655177e-05, "loss": 0.055, "step": 11348 }, { "epoch": 5.276615527661553, "grad_norm": 1.960712194442749, "learning_rate": 3.806246411789872e-05, "loss": 0.1001, "step": 11350 }, { "epoch": 5.277545327754533, "grad_norm": 1.295759677886963, "learning_rate": 3.812936574903243e-05, "loss": 0.1191, "step": 11352 }, { "epoch": 5.278475127847512, "grad_norm": 1.8985122442245483, "learning_rate": 3.819613779862665e-05, "loss": 0.0631, "step": 11354 }, { "epoch": 5.279404927940493, "grad_norm": 1.404944896697998, "learning_rate": 3.8262779607668306e-05, "loss": 0.0574, "step": 11356 }, { "epoch": 5.2803347280334725, "grad_norm": 2.419867515563965, "learning_rate": 3.83292905184297e-05, "loss": 0.1272, "step": 11358 }, { "epoch": 5.281264528126453, "grad_norm": 1.4163614511489868, "learning_rate": 3.839566987447485e-05, "loss": 0.0894, "step": 11360 }, { "epoch": 5.282194328219433, "grad_norm": 1.7941302061080933, "learning_rate": 3.846191702066647e-05, "loss": 0.0982, "step": 11362 }, { "epoch": 5.283124128312413, "grad_norm": 1.6362106800079346, "learning_rate": 3.8528031303171895e-05, "loss": 0.095, "step": 11364 }, { "epoch": 5.284053928405393, "grad_norm": 1.7592780590057373, "learning_rate": 3.8594012069469776e-05, "loss": 0.1259, "step": 11366 }, { "epoch": 5.284983728498373, "grad_norm": 1.6937856674194336, "learning_rate": 3.8659858668356725e-05, "loss": 0.0922, "step": 11368 }, { "epoch": 5.285913528591353, "grad_norm": 1.8944251537322998, "learning_rate": 3.8725570449953245e-05, "loss": 0.1156, "step": 11370 }, { "epoch": 5.286843328684333, "grad_norm": 2.237471342086792, "learning_rate": 3.879114676571074e-05, "loss": 0.1111, "step": 11372 }, { "epoch": 5.287773128777313, "grad_norm": 1.5272622108459473, "learning_rate": 3.8856586968417293e-05, "loss": 0.0561, "step": 11374 }, { "epoch": 5.2887029288702925, "grad_norm": 2.6301655769348145, "learning_rate": 3.89218904122046e-05, "loss": 0.0964, "step": 11376 }, { "epoch": 5.289632728963273, "grad_norm": 2.301297187805176, "learning_rate": 3.8987056452554184e-05, "loss": 0.0889, "step": 11378 }, { "epoch": 5.290562529056253, "grad_norm": 4.285898208618164, "learning_rate": 3.905208444630323e-05, "loss": 0.0971, "step": 11380 }, { "epoch": 5.291492329149233, "grad_norm": 1.670302152633667, "learning_rate": 3.9116973751651926e-05, "loss": 0.0801, "step": 11382 }, { "epoch": 5.292422129242213, "grad_norm": 2.3147850036621094, "learning_rate": 3.918172372816887e-05, "loss": 0.0535, "step": 11384 }, { "epoch": 5.293351929335193, "grad_norm": 1.3866766691207886, "learning_rate": 3.9246333736798014e-05, "loss": 0.1409, "step": 11386 }, { "epoch": 5.294281729428173, "grad_norm": 1.332489252090454, "learning_rate": 3.931080313986472e-05, "loss": 0.0882, "step": 11388 }, { "epoch": 5.295211529521153, "grad_norm": 1.9111614227294922, "learning_rate": 3.937513130108188e-05, "loss": 0.1024, "step": 11390 }, { "epoch": 5.296141329614133, "grad_norm": 1.898848056793213, "learning_rate": 3.943931758555671e-05, "loss": 0.0851, "step": 11392 }, { "epoch": 5.297071129707113, "grad_norm": 1.1371184587478638, "learning_rate": 3.950336135979621e-05, "loss": 0.0709, "step": 11394 }, { "epoch": 5.298000929800093, "grad_norm": 1.9778186082839966, "learning_rate": 3.956726199171441e-05, "loss": 0.0934, "step": 11396 }, { "epoch": 5.298930729893073, "grad_norm": 1.7803287506103516, "learning_rate": 3.963101885063772e-05, "loss": 0.0733, "step": 11398 }, { "epoch": 5.299860529986053, "grad_norm": 1.9970037937164307, "learning_rate": 3.9694631307311755e-05, "loss": 0.0906, "step": 11400 }, { "epoch": 5.300790330079033, "grad_norm": 2.1106204986572266, "learning_rate": 3.9758098733907316e-05, "loss": 0.1051, "step": 11402 }, { "epoch": 5.301720130172013, "grad_norm": 2.5044541358947754, "learning_rate": 3.98214205040264e-05, "loss": 0.0845, "step": 11404 }, { "epoch": 5.302649930264993, "grad_norm": 1.6073615550994873, "learning_rate": 3.988459599270889e-05, "loss": 0.0849, "step": 11406 }, { "epoch": 5.303579730357973, "grad_norm": 1.6361602544784546, "learning_rate": 3.9947624576437955e-05, "loss": 0.0859, "step": 11408 }, { "epoch": 5.304509530450953, "grad_norm": 2.2464001178741455, "learning_rate": 4.001050563314704e-05, "loss": 0.1354, "step": 11410 }, { "epoch": 5.305439330543933, "grad_norm": 1.8336480855941772, "learning_rate": 4.0073238542225585e-05, "loss": 0.1097, "step": 11412 }, { "epoch": 5.306369130636913, "grad_norm": 1.4459197521209717, "learning_rate": 4.0135822684524956e-05, "loss": 0.0939, "step": 11414 }, { "epoch": 5.3072989307298934, "grad_norm": 2.2981925010681152, "learning_rate": 4.019825744236509e-05, "loss": 0.1544, "step": 11416 }, { "epoch": 5.308228730822873, "grad_norm": 2.20123553276062, "learning_rate": 4.026054219953998e-05, "loss": 0.1357, "step": 11418 }, { "epoch": 5.3091585309158535, "grad_norm": 2.336221694946289, "learning_rate": 4.032267634132436e-05, "loss": 0.1154, "step": 11420 }, { "epoch": 5.310088331008833, "grad_norm": 2.073302984237671, "learning_rate": 4.038465925447928e-05, "loss": 0.1472, "step": 11422 }, { "epoch": 5.311018131101813, "grad_norm": 2.4125449657440186, "learning_rate": 4.044649032725831e-05, "loss": 0.1191, "step": 11424 }, { "epoch": 5.311947931194793, "grad_norm": 1.941807746887207, "learning_rate": 4.050816894941388e-05, "loss": 0.1084, "step": 11426 }, { "epoch": 5.312877731287773, "grad_norm": 2.9095187187194824, "learning_rate": 4.056969451220276e-05, "loss": 0.132, "step": 11428 }, { "epoch": 5.313807531380753, "grad_norm": 2.8945846557617188, "learning_rate": 4.0631066408392536e-05, "loss": 0.1003, "step": 11430 }, { "epoch": 5.314737331473733, "grad_norm": 1.9887511730194092, "learning_rate": 4.069228403226753e-05, "loss": 0.1252, "step": 11432 }, { "epoch": 5.315667131566713, "grad_norm": 2.66692852973938, "learning_rate": 4.0753346779634204e-05, "loss": 0.1482, "step": 11434 }, { "epoch": 5.316596931659693, "grad_norm": 2.027012586593628, "learning_rate": 4.081425404782812e-05, "loss": 0.0965, "step": 11436 }, { "epoch": 5.3175267317526735, "grad_norm": 2.626821994781494, "learning_rate": 4.087500523571898e-05, "loss": 0.1247, "step": 11438 }, { "epoch": 5.318456531845653, "grad_norm": 1.988160252571106, "learning_rate": 4.0935599743717234e-05, "loss": 0.0932, "step": 11440 }, { "epoch": 5.319386331938633, "grad_norm": 2.163687229156494, "learning_rate": 4.099603697377942e-05, "loss": 0.124, "step": 11442 }, { "epoch": 5.320316132031613, "grad_norm": 2.7263569831848145, "learning_rate": 4.1056316329414514e-05, "loss": 0.1371, "step": 11444 }, { "epoch": 5.321245932124593, "grad_norm": 2.226001262664795, "learning_rate": 4.111643721568979e-05, "loss": 0.0938, "step": 11446 }, { "epoch": 5.322175732217573, "grad_norm": 1.9844993352890015, "learning_rate": 4.117639903923608e-05, "loss": 0.0868, "step": 11448 }, { "epoch": 5.323105532310553, "grad_norm": 2.833519697189331, "learning_rate": 4.123620120825458e-05, "loss": 0.1502, "step": 11450 }, { "epoch": 5.324035332403533, "grad_norm": 2.386953353881836, "learning_rate": 4.129584313252192e-05, "loss": 0.1383, "step": 11452 }, { "epoch": 5.324965132496513, "grad_norm": 1.8518837690353394, "learning_rate": 4.135532422339645e-05, "loss": 0.0961, "step": 11454 }, { "epoch": 5.3258949325894935, "grad_norm": 2.2450168132781982, "learning_rate": 4.141464389382386e-05, "loss": 0.0929, "step": 11456 }, { "epoch": 5.326824732682473, "grad_norm": 1.630490779876709, "learning_rate": 4.1473801558342826e-05, "loss": 0.0967, "step": 11458 }, { "epoch": 5.327754532775454, "grad_norm": 1.552894949913025, "learning_rate": 4.1532796633091296e-05, "loss": 0.0822, "step": 11460 }, { "epoch": 5.328684332868433, "grad_norm": 1.2568556070327759, "learning_rate": 4.159162853581142e-05, "loss": 0.0721, "step": 11462 }, { "epoch": 5.329614132961414, "grad_norm": 2.288292407989502, "learning_rate": 4.165029668585622e-05, "loss": 0.0898, "step": 11464 }, { "epoch": 5.330543933054393, "grad_norm": 2.1545231342315674, "learning_rate": 4.1708800504194786e-05, "loss": 0.1223, "step": 11466 }, { "epoch": 5.331473733147373, "grad_norm": 2.0439212322235107, "learning_rate": 4.176713941341792e-05, "loss": 0.095, "step": 11468 }, { "epoch": 5.332403533240353, "grad_norm": 1.5042338371276855, "learning_rate": 4.182531283774428e-05, "loss": 0.1133, "step": 11470 }, { "epoch": 5.333333333333333, "grad_norm": 1.8512599468231201, "learning_rate": 4.188332020302552e-05, "loss": 0.0823, "step": 11472 }, { "epoch": 5.3342631334263135, "grad_norm": 1.6908984184265137, "learning_rate": 4.1941160936752495e-05, "loss": 0.0586, "step": 11474 }, { "epoch": 5.335192933519293, "grad_norm": 1.7031713724136353, "learning_rate": 4.1998834468060454e-05, "loss": 0.0912, "step": 11476 }, { "epoch": 5.336122733612274, "grad_norm": 1.5753668546676636, "learning_rate": 4.2056340227734845e-05, "loss": 0.0842, "step": 11478 }, { "epoch": 5.337052533705253, "grad_norm": 1.9848612546920776, "learning_rate": 4.211367764821717e-05, "loss": 0.075, "step": 11480 }, { "epoch": 5.337982333798234, "grad_norm": 2.2176876068115234, "learning_rate": 4.217084616361012e-05, "loss": 0.1039, "step": 11482 }, { "epoch": 5.338912133891213, "grad_norm": 1.9406460523605347, "learning_rate": 4.2227845209683655e-05, "loss": 0.1202, "step": 11484 }, { "epoch": 5.339841933984193, "grad_norm": 2.7035505771636963, "learning_rate": 4.2284674223880065e-05, "loss": 0.104, "step": 11486 }, { "epoch": 5.340771734077173, "grad_norm": 1.8226118087768555, "learning_rate": 4.234133264532006e-05, "loss": 0.0938, "step": 11488 }, { "epoch": 5.341701534170153, "grad_norm": 2.653550386428833, "learning_rate": 4.239781991480782e-05, "loss": 0.1218, "step": 11490 }, { "epoch": 5.3426313342631335, "grad_norm": 2.2135233879089355, "learning_rate": 4.245413547483675e-05, "loss": 0.11, "step": 11492 }, { "epoch": 5.343561134356113, "grad_norm": 2.694705009460449, "learning_rate": 4.251027876959513e-05, "loss": 0.1385, "step": 11494 }, { "epoch": 5.344490934449094, "grad_norm": 2.471039295196533, "learning_rate": 4.2566249244971164e-05, "loss": 0.136, "step": 11496 }, { "epoch": 5.345420734542073, "grad_norm": 2.990072250366211, "learning_rate": 4.262204634855893e-05, "loss": 0.13, "step": 11498 }, { "epoch": 5.346350534635054, "grad_norm": 2.195366144180298, "learning_rate": 4.2677669529663615e-05, "loss": 0.1004, "step": 11500 }, { "epoch": 5.347280334728033, "grad_norm": 1.9143753051757812, "learning_rate": 4.2733118239306806e-05, "loss": 0.1346, "step": 11502 }, { "epoch": 5.348210134821014, "grad_norm": 1.5260533094406128, "learning_rate": 4.278839193023213e-05, "loss": 0.0983, "step": 11504 }, { "epoch": 5.349139934913993, "grad_norm": 1.5649428367614746, "learning_rate": 4.2843490056910494e-05, "loss": 0.1277, "step": 11506 }, { "epoch": 5.350069735006974, "grad_norm": 1.6205252408981323, "learning_rate": 4.2898412075545705e-05, "loss": 0.0809, "step": 11508 }, { "epoch": 5.3509995350999535, "grad_norm": 1.5659239292144775, "learning_rate": 4.2953157444079665e-05, "loss": 0.1026, "step": 11510 }, { "epoch": 5.351929335192933, "grad_norm": 1.4845839738845825, "learning_rate": 4.300772562219759e-05, "loss": 0.087, "step": 11512 }, { "epoch": 5.352859135285914, "grad_norm": 1.2029093503952026, "learning_rate": 4.306211607133369e-05, "loss": 0.0538, "step": 11514 }, { "epoch": 5.353788935378893, "grad_norm": 0.9126359224319458, "learning_rate": 4.3116328254676145e-05, "loss": 0.093, "step": 11516 }, { "epoch": 5.354718735471874, "grad_norm": 1.0192923545837402, "learning_rate": 4.317036163717252e-05, "loss": 0.066, "step": 11518 }, { "epoch": 5.355648535564853, "grad_norm": 1.9714549779891968, "learning_rate": 4.322421568553526e-05, "loss": 0.1279, "step": 11520 }, { "epoch": 5.356578335657834, "grad_norm": 1.4170626401901245, "learning_rate": 4.327788986824654e-05, "loss": 0.0732, "step": 11522 }, { "epoch": 5.357508135750813, "grad_norm": 2.524904251098633, "learning_rate": 4.333138365556396e-05, "loss": 0.1047, "step": 11524 }, { "epoch": 5.358437935843794, "grad_norm": 1.91628897190094, "learning_rate": 4.338469651952532e-05, "loss": 0.1419, "step": 11526 }, { "epoch": 5.3593677359367735, "grad_norm": 2.905731439590454, "learning_rate": 4.343782793395436e-05, "loss": 0.1199, "step": 11528 }, { "epoch": 5.360297536029754, "grad_norm": 2.006937265396118, "learning_rate": 4.3490777374465206e-05, "loss": 0.0875, "step": 11530 }, { "epoch": 5.361227336122734, "grad_norm": 1.3017675876617432, "learning_rate": 4.354354431846841e-05, "loss": 0.0863, "step": 11532 }, { "epoch": 5.362157136215713, "grad_norm": 2.5263593196868896, "learning_rate": 4.359612824517559e-05, "loss": 0.1108, "step": 11534 }, { "epoch": 5.363086936308694, "grad_norm": 1.658360242843628, "learning_rate": 4.3648528635604474e-05, "loss": 0.1027, "step": 11536 }, { "epoch": 5.364016736401673, "grad_norm": 1.8428912162780762, "learning_rate": 4.370074497258451e-05, "loss": 0.0805, "step": 11538 }, { "epoch": 5.364946536494654, "grad_norm": 1.5796273946762085, "learning_rate": 4.375277674076141e-05, "loss": 0.1246, "step": 11540 }, { "epoch": 5.365876336587633, "grad_norm": 1.9814680814743042, "learning_rate": 4.380462342660274e-05, "loss": 0.1164, "step": 11542 }, { "epoch": 5.366806136680614, "grad_norm": 1.8096002340316772, "learning_rate": 4.385628451840257e-05, "loss": 0.0965, "step": 11544 }, { "epoch": 5.3677359367735935, "grad_norm": 2.142352342605591, "learning_rate": 4.3907759506286745e-05, "loss": 0.1152, "step": 11546 }, { "epoch": 5.368665736866574, "grad_norm": 1.7390378713607788, "learning_rate": 4.3959047882218024e-05, "loss": 0.0991, "step": 11548 }, { "epoch": 5.369595536959554, "grad_norm": 1.4798375368118286, "learning_rate": 4.401014914000072e-05, "loss": 0.1105, "step": 11550 }, { "epoch": 5.370525337052534, "grad_norm": 1.8578903675079346, "learning_rate": 4.406106277528612e-05, "loss": 0.1109, "step": 11552 }, { "epoch": 5.371455137145514, "grad_norm": 2.594890832901001, "learning_rate": 4.411178828557725e-05, "loss": 0.1121, "step": 11554 }, { "epoch": 5.372384937238493, "grad_norm": 1.7592145204544067, "learning_rate": 4.416232517023372e-05, "loss": 0.1306, "step": 11556 }, { "epoch": 5.373314737331474, "grad_norm": 2.1334269046783447, "learning_rate": 4.421267293047691e-05, "loss": 0.0937, "step": 11558 }, { "epoch": 5.374244537424453, "grad_norm": 2.657442569732666, "learning_rate": 4.4262831069394696e-05, "loss": 0.1323, "step": 11560 }, { "epoch": 5.375174337517434, "grad_norm": 2.2011539936065674, "learning_rate": 4.431279909194654e-05, "loss": 0.1348, "step": 11562 }, { "epoch": 5.3761041376104135, "grad_norm": 1.2952550649642944, "learning_rate": 4.4362576504968296e-05, "loss": 0.0965, "step": 11564 }, { "epoch": 5.377033937703394, "grad_norm": 2.6670000553131104, "learning_rate": 4.441216281717689e-05, "loss": 0.1361, "step": 11566 }, { "epoch": 5.377963737796374, "grad_norm": 1.7321116924285889, "learning_rate": 4.4461557539175526e-05, "loss": 0.0725, "step": 11568 }, { "epoch": 5.378893537889354, "grad_norm": 1.7977274656295776, "learning_rate": 4.451076018345823e-05, "loss": 0.1151, "step": 11570 }, { "epoch": 5.379823337982334, "grad_norm": 1.4992939233779907, "learning_rate": 4.455977026441471e-05, "loss": 0.1326, "step": 11572 }, { "epoch": 5.380753138075314, "grad_norm": 2.565723180770874, "learning_rate": 4.460858729833523e-05, "loss": 0.1209, "step": 11574 }, { "epoch": 5.381682938168294, "grad_norm": 1.5369479656219482, "learning_rate": 4.4657210803415417e-05, "loss": 0.0945, "step": 11576 }, { "epoch": 5.382612738261274, "grad_norm": 2.048795700073242, "learning_rate": 4.4705640299760966e-05, "loss": 0.0981, "step": 11578 }, { "epoch": 5.383542538354254, "grad_norm": 2.7538375854492188, "learning_rate": 4.47538753093922e-05, "loss": 0.1221, "step": 11580 }, { "epoch": 5.3844723384472335, "grad_norm": 1.8352173566818237, "learning_rate": 4.480191535624914e-05, "loss": 0.0898, "step": 11582 }, { "epoch": 5.385402138540214, "grad_norm": 1.7872314453125, "learning_rate": 4.4849759966195866e-05, "loss": 0.0974, "step": 11584 }, { "epoch": 5.386331938633194, "grad_norm": 1.0448623895645142, "learning_rate": 4.489740866702535e-05, "loss": 0.088, "step": 11586 }, { "epoch": 5.387261738726174, "grad_norm": 2.4310684204101562, "learning_rate": 4.4944860988464255e-05, "loss": 0.1098, "step": 11588 }, { "epoch": 5.388191538819154, "grad_norm": 1.7633298635482788, "learning_rate": 4.499211646217721e-05, "loss": 0.0943, "step": 11590 }, { "epoch": 5.389121338912134, "grad_norm": 1.431494116783142, "learning_rate": 4.5039174621771877e-05, "loss": 0.0901, "step": 11592 }, { "epoch": 5.390051139005114, "grad_norm": 1.789330244064331, "learning_rate": 4.508603500280312e-05, "loss": 0.0686, "step": 11594 }, { "epoch": 5.390980939098094, "grad_norm": 2.4541308879852295, "learning_rate": 4.513269714277796e-05, "loss": 0.0998, "step": 11596 }, { "epoch": 5.391910739191074, "grad_norm": 1.5370616912841797, "learning_rate": 4.517916058116e-05, "loss": 0.096, "step": 11598 }, { "epoch": 5.3928405392840535, "grad_norm": 1.0903874635696411, "learning_rate": 4.522542485937365e-05, "loss": 0.0881, "step": 11600 }, { "epoch": 5.393770339377034, "grad_norm": 1.8697644472122192, "learning_rate": 4.5271489520809326e-05, "loss": 0.0946, "step": 11602 }, { "epoch": 5.394700139470014, "grad_norm": 1.3854204416275024, "learning_rate": 4.5317354110827304e-05, "loss": 0.1008, "step": 11604 }, { "epoch": 5.395629939562994, "grad_norm": 2.6887855529785156, "learning_rate": 4.536301817676266e-05, "loss": 0.1298, "step": 11606 }, { "epoch": 5.396559739655974, "grad_norm": 1.6542638540267944, "learning_rate": 4.5408481267929544e-05, "loss": 0.1068, "step": 11608 }, { "epoch": 5.397489539748954, "grad_norm": 1.8354402780532837, "learning_rate": 4.545374293562551e-05, "loss": 0.1413, "step": 11610 }, { "epoch": 5.398419339841934, "grad_norm": 1.3672248125076294, "learning_rate": 4.549880273313631e-05, "loss": 0.1115, "step": 11612 }, { "epoch": 5.399349139934914, "grad_norm": 1.5183223485946655, "learning_rate": 4.554366021573972e-05, "loss": 0.0979, "step": 11614 }, { "epoch": 5.400278940027894, "grad_norm": 0.6600474715232849, "learning_rate": 4.558831494071067e-05, "loss": 0.0511, "step": 11616 }, { "epoch": 5.401208740120874, "grad_norm": 2.5264148712158203, "learning_rate": 4.5632766467324945e-05, "loss": 0.1395, "step": 11618 }, { "epoch": 5.402138540213854, "grad_norm": 2.209649085998535, "learning_rate": 4.567701435686397e-05, "loss": 0.0872, "step": 11620 }, { "epoch": 5.4030683403068345, "grad_norm": 2.005600929260254, "learning_rate": 4.5721058172618996e-05, "loss": 0.0892, "step": 11622 }, { "epoch": 5.403998140399814, "grad_norm": 2.194577217102051, "learning_rate": 4.576489747989528e-05, "loss": 0.1521, "step": 11624 }, { "epoch": 5.404927940492794, "grad_norm": 0.8155162930488586, "learning_rate": 4.5808531846016574e-05, "loss": 0.0983, "step": 11626 }, { "epoch": 5.405857740585774, "grad_norm": 1.8537741899490356, "learning_rate": 4.585196084032924e-05, "loss": 0.1236, "step": 11628 }, { "epoch": 5.406787540678754, "grad_norm": 1.2617179155349731, "learning_rate": 4.589518403420669e-05, "loss": 0.1357, "step": 11630 }, { "epoch": 5.407717340771734, "grad_norm": 2.3203861713409424, "learning_rate": 4.5938201001053495e-05, "loss": 0.1044, "step": 11632 }, { "epoch": 5.408647140864714, "grad_norm": 2.492465019226074, "learning_rate": 4.598101131630948e-05, "loss": 0.1014, "step": 11634 }, { "epoch": 5.409576940957694, "grad_norm": 2.4575765132904053, "learning_rate": 4.602361455745418e-05, "loss": 0.1334, "step": 11636 }, { "epoch": 5.410506741050674, "grad_norm": 2.712552070617676, "learning_rate": 4.6066010304010773e-05, "loss": 0.104, "step": 11638 }, { "epoch": 5.4114365411436545, "grad_norm": 2.6647660732269287, "learning_rate": 4.6108198137550305e-05, "loss": 0.1283, "step": 11640 }, { "epoch": 5.412366341236634, "grad_norm": 2.2328505516052246, "learning_rate": 4.6150177641696e-05, "loss": 0.1113, "step": 11642 }, { "epoch": 5.413296141329614, "grad_norm": 1.7551579475402832, "learning_rate": 4.6191948402127e-05, "loss": 0.0888, "step": 11644 }, { "epoch": 5.414225941422594, "grad_norm": 2.129760980606079, "learning_rate": 4.6233510006582846e-05, "loss": 0.1264, "step": 11646 }, { "epoch": 5.415155741515574, "grad_norm": 2.062561511993408, "learning_rate": 4.627486204486721e-05, "loss": 0.1321, "step": 11648 }, { "epoch": 5.416085541608554, "grad_norm": 3.2503623962402344, "learning_rate": 4.6316004108852197e-05, "loss": 0.1422, "step": 11650 }, { "epoch": 5.417015341701534, "grad_norm": 2.3973312377929688, "learning_rate": 4.635693579248233e-05, "loss": 0.0957, "step": 11652 }, { "epoch": 5.417945141794514, "grad_norm": 2.7722744941711426, "learning_rate": 4.639765669177825e-05, "loss": 0.13, "step": 11654 }, { "epoch": 5.418874941887494, "grad_norm": 2.785311698913574, "learning_rate": 4.643816640484125e-05, "loss": 0.159, "step": 11656 }, { "epoch": 5.4198047419804745, "grad_norm": 2.567335367202759, "learning_rate": 4.647846453185673e-05, "loss": 0.1306, "step": 11658 }, { "epoch": 5.420734542073454, "grad_norm": 2.3532044887542725, "learning_rate": 4.651855067509852e-05, "loss": 0.1062, "step": 11660 }, { "epoch": 5.421664342166435, "grad_norm": 2.760995626449585, "learning_rate": 4.6558424438932504e-05, "loss": 0.1029, "step": 11662 }, { "epoch": 5.422594142259414, "grad_norm": 2.2725772857666016, "learning_rate": 4.6598085429820766e-05, "loss": 0.0943, "step": 11664 }, { "epoch": 5.423523942352395, "grad_norm": 1.521316647529602, "learning_rate": 4.663753325632543e-05, "loss": 0.0825, "step": 11666 }, { "epoch": 5.424453742445374, "grad_norm": 2.567868709564209, "learning_rate": 4.667676752911219e-05, "loss": 0.1539, "step": 11668 }, { "epoch": 5.425383542538354, "grad_norm": 2.613746404647827, "learning_rate": 4.671578786095473e-05, "loss": 0.1226, "step": 11670 }, { "epoch": 5.426313342631334, "grad_norm": 2.0784683227539062, "learning_rate": 4.6754593866738066e-05, "loss": 0.1239, "step": 11672 }, { "epoch": 5.427243142724314, "grad_norm": 2.0960094928741455, "learning_rate": 4.679318516346263e-05, "loss": 0.1114, "step": 11674 }, { "epoch": 5.4281729428172945, "grad_norm": 4.208705425262451, "learning_rate": 4.6831561370247934e-05, "loss": 0.2744, "step": 11676 }, { "epoch": 5.429102742910274, "grad_norm": 2.919036626815796, "learning_rate": 4.686972210833623e-05, "loss": 0.137, "step": 11678 }, { "epoch": 5.4300325430032546, "grad_norm": 1.9325984716415405, "learning_rate": 4.690766700109656e-05, "loss": 0.1081, "step": 11680 }, { "epoch": 5.430962343096234, "grad_norm": 3.5560193061828613, "learning_rate": 4.6945395674028e-05, "loss": 0.1066, "step": 11682 }, { "epoch": 5.431892143189215, "grad_norm": 1.7896685600280762, "learning_rate": 4.698290775476384e-05, "loss": 0.1267, "step": 11684 }, { "epoch": 5.432821943282194, "grad_norm": 2.4913816452026367, "learning_rate": 4.7020202873075056e-05, "loss": 0.1407, "step": 11686 }, { "epoch": 5.433751743375175, "grad_norm": 3.8738110065460205, "learning_rate": 4.705728066087377e-05, "loss": 0.1032, "step": 11688 }, { "epoch": 5.434681543468154, "grad_norm": 1.454643964767456, "learning_rate": 4.7094140752217295e-05, "loss": 0.0609, "step": 11690 }, { "epoch": 5.435611343561134, "grad_norm": 2.2513587474823, "learning_rate": 4.713078278331132e-05, "loss": 0.1032, "step": 11692 }, { "epoch": 5.4365411436541144, "grad_norm": 3.4301507472991943, "learning_rate": 4.716720639251386e-05, "loss": 0.1183, "step": 11694 }, { "epoch": 5.437470943747094, "grad_norm": 2.7095234394073486, "learning_rate": 4.7203411220338585e-05, "loss": 0.1425, "step": 11696 }, { "epoch": 5.4384007438400745, "grad_norm": 2.382868766784668, "learning_rate": 4.7239396909458404e-05, "loss": 0.1215, "step": 11698 }, { "epoch": 5.439330543933054, "grad_norm": 1.4316179752349854, "learning_rate": 4.727516310470916e-05, "loss": 0.0874, "step": 11700 }, { "epoch": 5.440260344026035, "grad_norm": 1.6868622303009033, "learning_rate": 4.731070945309289e-05, "loss": 0.1032, "step": 11702 }, { "epoch": 5.441190144119014, "grad_norm": 1.7840362787246704, "learning_rate": 4.734603560378156e-05, "loss": 0.1018, "step": 11704 }, { "epoch": 5.442119944211995, "grad_norm": 2.0920891761779785, "learning_rate": 4.7381141208120235e-05, "loss": 0.0949, "step": 11706 }, { "epoch": 5.443049744304974, "grad_norm": 2.9362592697143555, "learning_rate": 4.741602591963085e-05, "loss": 0.1378, "step": 11708 }, { "epoch": 5.443979544397955, "grad_norm": 1.3517308235168457, "learning_rate": 4.7450689394015366e-05, "loss": 0.0619, "step": 11710 }, { "epoch": 5.4449093444909344, "grad_norm": 1.3744841814041138, "learning_rate": 4.7485131289159235e-05, "loss": 0.0548, "step": 11712 }, { "epoch": 5.445839144583914, "grad_norm": 3.0623857975006104, "learning_rate": 4.751935126513492e-05, "loss": 0.1234, "step": 11714 }, { "epoch": 5.4467689446768945, "grad_norm": 2.047879457473755, "learning_rate": 4.755334898420503e-05, "loss": 0.0804, "step": 11716 }, { "epoch": 5.447698744769874, "grad_norm": 2.6838314533233643, "learning_rate": 4.7587124110825814e-05, "loss": 0.1483, "step": 11718 }, { "epoch": 5.448628544862855, "grad_norm": 1.6428555250167847, "learning_rate": 4.762067631165049e-05, "loss": 0.0836, "step": 11720 }, { "epoch": 5.449558344955834, "grad_norm": 1.9551936388015747, "learning_rate": 4.7654005255532224e-05, "loss": 0.1247, "step": 11722 }, { "epoch": 5.450488145048815, "grad_norm": 2.3506579399108887, "learning_rate": 4.7687110613527926e-05, "loss": 0.1237, "step": 11724 }, { "epoch": 5.451417945141794, "grad_norm": 1.4498238563537598, "learning_rate": 4.771999205890098e-05, "loss": 0.1159, "step": 11726 }, { "epoch": 5.452347745234775, "grad_norm": 1.3439427614212036, "learning_rate": 4.775264926712486e-05, "loss": 0.1353, "step": 11728 }, { "epoch": 5.453277545327754, "grad_norm": 1.812652587890625, "learning_rate": 4.778508191588611e-05, "loss": 0.1274, "step": 11730 }, { "epoch": 5.454207345420735, "grad_norm": 1.6453092098236084, "learning_rate": 4.7817289685087536e-05, "loss": 0.0903, "step": 11732 }, { "epoch": 5.4551371455137145, "grad_norm": 2.415403127670288, "learning_rate": 4.7849272256851546e-05, "loss": 0.1204, "step": 11734 }, { "epoch": 5.456066945606695, "grad_norm": 2.279010057449341, "learning_rate": 4.788102931552294e-05, "loss": 0.0976, "step": 11736 }, { "epoch": 5.456996745699675, "grad_norm": 2.4393696784973145, "learning_rate": 4.791256054767244e-05, "loss": 0.1219, "step": 11738 }, { "epoch": 5.457926545792654, "grad_norm": 2.2679648399353027, "learning_rate": 4.7943865642099524e-05, "loss": 0.113, "step": 11740 }, { "epoch": 5.458856345885635, "grad_norm": 2.3314599990844727, "learning_rate": 4.79749442898355e-05, "loss": 0.111, "step": 11742 }, { "epoch": 5.459786145978614, "grad_norm": 1.5484025478363037, "learning_rate": 4.800579618414674e-05, "loss": 0.0982, "step": 11744 }, { "epoch": 5.460715946071595, "grad_norm": 1.4073541164398193, "learning_rate": 4.8036421020537415e-05, "loss": 0.1233, "step": 11746 }, { "epoch": 5.461645746164574, "grad_norm": 2.53513765335083, "learning_rate": 4.8066818496752875e-05, "loss": 0.1386, "step": 11748 }, { "epoch": 5.462575546257555, "grad_norm": 2.9809823036193848, "learning_rate": 4.809698831278215e-05, "loss": 0.1442, "step": 11750 }, { "epoch": 5.4635053463505345, "grad_norm": 2.6609890460968018, "learning_rate": 4.8126930170861416e-05, "loss": 0.1676, "step": 11752 }, { "epoch": 5.464435146443515, "grad_norm": 2.2598609924316406, "learning_rate": 4.8156643775476644e-05, "loss": 0.1472, "step": 11754 }, { "epoch": 5.465364946536495, "grad_norm": 1.6745439767837524, "learning_rate": 4.81861288333665e-05, "loss": 0.1404, "step": 11756 }, { "epoch": 5.466294746629474, "grad_norm": 2.10998272895813, "learning_rate": 4.821538505352541e-05, "loss": 0.1073, "step": 11758 }, { "epoch": 5.467224546722455, "grad_norm": 1.5127824544906616, "learning_rate": 4.824441214720625e-05, "loss": 0.1002, "step": 11760 }, { "epoch": 5.468154346815434, "grad_norm": 1.9410761594772339, "learning_rate": 4.827320982792339e-05, "loss": 0.0933, "step": 11762 }, { "epoch": 5.469084146908415, "grad_norm": 1.5026793479919434, "learning_rate": 4.830177781145528e-05, "loss": 0.0932, "step": 11764 }, { "epoch": 5.470013947001394, "grad_norm": 2.388162136077881, "learning_rate": 4.8330115815847445e-05, "loss": 0.1235, "step": 11766 }, { "epoch": 5.470943747094375, "grad_norm": 3.1633074283599854, "learning_rate": 4.835822356141529e-05, "loss": 0.1413, "step": 11768 }, { "epoch": 5.4718735471873545, "grad_norm": 2.247767210006714, "learning_rate": 4.838610077074666e-05, "loss": 0.1111, "step": 11770 }, { "epoch": 5.472803347280335, "grad_norm": 2.2790603637695312, "learning_rate": 4.841374716870477e-05, "loss": 0.1505, "step": 11772 }, { "epoch": 5.473733147373315, "grad_norm": 2.0760414600372314, "learning_rate": 4.844116248243088e-05, "loss": 0.0935, "step": 11774 }, { "epoch": 5.474662947466295, "grad_norm": 3.042098045349121, "learning_rate": 4.8468346441346846e-05, "loss": 0.1368, "step": 11776 }, { "epoch": 5.475592747559275, "grad_norm": 1.2782421112060547, "learning_rate": 4.8495298777158e-05, "loss": 0.086, "step": 11778 }, { "epoch": 5.476522547652255, "grad_norm": 2.641951322555542, "learning_rate": 4.852201922385563e-05, "loss": 0.1179, "step": 11780 }, { "epoch": 5.477452347745235, "grad_norm": 1.6939235925674438, "learning_rate": 4.854850751771975e-05, "loss": 0.0871, "step": 11782 }, { "epoch": 5.478382147838214, "grad_norm": 1.7170170545578003, "learning_rate": 4.857476339732162e-05, "loss": 0.1391, "step": 11784 }, { "epoch": 5.479311947931195, "grad_norm": 2.387863874435425, "learning_rate": 4.8600786603526234e-05, "loss": 0.088, "step": 11786 }, { "epoch": 5.4802417480241745, "grad_norm": 2.494882583618164, "learning_rate": 4.862657687949513e-05, "loss": 0.1083, "step": 11788 }, { "epoch": 5.481171548117155, "grad_norm": 2.072171688079834, "learning_rate": 4.8652133970688656e-05, "loss": 0.1366, "step": 11790 }, { "epoch": 5.482101348210135, "grad_norm": 1.617631435394287, "learning_rate": 4.867745762486864e-05, "loss": 0.0811, "step": 11792 }, { "epoch": 5.483031148303115, "grad_norm": 3.069532871246338, "learning_rate": 4.870254759210081e-05, "loss": 0.1801, "step": 11794 }, { "epoch": 5.483960948396095, "grad_norm": 1.9873507022857666, "learning_rate": 4.872740362475736e-05, "loss": 0.1057, "step": 11796 }, { "epoch": 5.484890748489075, "grad_norm": 2.223661184310913, "learning_rate": 4.87520254775193e-05, "loss": 0.1215, "step": 11798 }, { "epoch": 5.485820548582055, "grad_norm": 1.6756218671798706, "learning_rate": 4.877641290737883e-05, "loss": 0.0798, "step": 11800 }, { "epoch": 5.486750348675034, "grad_norm": 2.002896547317505, "learning_rate": 4.8800565673641914e-05, "loss": 0.1117, "step": 11802 }, { "epoch": 5.487680148768015, "grad_norm": 1.868399977684021, "learning_rate": 4.8824483537930486e-05, "loss": 0.111, "step": 11804 }, { "epoch": 5.4886099488609945, "grad_norm": 2.0496768951416016, "learning_rate": 4.884816626418484e-05, "loss": 0.0893, "step": 11806 }, { "epoch": 5.489539748953975, "grad_norm": 2.4192025661468506, "learning_rate": 4.887161361866609e-05, "loss": 0.0766, "step": 11808 }, { "epoch": 5.490469549046955, "grad_norm": 2.3564603328704834, "learning_rate": 4.8894825369958255e-05, "loss": 0.1311, "step": 11810 }, { "epoch": 5.491399349139935, "grad_norm": 2.4234116077423096, "learning_rate": 4.8917801288970776e-05, "loss": 0.1229, "step": 11812 }, { "epoch": 5.492329149232915, "grad_norm": 2.020427942276001, "learning_rate": 4.894054114894056e-05, "loss": 0.1202, "step": 11814 }, { "epoch": 5.493258949325895, "grad_norm": 2.844505786895752, "learning_rate": 4.896304472543438e-05, "loss": 0.1548, "step": 11816 }, { "epoch": 5.494188749418875, "grad_norm": 1.326344609260559, "learning_rate": 4.898531179635109e-05, "loss": 0.0969, "step": 11818 }, { "epoch": 5.495118549511855, "grad_norm": 2.001908540725708, "learning_rate": 4.9007342141923585e-05, "loss": 0.104, "step": 11820 }, { "epoch": 5.496048349604835, "grad_norm": 2.5183374881744385, "learning_rate": 4.9029135544721315e-05, "loss": 0.1417, "step": 11822 }, { "epoch": 5.496978149697815, "grad_norm": 2.3961400985717773, "learning_rate": 4.9050691789652154e-05, "loss": 0.0981, "step": 11824 }, { "epoch": 5.497907949790795, "grad_norm": 3.1362764835357666, "learning_rate": 4.907201066396469e-05, "loss": 0.1141, "step": 11826 }, { "epoch": 5.498837749883775, "grad_norm": 2.1390528678894043, "learning_rate": 4.9093091957250265e-05, "loss": 0.1395, "step": 11828 }, { "epoch": 5.499767549976755, "grad_norm": 1.7153836488723755, "learning_rate": 4.911393546144499e-05, "loss": 0.0697, "step": 11830 }, { "epoch": 5.500697350069735, "grad_norm": 3.239154577255249, "learning_rate": 4.9134540970831886e-05, "loss": 0.132, "step": 11832 }, { "epoch": 5.501627150162715, "grad_norm": 2.0893619060516357, "learning_rate": 4.9154908282042884e-05, "loss": 0.1285, "step": 11834 }, { "epoch": 5.502556950255695, "grad_norm": 1.405985713005066, "learning_rate": 4.9175037194060904e-05, "loss": 0.0857, "step": 11836 }, { "epoch": 5.503486750348675, "grad_norm": 3.6623148918151855, "learning_rate": 4.919492750822165e-05, "loss": 0.191, "step": 11838 }, { "epoch": 5.504416550441655, "grad_norm": 2.5034945011138916, "learning_rate": 4.921457902821578e-05, "loss": 0.1313, "step": 11840 }, { "epoch": 5.505346350534635, "grad_norm": 1.7481107711791992, "learning_rate": 4.923399156009074e-05, "loss": 0.1035, "step": 11842 }, { "epoch": 5.506276150627615, "grad_norm": 2.189845323562622, "learning_rate": 4.925316491225267e-05, "loss": 0.1112, "step": 11844 }, { "epoch": 5.507205950720595, "grad_norm": 1.7282317876815796, "learning_rate": 4.9272098895468304e-05, "loss": 0.1056, "step": 11846 }, { "epoch": 5.508135750813575, "grad_norm": 2.464292287826538, "learning_rate": 4.929079332286687e-05, "loss": 0.0969, "step": 11848 }, { "epoch": 5.509065550906556, "grad_norm": 2.2443959712982178, "learning_rate": 4.930924800994192e-05, "loss": 0.1434, "step": 11850 }, { "epoch": 5.509995350999535, "grad_norm": 2.681490182876587, "learning_rate": 4.932746277455318e-05, "loss": 0.1368, "step": 11852 }, { "epoch": 5.510925151092515, "grad_norm": 2.764941692352295, "learning_rate": 4.934543743692822e-05, "loss": 0.1051, "step": 11854 }, { "epoch": 5.511854951185495, "grad_norm": 2.781749963760376, "learning_rate": 4.936317181966443e-05, "loss": 0.1421, "step": 11856 }, { "epoch": 5.512784751278475, "grad_norm": 1.7809687852859497, "learning_rate": 4.9380665747730596e-05, "loss": 0.1156, "step": 11858 }, { "epoch": 5.513714551371455, "grad_norm": 2.708528995513916, "learning_rate": 4.939791904846867e-05, "loss": 0.1213, "step": 11860 }, { "epoch": 5.514644351464435, "grad_norm": 1.9972039461135864, "learning_rate": 4.9414931551595626e-05, "loss": 0.0955, "step": 11862 }, { "epoch": 5.5155741515574155, "grad_norm": 2.8171024322509766, "learning_rate": 4.9431703089204826e-05, "loss": 0.1273, "step": 11864 }, { "epoch": 5.516503951650395, "grad_norm": 2.709940195083618, "learning_rate": 4.944823349576804e-05, "loss": 0.148, "step": 11866 }, { "epoch": 5.517433751743376, "grad_norm": 1.5246673822402954, "learning_rate": 4.946452260813679e-05, "loss": 0.0798, "step": 11868 }, { "epoch": 5.518363551836355, "grad_norm": 2.5714287757873535, "learning_rate": 4.948057026554412e-05, "loss": 0.1235, "step": 11870 }, { "epoch": 5.519293351929335, "grad_norm": 2.2719414234161377, "learning_rate": 4.949637630960618e-05, "loss": 0.0963, "step": 11872 }, { "epoch": 5.520223152022315, "grad_norm": 1.6954447031021118, "learning_rate": 4.9511940584323604e-05, "loss": 0.1435, "step": 11874 }, { "epoch": 5.521152952115295, "grad_norm": 2.9471771717071533, "learning_rate": 4.952726293608336e-05, "loss": 0.1317, "step": 11876 }, { "epoch": 5.522082752208275, "grad_norm": 1.1425927877426147, "learning_rate": 4.954234321365998e-05, "loss": 0.0619, "step": 11878 }, { "epoch": 5.523012552301255, "grad_norm": 2.535700559616089, "learning_rate": 4.955718126821723e-05, "loss": 0.1195, "step": 11880 }, { "epoch": 5.5239423523942355, "grad_norm": 2.7112972736358643, "learning_rate": 4.95717769533095e-05, "loss": 0.1675, "step": 11882 }, { "epoch": 5.524872152487215, "grad_norm": 1.4990960359573364, "learning_rate": 4.958613012488324e-05, "loss": 0.1187, "step": 11884 }, { "epoch": 5.525801952580196, "grad_norm": 2.273869514465332, "learning_rate": 4.9600240641278517e-05, "loss": 0.1301, "step": 11886 }, { "epoch": 5.526731752673175, "grad_norm": 1.6723792552947998, "learning_rate": 4.961410836323014e-05, "loss": 0.1399, "step": 11888 }, { "epoch": 5.527661552766155, "grad_norm": 2.1205763816833496, "learning_rate": 4.962773315386936e-05, "loss": 0.1342, "step": 11890 }, { "epoch": 5.528591352859135, "grad_norm": 2.5226173400878906, "learning_rate": 4.964111487872495e-05, "loss": 0.1056, "step": 11892 }, { "epoch": 5.529521152952116, "grad_norm": 3.1856186389923096, "learning_rate": 4.965425340572471e-05, "loss": 0.1211, "step": 11894 }, { "epoch": 5.530450953045095, "grad_norm": 1.9029148817062378, "learning_rate": 4.9667148605196695e-05, "loss": 0.0811, "step": 11896 }, { "epoch": 5.531380753138075, "grad_norm": 2.192324638366699, "learning_rate": 4.9679800349870474e-05, "loss": 0.1057, "step": 11898 }, { "epoch": 5.5323105532310555, "grad_norm": 2.8305108547210693, "learning_rate": 4.9692208514878464e-05, "loss": 0.1432, "step": 11900 }, { "epoch": 5.533240353324035, "grad_norm": 2.087164878845215, "learning_rate": 4.9704372977757034e-05, "loss": 0.0888, "step": 11902 }, { "epoch": 5.534170153417016, "grad_norm": 2.2912919521331787, "learning_rate": 4.971629361844787e-05, "loss": 0.1225, "step": 11904 }, { "epoch": 5.535099953509995, "grad_norm": 1.5888521671295166, "learning_rate": 4.9727970319299065e-05, "loss": 0.0701, "step": 11906 }, { "epoch": 5.536029753602976, "grad_norm": 2.30216383934021, "learning_rate": 4.9739402965066296e-05, "loss": 0.1446, "step": 11908 }, { "epoch": 5.536959553695955, "grad_norm": 2.2801859378814697, "learning_rate": 4.9750591442913975e-05, "loss": 0.1208, "step": 11910 }, { "epoch": 5.537889353788936, "grad_norm": 1.945631504058838, "learning_rate": 4.976153564241632e-05, "loss": 0.1268, "step": 11912 }, { "epoch": 5.538819153881915, "grad_norm": 1.9629048109054565, "learning_rate": 4.9772235455558504e-05, "loss": 0.1402, "step": 11914 }, { "epoch": 5.539748953974895, "grad_norm": 2.107573986053467, "learning_rate": 4.97826907767377e-05, "loss": 0.0939, "step": 11916 }, { "epoch": 5.5406787540678755, "grad_norm": 1.4290574789047241, "learning_rate": 4.97929015027641e-05, "loss": 0.085, "step": 11918 }, { "epoch": 5.541608554160855, "grad_norm": 1.5926597118377686, "learning_rate": 4.980286753286199e-05, "loss": 0.1, "step": 11920 }, { "epoch": 5.542538354253836, "grad_norm": 1.944895625114441, "learning_rate": 4.981258876867063e-05, "loss": 0.112, "step": 11922 }, { "epoch": 5.543468154346815, "grad_norm": 0.7679511308670044, "learning_rate": 4.982206511424538e-05, "loss": 0.0458, "step": 11924 }, { "epoch": 5.544397954439796, "grad_norm": 1.5932140350341797, "learning_rate": 4.983129647605854e-05, "loss": 0.1332, "step": 11926 }, { "epoch": 5.545327754532775, "grad_norm": 2.8184375762939453, "learning_rate": 4.9840282763000254e-05, "loss": 0.1199, "step": 11928 }, { "epoch": 5.546257554625756, "grad_norm": 2.1125447750091553, "learning_rate": 4.984902388637953e-05, "loss": 0.1307, "step": 11930 }, { "epoch": 5.547187354718735, "grad_norm": 2.7879257202148438, "learning_rate": 4.9857519759925014e-05, "loss": 0.1231, "step": 11932 }, { "epoch": 5.548117154811716, "grad_norm": 2.0936450958251953, "learning_rate": 4.986577029978585e-05, "loss": 0.1799, "step": 11934 }, { "epoch": 5.5490469549046955, "grad_norm": 1.28288996219635, "learning_rate": 4.987377542453253e-05, "loss": 0.1118, "step": 11936 }, { "epoch": 5.549976754997676, "grad_norm": 1.772781252861023, "learning_rate": 4.988153505515774e-05, "loss": 0.0924, "step": 11938 }, { "epoch": 5.550906555090656, "grad_norm": 1.206789493560791, "learning_rate": 4.988904911507703e-05, "loss": 0.1275, "step": 11940 }, { "epoch": 5.551836355183635, "grad_norm": 2.3846018314361572, "learning_rate": 4.989631753012967e-05, "loss": 0.1059, "step": 11942 }, { "epoch": 5.552766155276616, "grad_norm": 1.9874818325042725, "learning_rate": 4.9903340228579345e-05, "loss": 0.1019, "step": 11944 }, { "epoch": 5.553695955369595, "grad_norm": 2.5135183334350586, "learning_rate": 4.991011714111483e-05, "loss": 0.1067, "step": 11946 }, { "epoch": 5.554625755462576, "grad_norm": 2.0927929878234863, "learning_rate": 4.991664820085077e-05, "loss": 0.1243, "step": 11948 }, { "epoch": 5.555555555555555, "grad_norm": 2.316547393798828, "learning_rate": 4.992293334332823e-05, "loss": 0.1178, "step": 11950 }, { "epoch": 5.556485355648536, "grad_norm": 3.020996332168579, "learning_rate": 4.9928972506515375e-05, "loss": 0.1783, "step": 11952 }, { "epoch": 5.5574151557415155, "grad_norm": 2.8923537731170654, "learning_rate": 4.993476563080813e-05, "loss": 0.1596, "step": 11954 }, { "epoch": 5.558344955834496, "grad_norm": 1.4572856426239014, "learning_rate": 4.9940312659030664e-05, "loss": 0.1177, "step": 11956 }, { "epoch": 5.5592747559274756, "grad_norm": 3.8799779415130615, "learning_rate": 4.994561353643607e-05, "loss": 0.2114, "step": 11958 }, { "epoch": 5.560204556020455, "grad_norm": 3.0525193214416504, "learning_rate": 4.995066821070682e-05, "loss": 0.0857, "step": 11960 }, { "epoch": 5.561134356113436, "grad_norm": 2.4550116062164307, "learning_rate": 4.9955476631955325e-05, "loss": 0.1838, "step": 11962 }, { "epoch": 5.562064156206415, "grad_norm": 2.9158904552459717, "learning_rate": 4.9960038752724414e-05, "loss": 0.1052, "step": 11964 }, { "epoch": 5.562993956299396, "grad_norm": 2.4588217735290527, "learning_rate": 4.996435452798778e-05, "loss": 0.1482, "step": 11966 }, { "epoch": 5.563923756392375, "grad_norm": 2.898466110229492, "learning_rate": 4.996842391515048e-05, "loss": 0.1358, "step": 11968 }, { "epoch": 5.564853556485356, "grad_norm": 2.5273537635803223, "learning_rate": 4.997224687404929e-05, "loss": 0.1366, "step": 11970 }, { "epoch": 5.5657833565783355, "grad_norm": 2.61429500579834, "learning_rate": 4.997582336695316e-05, "loss": 0.091, "step": 11972 }, { "epoch": 5.566713156671316, "grad_norm": 6.124009609222412, "learning_rate": 4.9979153358563537e-05, "loss": 0.1561, "step": 11974 }, { "epoch": 5.5676429567642955, "grad_norm": 2.4786536693573, "learning_rate": 4.9982236816014766e-05, "loss": 0.1155, "step": 11976 }, { "epoch": 5.568572756857276, "grad_norm": 1.6343231201171875, "learning_rate": 4.9985073708874355e-05, "loss": 0.1073, "step": 11978 }, { "epoch": 5.569502556950256, "grad_norm": 2.5738346576690674, "learning_rate": 4.998766400914331e-05, "loss": 0.1319, "step": 11980 }, { "epoch": 5.570432357043236, "grad_norm": 1.7106469869613647, "learning_rate": 4.999000769125644e-05, "loss": 0.1168, "step": 11982 }, { "epoch": 5.571362157136216, "grad_norm": 1.815358281135559, "learning_rate": 4.999210473208253e-05, "loss": 0.0922, "step": 11984 }, { "epoch": 5.572291957229195, "grad_norm": 2.4478847980499268, "learning_rate": 4.9993955110924636e-05, "loss": 0.1312, "step": 11986 }, { "epoch": 5.573221757322176, "grad_norm": 2.9535510540008545, "learning_rate": 4.999555880952026e-05, "loss": 0.1642, "step": 11988 }, { "epoch": 5.5741515574151554, "grad_norm": 2.7093141078948975, "learning_rate": 4.999691581204156e-05, "loss": 0.1198, "step": 11990 }, { "epoch": 5.575081357508136, "grad_norm": 1.8879942893981934, "learning_rate": 4.9998026105095446e-05, "loss": 0.1178, "step": 11992 }, { "epoch": 5.5760111576011155, "grad_norm": 2.102362871170044, "learning_rate": 4.9998889677723783e-05, "loss": 0.1043, "step": 11994 }, { "epoch": 5.576940957694096, "grad_norm": 1.6910426616668701, "learning_rate": 4.999950652140346e-05, "loss": 0.0871, "step": 11996 }, { "epoch": 5.577870757787076, "grad_norm": 2.0920488834381104, "learning_rate": 4.99998766300465e-05, "loss": 0.0955, "step": 11998 }, { "epoch": 5.578800557880056, "grad_norm": 3.024637460708618, "learning_rate": 5.0000000000000057e-05, "loss": 0.1434, "step": 12000 }, { "epoch": 5.578800557880056, "eval_cer": 0.19212749665814438, "eval_loss": 0.2926497459411621, "eval_runtime": 402.0021, "eval_samples_per_second": 31.577, "eval_steps_per_second": 0.988, "step": 12000 }, { "epoch": 5.579730357973036, "grad_norm": 2.3835995197296143, "learning_rate": 4.9999876630046515e-05, "loss": 0.098, "step": 12002 }, { "epoch": 5.580660158066015, "grad_norm": 2.0845417976379395, "learning_rate": 4.999950652140348e-05, "loss": 0.119, "step": 12004 }, { "epoch": 5.581589958158996, "grad_norm": 1.3448872566223145, "learning_rate": 4.9998889677723804e-05, "loss": 0.0746, "step": 12006 }, { "epoch": 5.582519758251975, "grad_norm": 2.373384714126587, "learning_rate": 4.9998026105095466e-05, "loss": 0.1019, "step": 12008 }, { "epoch": 5.583449558344956, "grad_norm": 1.8152124881744385, "learning_rate": 4.999691581204158e-05, "loss": 0.0794, "step": 12010 }, { "epoch": 5.5843793584379355, "grad_norm": 2.9302313327789307, "learning_rate": 4.999555880952029e-05, "loss": 0.1348, "step": 12012 }, { "epoch": 5.585309158530916, "grad_norm": 2.307882070541382, "learning_rate": 4.999395511092467e-05, "loss": 0.1179, "step": 12014 }, { "epoch": 5.586238958623896, "grad_norm": 2.989107608795166, "learning_rate": 4.999210473208256e-05, "loss": 0.1494, "step": 12016 }, { "epoch": 5.587168758716876, "grad_norm": 2.192401885986328, "learning_rate": 4.9990007691256466e-05, "loss": 0.1181, "step": 12018 }, { "epoch": 5.588098558809856, "grad_norm": 2.868638277053833, "learning_rate": 4.9987664009143345e-05, "loss": 0.1705, "step": 12020 }, { "epoch": 5.589028358902836, "grad_norm": 1.7769299745559692, "learning_rate": 4.998507370887437e-05, "loss": 0.1073, "step": 12022 }, { "epoch": 5.589958158995816, "grad_norm": 2.1639177799224854, "learning_rate": 4.998223681601478e-05, "loss": 0.0965, "step": 12024 }, { "epoch": 5.590887959088796, "grad_norm": 1.7253971099853516, "learning_rate": 4.997915335856355e-05, "loss": 0.1062, "step": 12026 }, { "epoch": 5.591817759181776, "grad_norm": 3.1004958152770996, "learning_rate": 4.9975823366953165e-05, "loss": 0.1282, "step": 12028 }, { "epoch": 5.5927475592747555, "grad_norm": 2.4100241661071777, "learning_rate": 4.99722468740493e-05, "loss": 0.178, "step": 12030 }, { "epoch": 5.593677359367736, "grad_norm": 2.6951091289520264, "learning_rate": 4.9968423915150494e-05, "loss": 0.1568, "step": 12032 }, { "epoch": 5.594607159460716, "grad_norm": 1.9734125137329102, "learning_rate": 4.9964354527987796e-05, "loss": 0.152, "step": 12034 }, { "epoch": 5.595536959553696, "grad_norm": 1.563323736190796, "learning_rate": 4.996003875272443e-05, "loss": 0.0734, "step": 12036 }, { "epoch": 5.596466759646676, "grad_norm": 2.7438883781433105, "learning_rate": 4.9955476631955345e-05, "loss": 0.1096, "step": 12038 }, { "epoch": 5.597396559739656, "grad_norm": 2.726170301437378, "learning_rate": 4.995066821070685e-05, "loss": 0.1498, "step": 12040 }, { "epoch": 5.598326359832636, "grad_norm": 2.130122184753418, "learning_rate": 4.994561353643609e-05, "loss": 0.1593, "step": 12042 }, { "epoch": 5.599256159925616, "grad_norm": 2.717402935028076, "learning_rate": 4.9940312659030684e-05, "loss": 0.0894, "step": 12044 }, { "epoch": 5.600185960018596, "grad_norm": 2.1613588333129883, "learning_rate": 4.993476563080814e-05, "loss": 0.1306, "step": 12046 }, { "epoch": 5.6011157601115755, "grad_norm": 1.5653856992721558, "learning_rate": 4.9928972506515395e-05, "loss": 0.0895, "step": 12048 }, { "epoch": 5.602045560204556, "grad_norm": 3.027595043182373, "learning_rate": 4.992293334332825e-05, "loss": 0.123, "step": 12050 }, { "epoch": 5.6029753602975365, "grad_norm": 1.9017832279205322, "learning_rate": 4.991664820085079e-05, "loss": 0.1045, "step": 12052 }, { "epoch": 5.603905160390516, "grad_norm": 1.835963249206543, "learning_rate": 4.991011714111486e-05, "loss": 0.0901, "step": 12054 }, { "epoch": 5.604834960483496, "grad_norm": 1.8386601209640503, "learning_rate": 4.990334022857935e-05, "loss": 0.1134, "step": 12056 }, { "epoch": 5.605764760576476, "grad_norm": 1.2364888191223145, "learning_rate": 4.989631753012969e-05, "loss": 0.0939, "step": 12058 }, { "epoch": 5.606694560669456, "grad_norm": 2.112565755844116, "learning_rate": 4.988904911507706e-05, "loss": 0.1458, "step": 12060 }, { "epoch": 5.607624360762436, "grad_norm": 2.09568452835083, "learning_rate": 4.988153505515776e-05, "loss": 0.1211, "step": 12062 }, { "epoch": 5.608554160855416, "grad_norm": 1.4684691429138184, "learning_rate": 4.987377542453255e-05, "loss": 0.0864, "step": 12064 }, { "epoch": 5.609483960948396, "grad_norm": 2.7460803985595703, "learning_rate": 4.986577029978585e-05, "loss": 0.1779, "step": 12066 }, { "epoch": 5.610413761041376, "grad_norm": 3.066030263900757, "learning_rate": 4.9857519759925014e-05, "loss": 0.1419, "step": 12068 }, { "epoch": 5.6113435611343565, "grad_norm": 2.343114137649536, "learning_rate": 4.984902388637953e-05, "loss": 0.1281, "step": 12070 }, { "epoch": 5.612273361227336, "grad_norm": 1.3017572164535522, "learning_rate": 4.984028276300025e-05, "loss": 0.0746, "step": 12072 }, { "epoch": 5.613203161320316, "grad_norm": 2.1948652267456055, "learning_rate": 4.983129647605853e-05, "loss": 0.1267, "step": 12074 }, { "epoch": 5.614132961413296, "grad_norm": 2.448621988296509, "learning_rate": 4.982206511424538e-05, "loss": 0.1063, "step": 12076 }, { "epoch": 5.615062761506276, "grad_norm": 2.168165683746338, "learning_rate": 4.9812588768670635e-05, "loss": 0.0833, "step": 12078 }, { "epoch": 5.615992561599256, "grad_norm": 1.7066223621368408, "learning_rate": 4.980286753286199e-05, "loss": 0.0799, "step": 12080 }, { "epoch": 5.616922361692236, "grad_norm": 2.6960551738739014, "learning_rate": 4.979290150276411e-05, "loss": 0.1012, "step": 12082 }, { "epoch": 5.617852161785216, "grad_norm": 1.9871145486831665, "learning_rate": 4.978269077673771e-05, "loss": 0.1778, "step": 12084 }, { "epoch": 5.618781961878196, "grad_norm": 2.24430775642395, "learning_rate": 4.977223545555851e-05, "loss": 0.1202, "step": 12086 }, { "epoch": 5.6197117619711765, "grad_norm": 1.831337571144104, "learning_rate": 4.976153564241633e-05, "loss": 0.106, "step": 12088 }, { "epoch": 5.620641562064156, "grad_norm": 1.4218615293502808, "learning_rate": 4.975059144291399e-05, "loss": 0.1055, "step": 12090 }, { "epoch": 5.621571362157137, "grad_norm": 1.6542062759399414, "learning_rate": 4.97394029650663e-05, "loss": 0.1, "step": 12092 }, { "epoch": 5.622501162250116, "grad_norm": 2.2312662601470947, "learning_rate": 4.972797031929909e-05, "loss": 0.1025, "step": 12094 }, { "epoch": 5.623430962343097, "grad_norm": 2.2637012004852295, "learning_rate": 4.9716293618447884e-05, "loss": 0.1335, "step": 12096 }, { "epoch": 5.624360762436076, "grad_norm": 2.9750001430511475, "learning_rate": 4.9704372977757055e-05, "loss": 0.0988, "step": 12098 }, { "epoch": 5.625290562529056, "grad_norm": 2.002894878387451, "learning_rate": 4.9692208514878484e-05, "loss": 0.125, "step": 12100 }, { "epoch": 5.626220362622036, "grad_norm": 2.2169718742370605, "learning_rate": 4.967980034987053e-05, "loss": 0.1315, "step": 12102 }, { "epoch": 5.627150162715016, "grad_norm": 3.2505431175231934, "learning_rate": 4.966714860519676e-05, "loss": 0.2002, "step": 12104 }, { "epoch": 5.6280799628079965, "grad_norm": 2.133143901824951, "learning_rate": 4.965425340572476e-05, "loss": 0.1142, "step": 12106 }, { "epoch": 5.629009762900976, "grad_norm": 1.9527764320373535, "learning_rate": 4.9641114878725e-05, "loss": 0.1194, "step": 12108 }, { "epoch": 5.629939562993957, "grad_norm": 1.826464056968689, "learning_rate": 4.962773315386939e-05, "loss": 0.1271, "step": 12110 }, { "epoch": 5.630869363086936, "grad_norm": 2.261060953140259, "learning_rate": 4.961410836323019e-05, "loss": 0.1444, "step": 12112 }, { "epoch": 5.631799163179917, "grad_norm": 1.4889825582504272, "learning_rate": 4.960024064127856e-05, "loss": 0.133, "step": 12114 }, { "epoch": 5.632728963272896, "grad_norm": 1.6509050130844116, "learning_rate": 4.9586130124883304e-05, "loss": 0.097, "step": 12116 }, { "epoch": 5.633658763365876, "grad_norm": 1.7660341262817383, "learning_rate": 4.957177695330953e-05, "loss": 0.075, "step": 12118 }, { "epoch": 5.634588563458856, "grad_norm": 2.2069711685180664, "learning_rate": 4.955718126821726e-05, "loss": 0.1205, "step": 12120 }, { "epoch": 5.635518363551836, "grad_norm": 2.5803773403167725, "learning_rate": 4.954234321366002e-05, "loss": 0.1306, "step": 12122 }, { "epoch": 5.6364481636448165, "grad_norm": 1.6743826866149902, "learning_rate": 4.952726293608341e-05, "loss": 0.0943, "step": 12124 }, { "epoch": 5.637377963737796, "grad_norm": 2.5834290981292725, "learning_rate": 4.951194058432366e-05, "loss": 0.1879, "step": 12126 }, { "epoch": 5.638307763830777, "grad_norm": 1.8242405652999878, "learning_rate": 4.9496376309606226e-05, "loss": 0.0849, "step": 12128 }, { "epoch": 5.639237563923756, "grad_norm": 2.710932731628418, "learning_rate": 4.94805702655442e-05, "loss": 0.1732, "step": 12130 }, { "epoch": 5.640167364016737, "grad_norm": 2.1719400882720947, "learning_rate": 4.9464522608136846e-05, "loss": 0.1329, "step": 12132 }, { "epoch": 5.641097164109716, "grad_norm": 2.445949077606201, "learning_rate": 4.944823349576811e-05, "loss": 0.0964, "step": 12134 }, { "epoch": 5.642026964202697, "grad_norm": 3.08278489112854, "learning_rate": 4.9431703089204886e-05, "loss": 0.1015, "step": 12136 }, { "epoch": 5.642956764295676, "grad_norm": 2.8748624324798584, "learning_rate": 4.941493155159568e-05, "loss": 0.101, "step": 12138 }, { "epoch": 5.643886564388657, "grad_norm": 1.6961852312088013, "learning_rate": 4.939791904846874e-05, "loss": 0.1012, "step": 12140 }, { "epoch": 5.6448163644816365, "grad_norm": 1.8215585947036743, "learning_rate": 4.938066574773065e-05, "loss": 0.0807, "step": 12142 }, { "epoch": 5.645746164574616, "grad_norm": 2.068312644958496, "learning_rate": 4.936317181966448e-05, "loss": 0.1031, "step": 12144 }, { "epoch": 5.646675964667597, "grad_norm": 1.4846142530441284, "learning_rate": 4.934543743692827e-05, "loss": 0.074, "step": 12146 }, { "epoch": 5.647605764760576, "grad_norm": 3.1980040073394775, "learning_rate": 4.932746277455322e-05, "loss": 0.1584, "step": 12148 }, { "epoch": 5.648535564853557, "grad_norm": 2.952009916305542, "learning_rate": 4.930924800994196e-05, "loss": 0.1571, "step": 12150 }, { "epoch": 5.649465364946536, "grad_norm": 2.1363284587860107, "learning_rate": 4.92907933228669e-05, "loss": 0.1115, "step": 12152 }, { "epoch": 5.650395165039517, "grad_norm": 3.610966205596924, "learning_rate": 4.9272098895468324e-05, "loss": 0.1062, "step": 12154 }, { "epoch": 5.651324965132496, "grad_norm": 2.2018914222717285, "learning_rate": 4.925316491225271e-05, "loss": 0.1285, "step": 12156 }, { "epoch": 5.652254765225477, "grad_norm": 2.797412395477295, "learning_rate": 4.9233991560090804e-05, "loss": 0.1391, "step": 12158 }, { "epoch": 5.6531845653184565, "grad_norm": 1.7440377473831177, "learning_rate": 4.921457902821582e-05, "loss": 0.0819, "step": 12160 }, { "epoch": 5.654114365411436, "grad_norm": 2.167586326599121, "learning_rate": 4.9194927508221685e-05, "loss": 0.1153, "step": 12162 }, { "epoch": 5.655044165504417, "grad_norm": 1.8216216564178467, "learning_rate": 4.917503719406091e-05, "loss": 0.1436, "step": 12164 }, { "epoch": 5.655973965597396, "grad_norm": 2.208026885986328, "learning_rate": 4.915490828204292e-05, "loss": 0.1256, "step": 12166 }, { "epoch": 5.656903765690377, "grad_norm": 1.7314934730529785, "learning_rate": 4.9134540970831906e-05, "loss": 0.087, "step": 12168 }, { "epoch": 5.657833565783356, "grad_norm": 2.385089159011841, "learning_rate": 4.9113935461445e-05, "loss": 0.125, "step": 12170 }, { "epoch": 5.658763365876337, "grad_norm": 2.1521193981170654, "learning_rate": 4.909309195725032e-05, "loss": 0.115, "step": 12172 }, { "epoch": 5.659693165969316, "grad_norm": 1.6186316013336182, "learning_rate": 4.907201066396473e-05, "loss": 0.0935, "step": 12174 }, { "epoch": 5.660622966062297, "grad_norm": 2.267411470413208, "learning_rate": 4.90506917896522e-05, "loss": 0.14, "step": 12176 }, { "epoch": 5.6615527661552765, "grad_norm": 2.022319793701172, "learning_rate": 4.902913554472137e-05, "loss": 0.1329, "step": 12178 }, { "epoch": 5.662482566248257, "grad_norm": 1.7055734395980835, "learning_rate": 4.9007342141923646e-05, "loss": 0.0913, "step": 12180 }, { "epoch": 5.663412366341237, "grad_norm": 2.490086793899536, "learning_rate": 4.898531179635115e-05, "loss": 0.1726, "step": 12182 }, { "epoch": 5.664342166434217, "grad_norm": 2.2211008071899414, "learning_rate": 4.896304472543446e-05, "loss": 0.1183, "step": 12184 }, { "epoch": 5.665271966527197, "grad_norm": 2.179575204849243, "learning_rate": 4.894054114894063e-05, "loss": 0.1335, "step": 12186 }, { "epoch": 5.666201766620176, "grad_norm": 1.4525997638702393, "learning_rate": 4.89178012889708e-05, "loss": 0.1284, "step": 12188 }, { "epoch": 5.667131566713157, "grad_norm": 2.0994644165039062, "learning_rate": 4.88948253699583e-05, "loss": 0.0915, "step": 12190 }, { "epoch": 5.668061366806136, "grad_norm": 2.868997097015381, "learning_rate": 4.887161361866614e-05, "loss": 0.111, "step": 12192 }, { "epoch": 5.668991166899117, "grad_norm": 1.8334192037582397, "learning_rate": 4.88481662641849e-05, "loss": 0.1027, "step": 12194 }, { "epoch": 5.6699209669920965, "grad_norm": 1.6890747547149658, "learning_rate": 4.882448353793054e-05, "loss": 0.1036, "step": 12196 }, { "epoch": 5.670850767085077, "grad_norm": 2.680837869644165, "learning_rate": 4.880056567364198e-05, "loss": 0.1221, "step": 12198 }, { "epoch": 5.671780567178057, "grad_norm": 2.272047281265259, "learning_rate": 4.877641290737889e-05, "loss": 0.1035, "step": 12200 }, { "epoch": 5.672710367271037, "grad_norm": 2.1602494716644287, "learning_rate": 4.8752025477519356e-05, "loss": 0.1448, "step": 12202 }, { "epoch": 5.673640167364017, "grad_norm": 2.5603902339935303, "learning_rate": 4.872740362475742e-05, "loss": 0.103, "step": 12204 }, { "epoch": 5.674569967456996, "grad_norm": 1.8160781860351562, "learning_rate": 4.870254759210087e-05, "loss": 0.1226, "step": 12206 }, { "epoch": 5.675499767549977, "grad_norm": 1.6739107370376587, "learning_rate": 4.8677457624868665e-05, "loss": 0.0975, "step": 12208 }, { "epoch": 5.676429567642957, "grad_norm": 2.402470588684082, "learning_rate": 4.865213397068871e-05, "loss": 0.1241, "step": 12210 }, { "epoch": 5.677359367735937, "grad_norm": 1.828646183013916, "learning_rate": 4.862657687949522e-05, "loss": 0.0912, "step": 12212 }, { "epoch": 5.6782891678289165, "grad_norm": 2.6709542274475098, "learning_rate": 4.86007866035263e-05, "loss": 0.1448, "step": 12214 }, { "epoch": 5.679218967921897, "grad_norm": 1.9817997217178345, "learning_rate": 4.8574763397321695e-05, "loss": 0.0894, "step": 12216 }, { "epoch": 5.680148768014877, "grad_norm": 1.6447025537490845, "learning_rate": 4.8548507517719836e-05, "loss": 0.1186, "step": 12218 }, { "epoch": 5.681078568107857, "grad_norm": 2.436039447784424, "learning_rate": 4.852201922385572e-05, "loss": 0.1521, "step": 12220 }, { "epoch": 5.682008368200837, "grad_norm": 1.9271334409713745, "learning_rate": 4.8495298777158093e-05, "loss": 0.1606, "step": 12222 }, { "epoch": 5.682938168293817, "grad_norm": 1.4700766801834106, "learning_rate": 4.846834644134695e-05, "loss": 0.1223, "step": 12224 }, { "epoch": 5.683867968386797, "grad_norm": 1.770084023475647, "learning_rate": 4.8441162482431e-05, "loss": 0.1108, "step": 12226 }, { "epoch": 5.684797768479777, "grad_norm": 2.88207745552063, "learning_rate": 4.841374716870486e-05, "loss": 0.1609, "step": 12228 }, { "epoch": 5.685727568572757, "grad_norm": 2.2564167976379395, "learning_rate": 4.838610077074676e-05, "loss": 0.0942, "step": 12230 }, { "epoch": 5.6866573686657365, "grad_norm": 2.265681028366089, "learning_rate": 4.8358223561415365e-05, "loss": 0.1141, "step": 12232 }, { "epoch": 5.687587168758717, "grad_norm": 1.3703240156173706, "learning_rate": 4.833011581584754e-05, "loss": 0.0822, "step": 12234 }, { "epoch": 5.6885169688516966, "grad_norm": 2.2994191646575928, "learning_rate": 4.8301777811455364e-05, "loss": 0.1399, "step": 12236 }, { "epoch": 5.689446768944677, "grad_norm": 1.7603665590286255, "learning_rate": 4.8273209827923466e-05, "loss": 0.13, "step": 12238 }, { "epoch": 5.690376569037657, "grad_norm": 2.208536386489868, "learning_rate": 4.824441214720637e-05, "loss": 0.143, "step": 12240 }, { "epoch": 5.691306369130637, "grad_norm": 1.7238024473190308, "learning_rate": 4.8215385053525474e-05, "loss": 0.1328, "step": 12242 }, { "epoch": 5.692236169223617, "grad_norm": 2.783395290374756, "learning_rate": 4.818612883336659e-05, "loss": 0.1342, "step": 12244 }, { "epoch": 5.693165969316597, "grad_norm": 3.9441041946411133, "learning_rate": 4.8156643775476746e-05, "loss": 0.095, "step": 12246 }, { "epoch": 5.694095769409577, "grad_norm": 2.5294535160064697, "learning_rate": 4.812693017086151e-05, "loss": 0.1398, "step": 12248 }, { "epoch": 5.695025569502557, "grad_norm": 1.5865086317062378, "learning_rate": 4.809698831278225e-05, "loss": 0.1207, "step": 12250 }, { "epoch": 5.695955369595537, "grad_norm": 2.163966655731201, "learning_rate": 4.8066818496752936e-05, "loss": 0.1696, "step": 12252 }, { "epoch": 5.696885169688517, "grad_norm": 2.671772003173828, "learning_rate": 4.803642102053755e-05, "loss": 0.1397, "step": 12254 }, { "epoch": 5.697814969781497, "grad_norm": 2.1437079906463623, "learning_rate": 4.800579618414683e-05, "loss": 0.1061, "step": 12256 }, { "epoch": 5.698744769874477, "grad_norm": 2.395221471786499, "learning_rate": 4.797494428983559e-05, "loss": 0.1516, "step": 12258 }, { "epoch": 5.699674569967457, "grad_norm": 1.375505805015564, "learning_rate": 4.794386564209961e-05, "loss": 0.0932, "step": 12260 }, { "epoch": 5.700604370060437, "grad_norm": 1.620627760887146, "learning_rate": 4.791256054767251e-05, "loss": 0.1099, "step": 12262 }, { "epoch": 5.701534170153417, "grad_norm": 2.9099605083465576, "learning_rate": 4.788102931552302e-05, "loss": 0.1253, "step": 12264 }, { "epoch": 5.702463970246397, "grad_norm": 1.8653929233551025, "learning_rate": 4.7849272256851634e-05, "loss": 0.1458, "step": 12266 }, { "epoch": 5.703393770339377, "grad_norm": 3.427870750427246, "learning_rate": 4.781728968508766e-05, "loss": 0.1684, "step": 12268 }, { "epoch": 5.704323570432357, "grad_norm": 2.4086318016052246, "learning_rate": 4.77850819158862e-05, "loss": 0.114, "step": 12270 }, { "epoch": 5.705253370525337, "grad_norm": 1.5993238687515259, "learning_rate": 4.775264926712495e-05, "loss": 0.1269, "step": 12272 }, { "epoch": 5.706183170618317, "grad_norm": 1.5889217853546143, "learning_rate": 4.771999205890108e-05, "loss": 0.1166, "step": 12274 }, { "epoch": 5.707112970711297, "grad_norm": 1.9319357872009277, "learning_rate": 4.768711061352799e-05, "loss": 0.0996, "step": 12276 }, { "epoch": 5.708042770804277, "grad_norm": 3.0326993465423584, "learning_rate": 4.765400525553233e-05, "loss": 0.1297, "step": 12278 }, { "epoch": 5.708972570897257, "grad_norm": 2.3318824768066406, "learning_rate": 4.76206763116506e-05, "loss": 0.116, "step": 12280 }, { "epoch": 5.709902370990237, "grad_norm": 1.9912774562835693, "learning_rate": 4.7587124110825956e-05, "loss": 0.1073, "step": 12282 }, { "epoch": 5.710832171083217, "grad_norm": 3.1672215461730957, "learning_rate": 4.755334898420514e-05, "loss": 0.1319, "step": 12284 }, { "epoch": 5.711761971176197, "grad_norm": 2.182358980178833, "learning_rate": 4.7519351265135004e-05, "loss": 0.1077, "step": 12286 }, { "epoch": 5.712691771269177, "grad_norm": 2.331815481185913, "learning_rate": 4.748513128915935e-05, "loss": 0.1162, "step": 12288 }, { "epoch": 5.713621571362157, "grad_norm": 2.669405698776245, "learning_rate": 4.7450689394015495e-05, "loss": 0.1216, "step": 12290 }, { "epoch": 5.714551371455137, "grad_norm": 2.2362892627716064, "learning_rate": 4.741602591963097e-05, "loss": 0.1402, "step": 12292 }, { "epoch": 5.7154811715481175, "grad_norm": 2.5283377170562744, "learning_rate": 4.738114120812039e-05, "loss": 0.1136, "step": 12294 }, { "epoch": 5.716410971641097, "grad_norm": 1.9550998210906982, "learning_rate": 4.734603560378163e-05, "loss": 0.1273, "step": 12296 }, { "epoch": 5.717340771734078, "grad_norm": 2.4200563430786133, "learning_rate": 4.731070945309301e-05, "loss": 0.1217, "step": 12298 }, { "epoch": 5.718270571827057, "grad_norm": 1.718603253364563, "learning_rate": 4.727516310470929e-05, "loss": 0.0855, "step": 12300 }, { "epoch": 5.719200371920037, "grad_norm": 2.2833237648010254, "learning_rate": 4.7239396909458526e-05, "loss": 0.0974, "step": 12302 }, { "epoch": 5.720130172013017, "grad_norm": 2.135573148727417, "learning_rate": 4.720341122033871e-05, "loss": 0.0925, "step": 12304 }, { "epoch": 5.721059972105997, "grad_norm": 2.2947773933410645, "learning_rate": 4.7167206392513985e-05, "loss": 0.1404, "step": 12306 }, { "epoch": 5.721989772198977, "grad_norm": 2.134279727935791, "learning_rate": 4.7130782783311475e-05, "loss": 0.0848, "step": 12308 }, { "epoch": 5.722919572291957, "grad_norm": 2.4128003120422363, "learning_rate": 4.7094140752217376e-05, "loss": 0.1075, "step": 12310 }, { "epoch": 5.7238493723849375, "grad_norm": 1.5221009254455566, "learning_rate": 4.7057280660873896e-05, "loss": 0.1183, "step": 12312 }, { "epoch": 5.724779172477917, "grad_norm": 2.559770345687866, "learning_rate": 4.702020287307517e-05, "loss": 0.1469, "step": 12314 }, { "epoch": 5.725708972570898, "grad_norm": 2.3400259017944336, "learning_rate": 4.6982907754763974e-05, "loss": 0.1055, "step": 12316 }, { "epoch": 5.726638772663877, "grad_norm": 1.9097723960876465, "learning_rate": 4.694539567402813e-05, "loss": 0.0971, "step": 12318 }, { "epoch": 5.727568572756857, "grad_norm": 1.511232614517212, "learning_rate": 4.690766700109665e-05, "loss": 0.1214, "step": 12320 }, { "epoch": 5.728498372849837, "grad_norm": 1.9083517789840698, "learning_rate": 4.686972210833641e-05, "loss": 0.0919, "step": 12322 }, { "epoch": 5.729428172942817, "grad_norm": 2.8205490112304688, "learning_rate": 4.6831561370248077e-05, "loss": 0.1486, "step": 12324 }, { "epoch": 5.730357973035797, "grad_norm": 2.784832715988159, "learning_rate": 4.679318516346277e-05, "loss": 0.1228, "step": 12326 }, { "epoch": 5.731287773128777, "grad_norm": 1.4410370588302612, "learning_rate": 4.6754593866738215e-05, "loss": 0.124, "step": 12328 }, { "epoch": 5.7322175732217575, "grad_norm": 2.0227911472320557, "learning_rate": 4.6715787860954844e-05, "loss": 0.1133, "step": 12330 }, { "epoch": 5.733147373314737, "grad_norm": 3.0432851314544678, "learning_rate": 4.667676752911234e-05, "loss": 0.138, "step": 12332 }, { "epoch": 5.734077173407718, "grad_norm": 1.5445119142532349, "learning_rate": 4.663753325632559e-05, "loss": 0.0748, "step": 12334 }, { "epoch": 5.735006973500697, "grad_norm": 2.402883291244507, "learning_rate": 4.6598085429820976e-05, "loss": 0.101, "step": 12336 }, { "epoch": 5.735936773593678, "grad_norm": 3.7446131706237793, "learning_rate": 4.6558424438932673e-05, "loss": 0.1296, "step": 12338 }, { "epoch": 5.736866573686657, "grad_norm": 1.74805748462677, "learning_rate": 4.6518550675098644e-05, "loss": 0.0909, "step": 12340 }, { "epoch": 5.737796373779638, "grad_norm": 2.784475803375244, "learning_rate": 4.647846453185689e-05, "loss": 0.1021, "step": 12342 }, { "epoch": 5.738726173872617, "grad_norm": 2.0652284622192383, "learning_rate": 4.6438166404841415e-05, "loss": 0.1259, "step": 12344 }, { "epoch": 5.739655973965597, "grad_norm": 2.3885931968688965, "learning_rate": 4.6397656691778406e-05, "loss": 0.1092, "step": 12346 }, { "epoch": 5.7405857740585775, "grad_norm": 2.588369846343994, "learning_rate": 4.6356935792482486e-05, "loss": 0.1191, "step": 12348 }, { "epoch": 5.741515574151557, "grad_norm": 1.7510216236114502, "learning_rate": 4.63160041088524e-05, "loss": 0.0679, "step": 12350 }, { "epoch": 5.742445374244538, "grad_norm": 1.9478851556777954, "learning_rate": 4.627486204486737e-05, "loss": 0.136, "step": 12352 }, { "epoch": 5.743375174337517, "grad_norm": 1.6838325262069702, "learning_rate": 4.623351000658297e-05, "loss": 0.0956, "step": 12354 }, { "epoch": 5.744304974430498, "grad_norm": 2.0785634517669678, "learning_rate": 4.6191948402127156e-05, "loss": 0.1118, "step": 12356 }, { "epoch": 5.745234774523477, "grad_norm": 1.852612853050232, "learning_rate": 4.615017764169615e-05, "loss": 0.0983, "step": 12358 }, { "epoch": 5.746164574616458, "grad_norm": 1.2563927173614502, "learning_rate": 4.610819813755046e-05, "loss": 0.0648, "step": 12360 }, { "epoch": 5.747094374709437, "grad_norm": 1.6485228538513184, "learning_rate": 4.6066010304010916e-05, "loss": 0.0952, "step": 12362 }, { "epoch": 5.748024174802417, "grad_norm": 1.2189995050430298, "learning_rate": 4.602361455745431e-05, "loss": 0.0823, "step": 12364 }, { "epoch": 5.7489539748953975, "grad_norm": 1.9871772527694702, "learning_rate": 4.5981011316309596e-05, "loss": 0.074, "step": 12366 }, { "epoch": 5.749883774988378, "grad_norm": 2.710007905960083, "learning_rate": 4.593820100105363e-05, "loss": 0.1275, "step": 12368 }, { "epoch": 5.750813575081358, "grad_norm": 1.624398946762085, "learning_rate": 4.5895184034206826e-05, "loss": 0.1272, "step": 12370 }, { "epoch": 5.751743375174337, "grad_norm": 1.2696672677993774, "learning_rate": 4.585196084032937e-05, "loss": 0.0908, "step": 12372 }, { "epoch": 5.752673175267318, "grad_norm": 2.110342025756836, "learning_rate": 4.5808531846016655e-05, "loss": 0.1107, "step": 12374 }, { "epoch": 5.753602975360297, "grad_norm": 2.414073944091797, "learning_rate": 4.576489747989542e-05, "loss": 0.1641, "step": 12376 }, { "epoch": 5.754532775453278, "grad_norm": 2.1350786685943604, "learning_rate": 4.572105817261917e-05, "loss": 0.137, "step": 12378 }, { "epoch": 5.755462575546257, "grad_norm": 2.1611897945404053, "learning_rate": 4.567701435686411e-05, "loss": 0.1198, "step": 12380 }, { "epoch": 5.756392375639238, "grad_norm": 1.2804145812988281, "learning_rate": 4.563276646732508e-05, "loss": 0.0851, "step": 12382 }, { "epoch": 5.7573221757322175, "grad_norm": 1.9430269002914429, "learning_rate": 4.558831494071075e-05, "loss": 0.082, "step": 12384 }, { "epoch": 5.758251975825198, "grad_norm": 2.2528023719787598, "learning_rate": 4.5543660215739854e-05, "loss": 0.1168, "step": 12386 }, { "epoch": 5.759181775918178, "grad_norm": 2.320835590362549, "learning_rate": 4.5498802733136436e-05, "loss": 0.0886, "step": 12388 }, { "epoch": 5.760111576011157, "grad_norm": 1.8953158855438232, "learning_rate": 4.5453742935625685e-05, "loss": 0.0786, "step": 12390 }, { "epoch": 5.761041376104138, "grad_norm": 1.6290079355239868, "learning_rate": 4.540848126792973e-05, "loss": 0.1108, "step": 12392 }, { "epoch": 5.761971176197117, "grad_norm": 6.101031303405762, "learning_rate": 4.5363018176762795e-05, "loss": 0.1225, "step": 12394 }, { "epoch": 5.762900976290098, "grad_norm": 2.6799728870391846, "learning_rate": 4.531735411082743e-05, "loss": 0.1683, "step": 12396 }, { "epoch": 5.763830776383077, "grad_norm": 2.6068906784057617, "learning_rate": 4.527148952080939e-05, "loss": 0.1228, "step": 12398 }, { "epoch": 5.764760576476058, "grad_norm": 2.0909719467163086, "learning_rate": 4.5225424859373765e-05, "loss": 0.1309, "step": 12400 }, { "epoch": 5.7656903765690375, "grad_norm": 2.2844245433807373, "learning_rate": 4.517916058116012e-05, "loss": 0.103, "step": 12402 }, { "epoch": 5.766620176662018, "grad_norm": 2.5082273483276367, "learning_rate": 4.513269714277814e-05, "loss": 0.1134, "step": 12404 }, { "epoch": 5.767549976754998, "grad_norm": 2.341315984725952, "learning_rate": 4.508603500280325e-05, "loss": 0.0995, "step": 12406 }, { "epoch": 5.768479776847977, "grad_norm": 1.8019741773605347, "learning_rate": 4.503917462177195e-05, "loss": 0.1004, "step": 12408 }, { "epoch": 5.769409576940958, "grad_norm": 1.8710129261016846, "learning_rate": 4.499211646217733e-05, "loss": 0.1206, "step": 12410 }, { "epoch": 5.770339377033938, "grad_norm": 2.7136871814727783, "learning_rate": 4.494486098846438e-05, "loss": 0.1692, "step": 12412 }, { "epoch": 5.771269177126918, "grad_norm": 2.9637253284454346, "learning_rate": 4.489740866702547e-05, "loss": 0.1742, "step": 12414 }, { "epoch": 5.772198977219897, "grad_norm": 2.1291143894195557, "learning_rate": 4.484975996619599e-05, "loss": 0.1014, "step": 12416 }, { "epoch": 5.773128777312878, "grad_norm": 1.7829033136367798, "learning_rate": 4.480191535624926e-05, "loss": 0.0922, "step": 12418 }, { "epoch": 5.7740585774058575, "grad_norm": 2.0319175720214844, "learning_rate": 4.475387530939232e-05, "loss": 0.079, "step": 12420 }, { "epoch": 5.774988377498838, "grad_norm": 2.5195810794830322, "learning_rate": 4.4705640299761095e-05, "loss": 0.089, "step": 12422 }, { "epoch": 5.775918177591818, "grad_norm": 2.230316638946533, "learning_rate": 4.465721080341554e-05, "loss": 0.0912, "step": 12424 }, { "epoch": 5.776847977684798, "grad_norm": 2.2720651626586914, "learning_rate": 4.4608587298335354e-05, "loss": 0.1141, "step": 12426 }, { "epoch": 5.777777777777778, "grad_norm": 2.098383903503418, "learning_rate": 4.455977026441477e-05, "loss": 0.1257, "step": 12428 }, { "epoch": 5.778707577870758, "grad_norm": 2.4948818683624268, "learning_rate": 4.451076018345834e-05, "loss": 0.108, "step": 12430 }, { "epoch": 5.779637377963738, "grad_norm": 1.7528849840164185, "learning_rate": 4.446155753917571e-05, "loss": 0.0815, "step": 12432 }, { "epoch": 5.780567178056717, "grad_norm": 1.673179030418396, "learning_rate": 4.441216281717701e-05, "loss": 0.1101, "step": 12434 }, { "epoch": 5.781496978149698, "grad_norm": 2.504709482192993, "learning_rate": 4.436257650496842e-05, "loss": 0.0958, "step": 12436 }, { "epoch": 5.7824267782426775, "grad_norm": 1.5669493675231934, "learning_rate": 4.4312799091946664e-05, "loss": 0.0966, "step": 12438 }, { "epoch": 5.783356578335658, "grad_norm": 2.0608723163604736, "learning_rate": 4.426283106939482e-05, "loss": 0.1587, "step": 12440 }, { "epoch": 5.784286378428638, "grad_norm": 2.2176387310028076, "learning_rate": 4.4212672930476976e-05, "loss": 0.1376, "step": 12442 }, { "epoch": 5.785216178521618, "grad_norm": 1.6616874933242798, "learning_rate": 4.416232517023384e-05, "loss": 0.1326, "step": 12444 }, { "epoch": 5.786145978614598, "grad_norm": 1.9638333320617676, "learning_rate": 4.4111788285577416e-05, "loss": 0.1104, "step": 12446 }, { "epoch": 5.787075778707578, "grad_norm": 1.8168063163757324, "learning_rate": 4.4061062775286234e-05, "loss": 0.0763, "step": 12448 }, { "epoch": 5.788005578800558, "grad_norm": 2.11787748336792, "learning_rate": 4.401014914000083e-05, "loss": 0.1386, "step": 12450 }, { "epoch": 5.788935378893538, "grad_norm": 2.235992670059204, "learning_rate": 4.3959047882218085e-05, "loss": 0.0904, "step": 12452 }, { "epoch": 5.789865178986518, "grad_norm": 1.7824243307113647, "learning_rate": 4.390775950628687e-05, "loss": 0.0928, "step": 12454 }, { "epoch": 5.790794979079498, "grad_norm": 1.7108298540115356, "learning_rate": 4.3856284518402695e-05, "loss": 0.0661, "step": 12456 }, { "epoch": 5.791724779172478, "grad_norm": 2.131162643432617, "learning_rate": 4.380462342660286e-05, "loss": 0.0797, "step": 12458 }, { "epoch": 5.792654579265458, "grad_norm": 1.6208244562149048, "learning_rate": 4.375277674076159e-05, "loss": 0.066, "step": 12460 }, { "epoch": 5.793584379358438, "grad_norm": 2.230475664138794, "learning_rate": 4.370074497258458e-05, "loss": 0.1588, "step": 12462 }, { "epoch": 5.794514179451418, "grad_norm": 2.2380971908569336, "learning_rate": 4.36485286356046e-05, "loss": 0.1175, "step": 12464 }, { "epoch": 5.795443979544398, "grad_norm": 2.4453327655792236, "learning_rate": 4.35961282451757e-05, "loss": 0.1, "step": 12466 }, { "epoch": 5.796373779637378, "grad_norm": 2.144193649291992, "learning_rate": 4.354354431846853e-05, "loss": 0.1226, "step": 12468 }, { "epoch": 5.797303579730358, "grad_norm": 3.5475072860717773, "learning_rate": 4.3490777374465335e-05, "loss": 0.1469, "step": 12470 }, { "epoch": 5.798233379823338, "grad_norm": 2.8523640632629395, "learning_rate": 4.3437827933954404e-05, "loss": 0.1581, "step": 12472 }, { "epoch": 5.799163179916318, "grad_norm": 1.9079859256744385, "learning_rate": 4.338469651952549e-05, "loss": 0.0606, "step": 12474 }, { "epoch": 5.800092980009298, "grad_norm": 1.5889806747436523, "learning_rate": 4.333138365556407e-05, "loss": 0.0929, "step": 12476 }, { "epoch": 5.801022780102278, "grad_norm": 2.2937569618225098, "learning_rate": 4.327788986824664e-05, "loss": 0.0935, "step": 12478 }, { "epoch": 5.801952580195258, "grad_norm": 2.6413447856903076, "learning_rate": 4.322421568553536e-05, "loss": 0.1608, "step": 12480 }, { "epoch": 5.802882380288238, "grad_norm": 1.9301010370254517, "learning_rate": 4.3170361637172616e-05, "loss": 0.096, "step": 12482 }, { "epoch": 5.803812180381218, "grad_norm": 2.709564447402954, "learning_rate": 4.311632825467624e-05, "loss": 0.1136, "step": 12484 }, { "epoch": 5.804741980474198, "grad_norm": 1.2223458290100098, "learning_rate": 4.3062116071333794e-05, "loss": 0.1157, "step": 12486 }, { "epoch": 5.805671780567178, "grad_norm": 2.4365317821502686, "learning_rate": 4.3007725622197694e-05, "loss": 0.0957, "step": 12488 }, { "epoch": 5.806601580660158, "grad_norm": 1.4435033798217773, "learning_rate": 4.2953157444079774e-05, "loss": 0.0766, "step": 12490 }, { "epoch": 5.807531380753138, "grad_norm": 1.7843962907791138, "learning_rate": 4.2898412075545813e-05, "loss": 0.0965, "step": 12492 }, { "epoch": 5.808461180846118, "grad_norm": 2.004945755004883, "learning_rate": 4.28434900569106e-05, "loss": 0.1116, "step": 12494 }, { "epoch": 5.8093909809390984, "grad_norm": 2.6670989990234375, "learning_rate": 4.278839193023218e-05, "loss": 0.1402, "step": 12496 }, { "epoch": 5.810320781032078, "grad_norm": 2.0297765731811523, "learning_rate": 4.273311823930693e-05, "loss": 0.0961, "step": 12498 }, { "epoch": 5.8112505811250585, "grad_norm": 2.524228096008301, "learning_rate": 4.2677669529663805e-05, "loss": 0.1164, "step": 12500 }, { "epoch": 5.812180381218038, "grad_norm": 1.9763257503509521, "learning_rate": 4.2622046348559064e-05, "loss": 0.0913, "step": 12502 }, { "epoch": 5.813110181311018, "grad_norm": 1.8777437210083008, "learning_rate": 4.256624924497129e-05, "loss": 0.1053, "step": 12504 }, { "epoch": 5.814039981403998, "grad_norm": 1.764502763748169, "learning_rate": 4.2510278769595195e-05, "loss": 0.0609, "step": 12506 }, { "epoch": 5.814969781496978, "grad_norm": 1.8128196001052856, "learning_rate": 4.245413547483689e-05, "loss": 0.1054, "step": 12508 }, { "epoch": 5.815899581589958, "grad_norm": 1.6660940647125244, "learning_rate": 4.2397819914807964e-05, "loss": 0.1044, "step": 12510 }, { "epoch": 5.816829381682938, "grad_norm": 1.8121857643127441, "learning_rate": 4.23413326453202e-05, "loss": 0.0921, "step": 12512 }, { "epoch": 5.817759181775918, "grad_norm": 1.1861469745635986, "learning_rate": 4.228467422388027e-05, "loss": 0.0658, "step": 12514 }, { "epoch": 5.818688981868898, "grad_norm": 2.7674405574798584, "learning_rate": 4.222784520968372e-05, "loss": 0.1278, "step": 12516 }, { "epoch": 5.8196187819618785, "grad_norm": 1.7144279479980469, "learning_rate": 4.2170846163610254e-05, "loss": 0.0914, "step": 12518 }, { "epoch": 5.820548582054858, "grad_norm": 1.9853260517120361, "learning_rate": 4.21136776482173e-05, "loss": 0.099, "step": 12520 }, { "epoch": 5.821478382147838, "grad_norm": 2.853935718536377, "learning_rate": 4.2056340227734954e-05, "loss": 0.0856, "step": 12522 }, { "epoch": 5.822408182240818, "grad_norm": 2.088120937347412, "learning_rate": 4.199883446806056e-05, "loss": 0.1574, "step": 12524 }, { "epoch": 5.823337982333798, "grad_norm": 2.359513521194458, "learning_rate": 4.194116093675261e-05, "loss": 0.1093, "step": 12526 }, { "epoch": 5.824267782426778, "grad_norm": 1.9720423221588135, "learning_rate": 4.1883320203025694e-05, "loss": 0.0938, "step": 12528 }, { "epoch": 5.825197582519758, "grad_norm": 2.4064087867736816, "learning_rate": 4.182531283774432e-05, "loss": 0.1216, "step": 12530 }, { "epoch": 5.826127382612738, "grad_norm": 1.9201300144195557, "learning_rate": 4.176713941341803e-05, "loss": 0.1255, "step": 12532 }, { "epoch": 5.827057182705718, "grad_norm": 2.6424670219421387, "learning_rate": 4.170880050419489e-05, "loss": 0.1121, "step": 12534 }, { "epoch": 5.8279869827986985, "grad_norm": 2.3180654048919678, "learning_rate": 4.1650296685856325e-05, "loss": 0.1104, "step": 12536 }, { "epoch": 5.828916782891678, "grad_norm": 1.4629520177841187, "learning_rate": 4.1591628535811535e-05, "loss": 0.0572, "step": 12538 }, { "epoch": 5.829846582984659, "grad_norm": 1.9328056573867798, "learning_rate": 4.1532796633091336e-05, "loss": 0.0921, "step": 12540 }, { "epoch": 5.830776383077638, "grad_norm": 1.584564208984375, "learning_rate": 4.1473801558342996e-05, "loss": 0.0745, "step": 12542 }, { "epoch": 5.831706183170619, "grad_norm": 1.8288564682006836, "learning_rate": 4.1414643893823954e-05, "loss": 0.1291, "step": 12544 }, { "epoch": 5.832635983263598, "grad_norm": 1.7873016595840454, "learning_rate": 4.1355324223396537e-05, "loss": 0.1136, "step": 12546 }, { "epoch": 5.833565783356578, "grad_norm": 2.12092924118042, "learning_rate": 4.129584313252203e-05, "loss": 0.0949, "step": 12548 }, { "epoch": 5.834495583449558, "grad_norm": 1.4676612615585327, "learning_rate": 4.1236201208254617e-05, "loss": 0.0902, "step": 12550 }, { "epoch": 5.835425383542538, "grad_norm": 2.1044375896453857, "learning_rate": 4.1176399039236164e-05, "loss": 0.1137, "step": 12552 }, { "epoch": 5.8363551836355185, "grad_norm": 1.9668172597885132, "learning_rate": 4.111643721568988e-05, "loss": 0.0798, "step": 12554 }, { "epoch": 5.837284983728498, "grad_norm": 1.3889586925506592, "learning_rate": 4.1056316329414677e-05, "loss": 0.0833, "step": 12556 }, { "epoch": 5.838214783821479, "grad_norm": 2.692190170288086, "learning_rate": 4.099603697377949e-05, "loss": 0.09, "step": 12558 }, { "epoch": 5.839144583914458, "grad_norm": 1.3971818685531616, "learning_rate": 4.093559974371725e-05, "loss": 0.084, "step": 12560 }, { "epoch": 5.840074384007439, "grad_norm": 2.345846176147461, "learning_rate": 4.087500523571906e-05, "loss": 0.0885, "step": 12562 }, { "epoch": 5.841004184100418, "grad_norm": 1.5218727588653564, "learning_rate": 4.0814254047828184e-05, "loss": 0.1286, "step": 12564 }, { "epoch": 5.841933984193398, "grad_norm": 2.504326343536377, "learning_rate": 4.075334677963427e-05, "loss": 0.0775, "step": 12566 }, { "epoch": 5.842863784286378, "grad_norm": 1.6311426162719727, "learning_rate": 4.069228403226759e-05, "loss": 0.1239, "step": 12568 }, { "epoch": 5.843793584379359, "grad_norm": 2.467662811279297, "learning_rate": 4.063106640839268e-05, "loss": 0.103, "step": 12570 }, { "epoch": 5.8447233844723385, "grad_norm": 2.376504421234131, "learning_rate": 4.0569694512202825e-05, "loss": 0.0778, "step": 12572 }, { "epoch": 5.845653184565318, "grad_norm": 1.2858819961547852, "learning_rate": 4.0508168949413886e-05, "loss": 0.0975, "step": 12574 }, { "epoch": 5.846582984658299, "grad_norm": 1.455694556236267, "learning_rate": 4.044649032725839e-05, "loss": 0.0831, "step": 12576 }, { "epoch": 5.847512784751278, "grad_norm": 2.0077297687530518, "learning_rate": 4.0384659254479354e-05, "loss": 0.0929, "step": 12578 }, { "epoch": 5.848442584844259, "grad_norm": 1.165464162826538, "learning_rate": 4.0322676341324456e-05, "loss": 0.1052, "step": 12580 }, { "epoch": 5.849372384937238, "grad_norm": 2.562197208404541, "learning_rate": 4.0260542199540146e-05, "loss": 0.0941, "step": 12582 }, { "epoch": 5.850302185030219, "grad_norm": 2.578064441680908, "learning_rate": 4.0198257442365105e-05, "loss": 0.115, "step": 12584 }, { "epoch": 5.851231985123198, "grad_norm": 2.260519504547119, "learning_rate": 4.0135822684525044e-05, "loss": 0.1077, "step": 12586 }, { "epoch": 5.852161785216179, "grad_norm": 1.8755253553390503, "learning_rate": 4.007323854222566e-05, "loss": 0.0992, "step": 12588 }, { "epoch": 5.8530915853091585, "grad_norm": 1.2532405853271484, "learning_rate": 4.001050563314711e-05, "loss": 0.077, "step": 12590 }, { "epoch": 5.854021385402138, "grad_norm": 1.467947244644165, "learning_rate": 3.9947624576438016e-05, "loss": 0.1077, "step": 12592 }, { "epoch": 5.854951185495119, "grad_norm": 2.486992835998535, "learning_rate": 3.9884595992708886e-05, "loss": 0.1043, "step": 12594 }, { "epoch": 5.855880985588098, "grad_norm": 2.487854242324829, "learning_rate": 3.982142050402654e-05, "loss": 0.0979, "step": 12596 }, { "epoch": 5.856810785681079, "grad_norm": 2.182842969894409, "learning_rate": 3.97580987339074e-05, "loss": 0.0991, "step": 12598 }, { "epoch": 5.857740585774058, "grad_norm": 1.7774951457977295, "learning_rate": 3.969463130731183e-05, "loss": 0.1204, "step": 12600 }, { "epoch": 5.858670385867039, "grad_norm": 1.920225977897644, "learning_rate": 3.9631018850637794e-05, "loss": 0.0956, "step": 12602 }, { "epoch": 5.859600185960018, "grad_norm": 2.1961417198181152, "learning_rate": 3.9567261991714404e-05, "loss": 0.1028, "step": 12604 }, { "epoch": 5.860529986052999, "grad_norm": 1.7956180572509766, "learning_rate": 3.9503361359796284e-05, "loss": 0.0934, "step": 12606 }, { "epoch": 5.8614597861459785, "grad_norm": 2.163783311843872, "learning_rate": 3.943931758555678e-05, "loss": 0.0828, "step": 12608 }, { "epoch": 5.862389586238959, "grad_norm": 1.539665937423706, "learning_rate": 3.937513130108202e-05, "loss": 0.1042, "step": 12610 }, { "epoch": 5.863319386331939, "grad_norm": 2.0996832847595215, "learning_rate": 3.9310803139864795e-05, "loss": 0.1066, "step": 12612 }, { "epoch": 5.864249186424919, "grad_norm": 2.1134135723114014, "learning_rate": 3.924633373679808e-05, "loss": 0.1196, "step": 12614 }, { "epoch": 5.865178986517899, "grad_norm": 1.771055817604065, "learning_rate": 3.918172372816894e-05, "loss": 0.0647, "step": 12616 }, { "epoch": 5.866108786610878, "grad_norm": 1.5750805139541626, "learning_rate": 3.911697375165192e-05, "loss": 0.0632, "step": 12618 }, { "epoch": 5.867038586703859, "grad_norm": 2.2137041091918945, "learning_rate": 3.9052084446303294e-05, "loss": 0.1271, "step": 12620 }, { "epoch": 5.867968386796838, "grad_norm": 1.9085735082626343, "learning_rate": 3.898705645255425e-05, "loss": 0.0775, "step": 12622 }, { "epoch": 5.868898186889819, "grad_norm": 1.7608975172042847, "learning_rate": 3.8921890412204745e-05, "loss": 0.1157, "step": 12624 }, { "epoch": 5.8698279869827985, "grad_norm": 1.361556053161621, "learning_rate": 3.885658696841736e-05, "loss": 0.111, "step": 12626 }, { "epoch": 5.870757787075779, "grad_norm": 2.460414409637451, "learning_rate": 3.879114676571073e-05, "loss": 0.1112, "step": 12628 }, { "epoch": 5.871687587168759, "grad_norm": 1.975178837776184, "learning_rate": 3.872557044995331e-05, "loss": 0.1011, "step": 12630 }, { "epoch": 5.872617387261739, "grad_norm": 1.9638670682907104, "learning_rate": 3.865985866835679e-05, "loss": 0.0938, "step": 12632 }, { "epoch": 5.873547187354719, "grad_norm": 2.9590890407562256, "learning_rate": 3.8594012069469844e-05, "loss": 0.0964, "step": 12634 }, { "epoch": 5.874476987447698, "grad_norm": 2.3413777351379395, "learning_rate": 3.8528031303171976e-05, "loss": 0.1345, "step": 12636 }, { "epoch": 5.875406787540679, "grad_norm": 2.180675983428955, "learning_rate": 3.8461917020666545e-05, "loss": 0.1143, "step": 12638 }, { "epoch": 5.876336587633658, "grad_norm": 2.2291817665100098, "learning_rate": 3.8395669874474915e-05, "loss": 0.0867, "step": 12640 }, { "epoch": 5.877266387726639, "grad_norm": 2.1938183307647705, "learning_rate": 3.832929051842975e-05, "loss": 0.0881, "step": 12642 }, { "epoch": 5.8781961878196185, "grad_norm": 1.3347498178482056, "learning_rate": 3.826277960766836e-05, "loss": 0.093, "step": 12644 }, { "epoch": 5.879125987912599, "grad_norm": 1.2921854257583618, "learning_rate": 3.819613779862671e-05, "loss": 0.0928, "step": 12646 }, { "epoch": 5.880055788005579, "grad_norm": 1.3534587621688843, "learning_rate": 3.8129365749032406e-05, "loss": 0.0695, "step": 12648 }, { "epoch": 5.880985588098559, "grad_norm": 1.661928653717041, "learning_rate": 3.806246411789877e-05, "loss": 0.0755, "step": 12650 }, { "epoch": 5.881915388191539, "grad_norm": 2.7976043224334717, "learning_rate": 3.799543356551783e-05, "loss": 0.127, "step": 12652 }, { "epoch": 5.882845188284519, "grad_norm": 1.7153993844985962, "learning_rate": 3.7928274753453916e-05, "loss": 0.0997, "step": 12654 }, { "epoch": 5.883774988377499, "grad_norm": 2.345790147781372, "learning_rate": 3.78609883445377e-05, "loss": 0.0911, "step": 12656 }, { "epoch": 5.884704788470479, "grad_norm": 1.847261667251587, "learning_rate": 3.779357500285863e-05, "loss": 0.092, "step": 12658 }, { "epoch": 5.885634588563459, "grad_norm": 1.531227469444275, "learning_rate": 3.772603539375934e-05, "loss": 0.067, "step": 12660 }, { "epoch": 5.8865643886564385, "grad_norm": 2.2183663845062256, "learning_rate": 3.765837018382833e-05, "loss": 0.0644, "step": 12662 }, { "epoch": 5.887494188749419, "grad_norm": 2.347368001937866, "learning_rate": 3.759058004089409e-05, "loss": 0.0994, "step": 12664 }, { "epoch": 5.888423988842399, "grad_norm": 1.7099281549453735, "learning_rate": 3.752266563401786e-05, "loss": 0.1042, "step": 12666 }, { "epoch": 5.889353788935379, "grad_norm": 2.0705666542053223, "learning_rate": 3.745462763348727e-05, "loss": 0.1016, "step": 12668 }, { "epoch": 5.890283589028359, "grad_norm": 1.524127721786499, "learning_rate": 3.7386466710810234e-05, "loss": 0.0966, "step": 12670 }, { "epoch": 5.891213389121339, "grad_norm": 2.4282920360565186, "learning_rate": 3.73181835387073e-05, "loss": 0.0743, "step": 12672 }, { "epoch": 5.892143189214319, "grad_norm": 1.699442982673645, "learning_rate": 3.724977879110597e-05, "loss": 0.0801, "step": 12674 }, { "epoch": 5.893072989307299, "grad_norm": 1.8700140714645386, "learning_rate": 3.7181253143133404e-05, "loss": 0.0915, "step": 12676 }, { "epoch": 5.894002789400279, "grad_norm": 1.9323118925094604, "learning_rate": 3.711260727111001e-05, "loss": 0.0909, "step": 12678 }, { "epoch": 5.8949325894932585, "grad_norm": 1.984694242477417, "learning_rate": 3.704384185254299e-05, "loss": 0.1026, "step": 12680 }, { "epoch": 5.895862389586239, "grad_norm": 3.136770009994507, "learning_rate": 3.697495756611902e-05, "loss": 0.1061, "step": 12682 }, { "epoch": 5.896792189679219, "grad_norm": 1.6665529012680054, "learning_rate": 3.690595509169852e-05, "loss": 0.1045, "step": 12684 }, { "epoch": 5.897721989772199, "grad_norm": 2.2622690200805664, "learning_rate": 3.683683511030788e-05, "loss": 0.0855, "step": 12686 }, { "epoch": 5.898651789865179, "grad_norm": 2.891225814819336, "learning_rate": 3.6767598304133364e-05, "loss": 0.11, "step": 12688 }, { "epoch": 5.899581589958159, "grad_norm": 2.536715507507324, "learning_rate": 3.6698245356514436e-05, "loss": 0.0954, "step": 12690 }, { "epoch": 5.900511390051139, "grad_norm": 2.340327024459839, "learning_rate": 3.662877695193653e-05, "loss": 0.109, "step": 12692 }, { "epoch": 5.901441190144119, "grad_norm": 1.8911162614822388, "learning_rate": 3.655919377602483e-05, "loss": 0.0966, "step": 12694 }, { "epoch": 5.902370990237099, "grad_norm": 1.3123372793197632, "learning_rate": 3.648949651553727e-05, "loss": 0.0803, "step": 12696 }, { "epoch": 5.903300790330079, "grad_norm": 1.3063011169433594, "learning_rate": 3.6419685858357526e-05, "loss": 0.0527, "step": 12698 }, { "epoch": 5.904230590423059, "grad_norm": 1.593124270439148, "learning_rate": 3.634976249348875e-05, "loss": 0.0746, "step": 12700 }, { "epoch": 5.9051603905160395, "grad_norm": 2.112694263458252, "learning_rate": 3.627972711104617e-05, "loss": 0.1652, "step": 12702 }, { "epoch": 5.906090190609019, "grad_norm": 1.2948874235153198, "learning_rate": 3.62095804022509e-05, "loss": 0.0493, "step": 12704 }, { "epoch": 5.907019990701999, "grad_norm": 2.424405574798584, "learning_rate": 3.613932305942247e-05, "loss": 0.1243, "step": 12706 }, { "epoch": 5.907949790794979, "grad_norm": 1.776635766029358, "learning_rate": 3.606895577597256e-05, "loss": 0.0797, "step": 12708 }, { "epoch": 5.908879590887959, "grad_norm": 1.7711074352264404, "learning_rate": 3.599847924639795e-05, "loss": 0.1263, "step": 12710 }, { "epoch": 5.909809390980939, "grad_norm": 2.413684129714966, "learning_rate": 3.5927894166273356e-05, "loss": 0.0742, "step": 12712 }, { "epoch": 5.910739191073919, "grad_norm": 2.1432735919952393, "learning_rate": 3.585720123224519e-05, "loss": 0.0839, "step": 12714 }, { "epoch": 5.911668991166899, "grad_norm": 1.4422991275787354, "learning_rate": 3.5786401142024e-05, "loss": 0.0698, "step": 12716 }, { "epoch": 5.912598791259879, "grad_norm": 2.429687261581421, "learning_rate": 3.571549459437828e-05, "loss": 0.0749, "step": 12718 }, { "epoch": 5.9135285913528595, "grad_norm": 1.7488341331481934, "learning_rate": 3.5644482289126936e-05, "loss": 0.1088, "step": 12720 }, { "epoch": 5.914458391445839, "grad_norm": 1.5201761722564697, "learning_rate": 3.557336492713257e-05, "loss": 0.0928, "step": 12722 }, { "epoch": 5.915388191538819, "grad_norm": 2.1953327655792236, "learning_rate": 3.550214321029519e-05, "loss": 0.0826, "step": 12724 }, { "epoch": 5.916317991631799, "grad_norm": 2.4268558025360107, "learning_rate": 3.5430817841544126e-05, "loss": 0.0845, "step": 12726 }, { "epoch": 5.91724779172478, "grad_norm": 1.9725035429000854, "learning_rate": 3.535938952483215e-05, "loss": 0.1059, "step": 12728 }, { "epoch": 5.918177591817759, "grad_norm": 1.8085644245147705, "learning_rate": 3.528785896512781e-05, "loss": 0.1043, "step": 12730 }, { "epoch": 5.919107391910739, "grad_norm": 1.2001217603683472, "learning_rate": 3.521622686840878e-05, "loss": 0.0716, "step": 12732 }, { "epoch": 5.920037192003719, "grad_norm": 2.0317728519439697, "learning_rate": 3.5144493941655096e-05, "loss": 0.0799, "step": 12734 }, { "epoch": 5.920966992096699, "grad_norm": 2.595679998397827, "learning_rate": 3.507266089284154e-05, "loss": 0.1043, "step": 12736 }, { "epoch": 5.9218967921896795, "grad_norm": 2.126004934310913, "learning_rate": 3.500072843093163e-05, "loss": 0.084, "step": 12738 }, { "epoch": 5.922826592282659, "grad_norm": 2.143115520477295, "learning_rate": 3.492869726586957e-05, "loss": 0.103, "step": 12740 }, { "epoch": 5.92375639237564, "grad_norm": 1.4913634061813354, "learning_rate": 3.485656810857379e-05, "loss": 0.1104, "step": 12742 }, { "epoch": 5.924686192468619, "grad_norm": 2.264984130859375, "learning_rate": 3.4784341670930126e-05, "loss": 0.1157, "step": 12744 }, { "epoch": 5.9256159925616, "grad_norm": 1.1634900569915771, "learning_rate": 3.471201866578418e-05, "loss": 0.0682, "step": 12746 }, { "epoch": 5.926545792654579, "grad_norm": 1.645839810371399, "learning_rate": 3.463959980693498e-05, "loss": 0.071, "step": 12748 }, { "epoch": 5.927475592747559, "grad_norm": 1.932644248008728, "learning_rate": 3.4567085809127195e-05, "loss": 0.0495, "step": 12750 }, { "epoch": 5.928405392840539, "grad_norm": 2.60508394241333, "learning_rate": 3.449447738804502e-05, "loss": 0.1116, "step": 12752 }, { "epoch": 5.929335192933519, "grad_norm": 1.4826359748840332, "learning_rate": 3.44217752603041e-05, "loss": 0.0811, "step": 12754 }, { "epoch": 5.9302649930264995, "grad_norm": 2.840425968170166, "learning_rate": 3.434898014344501e-05, "loss": 0.0841, "step": 12756 }, { "epoch": 5.931194793119479, "grad_norm": 3.0595273971557617, "learning_rate": 3.427609275592632e-05, "loss": 0.1018, "step": 12758 }, { "epoch": 5.9321245932124596, "grad_norm": 2.9116806983947754, "learning_rate": 3.4203113817116957e-05, "loss": 0.0955, "step": 12760 }, { "epoch": 5.933054393305439, "grad_norm": 1.2570229768753052, "learning_rate": 3.413004404728975e-05, "loss": 0.0857, "step": 12762 }, { "epoch": 5.93398419339842, "grad_norm": 2.1901187896728516, "learning_rate": 3.405688416761365e-05, "loss": 0.1101, "step": 12764 }, { "epoch": 5.934913993491399, "grad_norm": 2.0596654415130615, "learning_rate": 3.398363490014724e-05, "loss": 0.0698, "step": 12766 }, { "epoch": 5.93584379358438, "grad_norm": 1.67765212059021, "learning_rate": 3.391029696783129e-05, "loss": 0.0882, "step": 12768 }, { "epoch": 5.936773593677359, "grad_norm": 1.531288981437683, "learning_rate": 3.383687109448141e-05, "loss": 0.0913, "step": 12770 }, { "epoch": 5.93770339377034, "grad_norm": 1.9222815036773682, "learning_rate": 3.37633580047815e-05, "loss": 0.0892, "step": 12772 }, { "epoch": 5.9386331938633194, "grad_norm": 1.721731185913086, "learning_rate": 3.3689758424276e-05, "loss": 0.0839, "step": 12774 }, { "epoch": 5.939562993956299, "grad_norm": 2.5128931999206543, "learning_rate": 3.3616073079362885e-05, "loss": 0.1292, "step": 12776 }, { "epoch": 5.9404927940492795, "grad_norm": 1.8511494398117065, "learning_rate": 3.354230269728709e-05, "loss": 0.0778, "step": 12778 }, { "epoch": 5.941422594142259, "grad_norm": 1.3415356874465942, "learning_rate": 3.346844800613225e-05, "loss": 0.0614, "step": 12780 }, { "epoch": 5.94235239423524, "grad_norm": 1.3587360382080078, "learning_rate": 3.339450973481453e-05, "loss": 0.0759, "step": 12782 }, { "epoch": 5.943282194328219, "grad_norm": 2.008603572845459, "learning_rate": 3.332048861307472e-05, "loss": 0.13, "step": 12784 }, { "epoch": 5.9442119944212, "grad_norm": 1.705855131149292, "learning_rate": 3.324638537147134e-05, "loss": 0.0637, "step": 12786 }, { "epoch": 5.945141794514179, "grad_norm": 1.9109724760055542, "learning_rate": 3.3172200741373624e-05, "loss": 0.1163, "step": 12788 }, { "epoch": 5.94607159460716, "grad_norm": 3.116912841796875, "learning_rate": 3.309793545495368e-05, "loss": 0.1649, "step": 12790 }, { "epoch": 5.947001394700139, "grad_norm": 2.441354274749756, "learning_rate": 3.302359024518022e-05, "loss": 0.1392, "step": 12792 }, { "epoch": 5.947931194793119, "grad_norm": 2.702509641647339, "learning_rate": 3.294916584581022e-05, "loss": 0.1075, "step": 12794 }, { "epoch": 5.9488609948860995, "grad_norm": 2.1541152000427246, "learning_rate": 3.2874662991382616e-05, "loss": 0.1186, "step": 12796 }, { "epoch": 5.949790794979079, "grad_norm": 1.784018874168396, "learning_rate": 3.280008241721041e-05, "loss": 0.1005, "step": 12798 }, { "epoch": 5.95072059507206, "grad_norm": 2.156798839569092, "learning_rate": 3.2725424859373684e-05, "loss": 0.1174, "step": 12800 }, { "epoch": 5.951650395165039, "grad_norm": 2.3077619075775146, "learning_rate": 3.2650691054712566e-05, "loss": 0.0881, "step": 12802 }, { "epoch": 5.95258019525802, "grad_norm": 2.1723971366882324, "learning_rate": 3.2575881740819273e-05, "loss": 0.1363, "step": 12804 }, { "epoch": 5.953509995350999, "grad_norm": 2.6105432510375977, "learning_rate": 3.250099765603188e-05, "loss": 0.1209, "step": 12806 }, { "epoch": 5.95443979544398, "grad_norm": 1.2066704034805298, "learning_rate": 3.242603953942589e-05, "loss": 0.075, "step": 12808 }, { "epoch": 5.955369595536959, "grad_norm": 1.3088715076446533, "learning_rate": 3.235100813080758e-05, "loss": 0.0945, "step": 12810 }, { "epoch": 5.95629939562994, "grad_norm": 2.4228732585906982, "learning_rate": 3.2275904170706824e-05, "loss": 0.0937, "step": 12812 }, { "epoch": 5.9572291957229195, "grad_norm": 1.9537429809570312, "learning_rate": 3.220072840036922e-05, "loss": 0.1136, "step": 12814 }, { "epoch": 5.9581589958159, "grad_norm": 1.8511638641357422, "learning_rate": 3.2125481561749436e-05, "loss": 0.1281, "step": 12816 }, { "epoch": 5.95908879590888, "grad_norm": 2.137538194656372, "learning_rate": 3.205016439750323e-05, "loss": 0.0639, "step": 12818 }, { "epoch": 5.960018596001859, "grad_norm": 2.0000016689300537, "learning_rate": 3.1974777650980695e-05, "loss": 0.0951, "step": 12820 }, { "epoch": 5.96094839609484, "grad_norm": 1.440139889717102, "learning_rate": 3.189932206621866e-05, "loss": 0.0761, "step": 12822 }, { "epoch": 5.961878196187819, "grad_norm": 1.487687587738037, "learning_rate": 3.182379838793311e-05, "loss": 0.0902, "step": 12824 }, { "epoch": 5.9628079962808, "grad_norm": 2.176578998565674, "learning_rate": 3.1748207361512437e-05, "loss": 0.0837, "step": 12826 }, { "epoch": 5.963737796373779, "grad_norm": 1.7468969821929932, "learning_rate": 3.1672549733009464e-05, "loss": 0.0969, "step": 12828 }, { "epoch": 5.96466759646676, "grad_norm": 2.064628839492798, "learning_rate": 3.159682624913436e-05, "loss": 0.0936, "step": 12830 }, { "epoch": 5.9655973965597395, "grad_norm": 1.8159832954406738, "learning_rate": 3.1521037657247416e-05, "loss": 0.1032, "step": 12832 }, { "epoch": 5.96652719665272, "grad_norm": 1.0413326025009155, "learning_rate": 3.144518470535145e-05, "loss": 0.061, "step": 12834 }, { "epoch": 5.9674569967457, "grad_norm": 2.242917776107788, "learning_rate": 3.1369268142084556e-05, "loss": 0.1128, "step": 12836 }, { "epoch": 5.968386796838679, "grad_norm": 1.360742211341858, "learning_rate": 3.1293288716712396e-05, "loss": 0.0554, "step": 12838 }, { "epoch": 5.96931659693166, "grad_norm": 1.603527307510376, "learning_rate": 3.121724717912139e-05, "loss": 0.0733, "step": 12840 }, { "epoch": 5.970246397024639, "grad_norm": 2.0500240325927734, "learning_rate": 3.114114427981073e-05, "loss": 0.1041, "step": 12842 }, { "epoch": 5.97117619711762, "grad_norm": 2.2164902687072754, "learning_rate": 3.106498076988521e-05, "loss": 0.0564, "step": 12844 }, { "epoch": 5.972105997210599, "grad_norm": 1.7220884561538696, "learning_rate": 3.098875740104803e-05, "loss": 0.0696, "step": 12846 }, { "epoch": 5.97303579730358, "grad_norm": 1.726553201675415, "learning_rate": 3.091247492559306e-05, "loss": 0.0698, "step": 12848 }, { "epoch": 5.9739655973965595, "grad_norm": 2.1800904273986816, "learning_rate": 3.083613409639763e-05, "loss": 0.0841, "step": 12850 }, { "epoch": 5.97489539748954, "grad_norm": 2.567237377166748, "learning_rate": 3.075973566691481e-05, "loss": 0.1294, "step": 12852 }, { "epoch": 5.97582519758252, "grad_norm": 1.6333156824111938, "learning_rate": 3.068328039116617e-05, "loss": 0.066, "step": 12854 }, { "epoch": 5.9767549976755, "grad_norm": 2.0698742866516113, "learning_rate": 3.060676902373458e-05, "loss": 0.1389, "step": 12856 }, { "epoch": 5.97768479776848, "grad_norm": 1.6617354154586792, "learning_rate": 3.053020231975619e-05, "loss": 0.0826, "step": 12858 }, { "epoch": 5.97861459786146, "grad_norm": 1.8813962936401367, "learning_rate": 3.0453581034913537e-05, "loss": 0.1194, "step": 12860 }, { "epoch": 5.97954439795444, "grad_norm": 2.305602550506592, "learning_rate": 3.0376905925427857e-05, "loss": 0.0899, "step": 12862 }, { "epoch": 5.980474198047419, "grad_norm": 1.5752153396606445, "learning_rate": 3.030017774805135e-05, "loss": 0.0772, "step": 12864 }, { "epoch": 5.9814039981404, "grad_norm": 2.1647207736968994, "learning_rate": 3.022339726006032e-05, "loss": 0.1, "step": 12866 }, { "epoch": 5.9823337982333795, "grad_norm": 2.3482136726379395, "learning_rate": 3.0146565219247022e-05, "loss": 0.0811, "step": 12868 }, { "epoch": 5.98326359832636, "grad_norm": 1.8554720878601074, "learning_rate": 3.006968238391285e-05, "loss": 0.0765, "step": 12870 }, { "epoch": 5.98419339841934, "grad_norm": 1.9653997421264648, "learning_rate": 2.999274951286026e-05, "loss": 0.085, "step": 12872 }, { "epoch": 5.98512319851232, "grad_norm": 1.212368130683899, "learning_rate": 2.991576736538547e-05, "loss": 0.0606, "step": 12874 }, { "epoch": 5.9860529986053, "grad_norm": 1.2822353839874268, "learning_rate": 2.983873670127152e-05, "loss": 0.0498, "step": 12876 }, { "epoch": 5.98698279869828, "grad_norm": 1.361193060874939, "learning_rate": 2.976165828077972e-05, "loss": 0.0705, "step": 12878 }, { "epoch": 5.98791259879126, "grad_norm": 1.9404796361923218, "learning_rate": 2.9684532864643143e-05, "loss": 0.0721, "step": 12880 }, { "epoch": 5.988842398884239, "grad_norm": 1.0866174697875977, "learning_rate": 2.9607361214058318e-05, "loss": 0.0746, "step": 12882 }, { "epoch": 5.98977219897722, "grad_norm": 1.7517999410629272, "learning_rate": 2.9530144090678462e-05, "loss": 0.0698, "step": 12884 }, { "epoch": 5.9907019990702, "grad_norm": 1.2198930978775024, "learning_rate": 2.9452882256605245e-05, "loss": 0.0495, "step": 12886 }, { "epoch": 5.99163179916318, "grad_norm": 2.493791341781616, "learning_rate": 2.9375576474381854e-05, "loss": 0.0818, "step": 12888 }, { "epoch": 5.99256159925616, "grad_norm": 1.7449913024902344, "learning_rate": 2.929822750698524e-05, "loss": 0.0973, "step": 12890 }, { "epoch": 5.99349139934914, "grad_norm": 2.0404646396636963, "learning_rate": 2.9220836117818307e-05, "loss": 0.0774, "step": 12892 }, { "epoch": 5.99442119944212, "grad_norm": 1.5566002130508423, "learning_rate": 2.9143403070703003e-05, "loss": 0.0786, "step": 12894 }, { "epoch": 5.9953509995351, "grad_norm": 1.849350929260254, "learning_rate": 2.9065929129872145e-05, "loss": 0.0987, "step": 12896 }, { "epoch": 5.99628079962808, "grad_norm": 1.5735865831375122, "learning_rate": 2.8988415059962173e-05, "loss": 0.0627, "step": 12898 }, { "epoch": 5.99721059972106, "grad_norm": 1.4633938074111938, "learning_rate": 2.8910861626005752e-05, "loss": 0.0541, "step": 12900 }, { "epoch": 5.99814039981404, "grad_norm": 1.4479475021362305, "learning_rate": 2.8833269593423957e-05, "loss": 0.0972, "step": 12902 }, { "epoch": 5.99907019990702, "grad_norm": 2.2298166751861572, "learning_rate": 2.875563972801892e-05, "loss": 0.1028, "step": 12904 }, { "epoch": 6.0, "grad_norm": 2.3352551460266113, "learning_rate": 2.8677972795965974e-05, "loss": 0.0751, "step": 12906 }, { "epoch": 6.00092980009298, "grad_norm": 1.3462495803833008, "learning_rate": 2.8600269563806302e-05, "loss": 0.0595, "step": 12908 }, { "epoch": 6.00185960018596, "grad_norm": 1.8382556438446045, "learning_rate": 2.8522530798439615e-05, "loss": 0.0647, "step": 12910 }, { "epoch": 6.00278940027894, "grad_norm": 1.1479978561401367, "learning_rate": 2.8444757267115958e-05, "loss": 0.0396, "step": 12912 }, { "epoch": 6.00371920037192, "grad_norm": 1.9803670644760132, "learning_rate": 2.8366949737428783e-05, "loss": 0.0925, "step": 12914 }, { "epoch": 6.0046490004649, "grad_norm": 0.8046181201934814, "learning_rate": 2.8289108977307077e-05, "loss": 0.0691, "step": 12916 }, { "epoch": 6.00557880055788, "grad_norm": 0.8234753012657166, "learning_rate": 2.821123575500755e-05, "loss": 0.0395, "step": 12918 }, { "epoch": 6.00650860065086, "grad_norm": 1.0858395099639893, "learning_rate": 2.813333083910763e-05, "loss": 0.0517, "step": 12920 }, { "epoch": 6.00743840074384, "grad_norm": 1.0481816530227661, "learning_rate": 2.8055394998497217e-05, "loss": 0.0588, "step": 12922 }, { "epoch": 6.00836820083682, "grad_norm": 1.8355960845947266, "learning_rate": 2.7977429002371774e-05, "loss": 0.0417, "step": 12924 }, { "epoch": 6.0092980009298005, "grad_norm": 1.0734847784042358, "learning_rate": 2.7899433620224022e-05, "loss": 0.0422, "step": 12926 }, { "epoch": 6.01022780102278, "grad_norm": 0.9723220467567444, "learning_rate": 2.782140962183699e-05, "loss": 0.0506, "step": 12928 }, { "epoch": 6.01115760111576, "grad_norm": 1.3237355947494507, "learning_rate": 2.774335777727613e-05, "loss": 0.0429, "step": 12930 }, { "epoch": 6.01208740120874, "grad_norm": 1.5607668161392212, "learning_rate": 2.766527885688146e-05, "loss": 0.0657, "step": 12932 }, { "epoch": 6.01301720130172, "grad_norm": 1.373487114906311, "learning_rate": 2.758717363126057e-05, "loss": 0.0541, "step": 12934 }, { "epoch": 6.0139470013947, "grad_norm": 0.9166557788848877, "learning_rate": 2.750904287128034e-05, "loss": 0.0347, "step": 12936 }, { "epoch": 6.01487680148768, "grad_norm": 1.2642649412155151, "learning_rate": 2.743088734806e-05, "loss": 0.045, "step": 12938 }, { "epoch": 6.01580660158066, "grad_norm": 1.464198350906372, "learning_rate": 2.7352707832962926e-05, "loss": 0.062, "step": 12940 }, { "epoch": 6.01673640167364, "grad_norm": 1.492685317993164, "learning_rate": 2.7274505097589185e-05, "loss": 0.0596, "step": 12942 }, { "epoch": 6.0176662017666205, "grad_norm": 1.5413881540298462, "learning_rate": 2.7196279913768567e-05, "loss": 0.044, "step": 12944 }, { "epoch": 6.0185960018596, "grad_norm": 0.7860512733459473, "learning_rate": 2.711803305355179e-05, "loss": 0.0543, "step": 12946 }, { "epoch": 6.019525801952581, "grad_norm": 1.5613597631454468, "learning_rate": 2.7039765289203936e-05, "loss": 0.0451, "step": 12948 }, { "epoch": 6.02045560204556, "grad_norm": 0.8996490836143494, "learning_rate": 2.696147739319617e-05, "loss": 0.0334, "step": 12950 }, { "epoch": 6.02138540213854, "grad_norm": 0.9184300899505615, "learning_rate": 2.6883170138198323e-05, "loss": 0.0274, "step": 12952 }, { "epoch": 6.02231520223152, "grad_norm": 1.4983773231506348, "learning_rate": 2.680484429707157e-05, "loss": 0.0693, "step": 12954 }, { "epoch": 6.0232450023245, "grad_norm": 0.98252272605896, "learning_rate": 2.6726500642860072e-05, "loss": 0.0396, "step": 12956 }, { "epoch": 6.02417480241748, "grad_norm": 1.0303034782409668, "learning_rate": 2.6648139948784423e-05, "loss": 0.0422, "step": 12958 }, { "epoch": 6.02510460251046, "grad_norm": 0.8282851576805115, "learning_rate": 2.6569762988232862e-05, "loss": 0.0388, "step": 12960 }, { "epoch": 6.0260344026034405, "grad_norm": 1.506130576133728, "learning_rate": 2.6491370534754254e-05, "loss": 0.044, "step": 12962 }, { "epoch": 6.02696420269642, "grad_norm": 1.2245745658874512, "learning_rate": 2.641296336205065e-05, "loss": 0.0457, "step": 12964 }, { "epoch": 6.027894002789401, "grad_norm": 1.722273826599121, "learning_rate": 2.6334542243969007e-05, "loss": 0.0612, "step": 12966 }, { "epoch": 6.02882380288238, "grad_norm": 2.019249677658081, "learning_rate": 2.625610795449428e-05, "loss": 0.0677, "step": 12968 }, { "epoch": 6.029753602975361, "grad_norm": 0.7777581810951233, "learning_rate": 2.6177661267740977e-05, "loss": 0.0302, "step": 12970 }, { "epoch": 6.03068340306834, "grad_norm": 0.9843727350234985, "learning_rate": 2.6099202957946583e-05, "loss": 0.044, "step": 12972 }, { "epoch": 6.03161320316132, "grad_norm": 0.6569740772247314, "learning_rate": 2.602073379946276e-05, "loss": 0.0272, "step": 12974 }, { "epoch": 6.0325430032543, "grad_norm": 1.0775038003921509, "learning_rate": 2.5942254566748334e-05, "loss": 0.0226, "step": 12976 }, { "epoch": 6.03347280334728, "grad_norm": 1.019217848777771, "learning_rate": 2.5863766034361828e-05, "loss": 0.0743, "step": 12978 }, { "epoch": 6.0344026034402605, "grad_norm": 0.9996134638786316, "learning_rate": 2.578526897695319e-05, "loss": 0.0371, "step": 12980 }, { "epoch": 6.03533240353324, "grad_norm": 1.221914529800415, "learning_rate": 2.5706764169256774e-05, "loss": 0.0436, "step": 12982 }, { "epoch": 6.036262203626221, "grad_norm": 0.9551999568939209, "learning_rate": 2.5628252386083427e-05, "loss": 0.0287, "step": 12984 }, { "epoch": 6.0371920037192, "grad_norm": 1.8682180643081665, "learning_rate": 2.5549734402312576e-05, "loss": 0.0541, "step": 12986 }, { "epoch": 6.038121803812181, "grad_norm": 0.564440906047821, "learning_rate": 2.5471210992885206e-05, "loss": 0.0477, "step": 12988 }, { "epoch": 6.03905160390516, "grad_norm": 1.490181803703308, "learning_rate": 2.539268293279548e-05, "loss": 0.0439, "step": 12990 }, { "epoch": 6.039981403998141, "grad_norm": 1.515268325805664, "learning_rate": 2.531415099708383e-05, "loss": 0.0534, "step": 12992 }, { "epoch": 6.04091120409112, "grad_norm": 0.780473530292511, "learning_rate": 2.5235615960828663e-05, "loss": 0.0301, "step": 12994 }, { "epoch": 6.0418410041841, "grad_norm": 1.549038052558899, "learning_rate": 2.5157078599138906e-05, "loss": 0.0659, "step": 12996 }, { "epoch": 6.0427708042770805, "grad_norm": 0.6811447143554688, "learning_rate": 2.507853968714697e-05, "loss": 0.0365, "step": 12998 }, { "epoch": 6.04370060437006, "grad_norm": 1.3991062641143799, "learning_rate": 2.499999999999994e-05, "loss": 0.0606, "step": 13000 }, { "epoch": 6.04370060437006, "eval_cer": 0.15159460546981476, "eval_loss": 0.227281391620636, "eval_runtime": 403.2578, "eval_samples_per_second": 31.479, "eval_steps_per_second": 0.984, "step": 13000 }, { "epoch": 6.044630404463041, "grad_norm": 1.4816045761108398, "learning_rate": 2.4921460312852997e-05, "loss": 0.0607, "step": 13002 }, { "epoch": 6.04556020455602, "grad_norm": 1.337095022201538, "learning_rate": 2.4842921400861063e-05, "loss": 0.042, "step": 13004 }, { "epoch": 6.046490004649001, "grad_norm": 1.093112826347351, "learning_rate": 2.4764384039171397e-05, "loss": 0.0385, "step": 13006 }, { "epoch": 6.04741980474198, "grad_norm": 1.004042625427246, "learning_rate": 2.4685849002916234e-05, "loss": 0.0642, "step": 13008 }, { "epoch": 6.048349604834961, "grad_norm": 0.9545464515686035, "learning_rate": 2.4607317067204404e-05, "loss": 0.0406, "step": 13010 }, { "epoch": 6.04927940492794, "grad_norm": 1.2444086074829102, "learning_rate": 2.4528789007114766e-05, "loss": 0.0462, "step": 13012 }, { "epoch": 6.050209205020921, "grad_norm": 1.2632994651794434, "learning_rate": 2.44502655976873e-05, "loss": 0.0694, "step": 13014 }, { "epoch": 6.0511390051139005, "grad_norm": 1.9290745258331299, "learning_rate": 2.437174761391654e-05, "loss": 0.0424, "step": 13016 }, { "epoch": 6.052068805206881, "grad_norm": 1.4175277948379517, "learning_rate": 2.429323583074319e-05, "loss": 0.093, "step": 13018 }, { "epoch": 6.052998605299861, "grad_norm": 0.7390515804290771, "learning_rate": 2.421473102304678e-05, "loss": 0.0486, "step": 13020 }, { "epoch": 6.05392840539284, "grad_norm": 1.3935467004776, "learning_rate": 2.4136233965638232e-05, "loss": 0.0395, "step": 13022 }, { "epoch": 6.054858205485821, "grad_norm": 1.6413702964782715, "learning_rate": 2.4057745433251547e-05, "loss": 0.0441, "step": 13024 }, { "epoch": 6.0557880055788, "grad_norm": 1.2296993732452393, "learning_rate": 2.3979266200537208e-05, "loss": 0.0447, "step": 13026 }, { "epoch": 6.056717805671781, "grad_norm": 1.478309154510498, "learning_rate": 2.3900797042053382e-05, "loss": 0.0622, "step": 13028 }, { "epoch": 6.05764760576476, "grad_norm": 1.860205054283142, "learning_rate": 2.38223387322589e-05, "loss": 0.0582, "step": 13030 }, { "epoch": 6.058577405857741, "grad_norm": 1.8541343212127686, "learning_rate": 2.3743892045505777e-05, "loss": 0.0715, "step": 13032 }, { "epoch": 6.0595072059507205, "grad_norm": 1.5203773975372314, "learning_rate": 2.3665457756030964e-05, "loss": 0.0519, "step": 13034 }, { "epoch": 6.060437006043701, "grad_norm": 1.3108654022216797, "learning_rate": 2.358703663794941e-05, "loss": 0.0448, "step": 13036 }, { "epoch": 6.0613668061366806, "grad_norm": 1.069931983947754, "learning_rate": 2.350862946524572e-05, "loss": 0.0378, "step": 13038 }, { "epoch": 6.062296606229661, "grad_norm": 1.6504982709884644, "learning_rate": 2.3430237011767113e-05, "loss": 0.0509, "step": 13040 }, { "epoch": 6.063226406322641, "grad_norm": 2.1563076972961426, "learning_rate": 2.3351860051215552e-05, "loss": 0.0617, "step": 13042 }, { "epoch": 6.06415620641562, "grad_norm": 1.223863124847412, "learning_rate": 2.327349935713981e-05, "loss": 0.0414, "step": 13044 }, { "epoch": 6.065086006508601, "grad_norm": 1.3791073560714722, "learning_rate": 2.319515570292849e-05, "loss": 0.0476, "step": 13046 }, { "epoch": 6.06601580660158, "grad_norm": 1.325822353363037, "learning_rate": 2.3116829861801737e-05, "loss": 0.0477, "step": 13048 }, { "epoch": 6.066945606694561, "grad_norm": 0.7990695238113403, "learning_rate": 2.3038522606803893e-05, "loss": 0.0778, "step": 13050 }, { "epoch": 6.0678754067875404, "grad_norm": 1.2880779504776, "learning_rate": 2.2960234710796036e-05, "loss": 0.0411, "step": 13052 }, { "epoch": 6.068805206880521, "grad_norm": 1.5798134803771973, "learning_rate": 2.2881966946448102e-05, "loss": 0.077, "step": 13054 }, { "epoch": 6.0697350069735005, "grad_norm": 1.526837944984436, "learning_rate": 2.280372008623141e-05, "loss": 0.0577, "step": 13056 }, { "epoch": 6.070664807066481, "grad_norm": 1.1948294639587402, "learning_rate": 2.2725494902410702e-05, "loss": 0.0585, "step": 13058 }, { "epoch": 6.071594607159461, "grad_norm": 1.6175339221954346, "learning_rate": 2.264729216703714e-05, "loss": 0.047, "step": 13060 }, { "epoch": 6.072524407252441, "grad_norm": 1.498956561088562, "learning_rate": 2.2569112651940067e-05, "loss": 0.0388, "step": 13062 }, { "epoch": 6.073454207345421, "grad_norm": 1.5509850978851318, "learning_rate": 2.249095712871955e-05, "loss": 0.0791, "step": 13064 }, { "epoch": 6.0743840074384, "grad_norm": 1.0245441198349, "learning_rate": 2.2412826368739406e-05, "loss": 0.0387, "step": 13066 }, { "epoch": 6.075313807531381, "grad_norm": 1.4616271257400513, "learning_rate": 2.2334721143118426e-05, "loss": 0.0495, "step": 13068 }, { "epoch": 6.07624360762436, "grad_norm": 1.1600021123886108, "learning_rate": 2.2256642222723842e-05, "loss": 0.0566, "step": 13070 }, { "epoch": 6.077173407717341, "grad_norm": 1.3164331912994385, "learning_rate": 2.217859037816298e-05, "loss": 0.0298, "step": 13072 }, { "epoch": 6.0781032078103205, "grad_norm": 1.2871218919754028, "learning_rate": 2.2100566379775946e-05, "loss": 0.047, "step": 13074 }, { "epoch": 6.079033007903301, "grad_norm": 1.3747154474258423, "learning_rate": 2.2022570997628286e-05, "loss": 0.0564, "step": 13076 }, { "epoch": 6.079962807996281, "grad_norm": 1.0192880630493164, "learning_rate": 2.1944605001502664e-05, "loss": 0.0547, "step": 13078 }, { "epoch": 6.080892608089261, "grad_norm": 1.0986518859863281, "learning_rate": 2.1866669160892343e-05, "loss": 0.0484, "step": 13080 }, { "epoch": 6.081822408182241, "grad_norm": 1.1293498277664185, "learning_rate": 2.1788764244992427e-05, "loss": 0.0434, "step": 13082 }, { "epoch": 6.082752208275221, "grad_norm": 0.8628703355789185, "learning_rate": 2.1710891022692898e-05, "loss": 0.05, "step": 13084 }, { "epoch": 6.083682008368201, "grad_norm": 0.6597083806991577, "learning_rate": 2.1633050262571196e-05, "loss": 0.0189, "step": 13086 }, { "epoch": 6.08461180846118, "grad_norm": 1.4381951093673706, "learning_rate": 2.1555242732884017e-05, "loss": 0.0368, "step": 13088 }, { "epoch": 6.085541608554161, "grad_norm": 1.5643795728683472, "learning_rate": 2.147746920156045e-05, "loss": 0.0404, "step": 13090 }, { "epoch": 6.0864714086471405, "grad_norm": 1.129448652267456, "learning_rate": 2.139973043619367e-05, "loss": 0.0552, "step": 13092 }, { "epoch": 6.087401208740121, "grad_norm": 1.2390077114105225, "learning_rate": 2.1322027204034005e-05, "loss": 0.0452, "step": 13094 }, { "epoch": 6.088331008833101, "grad_norm": 1.0587831735610962, "learning_rate": 2.124436027198106e-05, "loss": 0.0358, "step": 13096 }, { "epoch": 6.089260808926081, "grad_norm": 1.4319461584091187, "learning_rate": 2.1166730406575934e-05, "loss": 0.0633, "step": 13098 }, { "epoch": 6.090190609019061, "grad_norm": 1.3271284103393555, "learning_rate": 2.1089138373994223e-05, "loss": 0.0414, "step": 13100 }, { "epoch": 6.091120409112041, "grad_norm": 0.8073123693466187, "learning_rate": 2.10115849400378e-05, "loss": 0.057, "step": 13102 }, { "epoch": 6.092050209205021, "grad_norm": 1.0994857549667358, "learning_rate": 2.0934070870127915e-05, "loss": 0.0397, "step": 13104 }, { "epoch": 6.092980009298001, "grad_norm": 0.8345053791999817, "learning_rate": 2.0856596929296975e-05, "loss": 0.0405, "step": 13106 }, { "epoch": 6.093909809390981, "grad_norm": 1.6601994037628174, "learning_rate": 2.0779163882181584e-05, "loss": 0.0619, "step": 13108 }, { "epoch": 6.0948396094839605, "grad_norm": 1.0046833753585815, "learning_rate": 2.070177249301474e-05, "loss": 0.037, "step": 13110 }, { "epoch": 6.095769409576941, "grad_norm": 1.4808748960494995, "learning_rate": 2.062442352561804e-05, "loss": 0.0778, "step": 13112 }, { "epoch": 6.096699209669921, "grad_norm": 1.0771121978759766, "learning_rate": 2.0547117743394738e-05, "loss": 0.0375, "step": 13114 }, { "epoch": 6.097629009762901, "grad_norm": 0.8685406446456909, "learning_rate": 2.0469855909321604e-05, "loss": 0.0399, "step": 13116 }, { "epoch": 6.098558809855881, "grad_norm": 0.8798629641532898, "learning_rate": 2.0392638785941667e-05, "loss": 0.0485, "step": 13118 }, { "epoch": 6.099488609948861, "grad_norm": 0.49144911766052246, "learning_rate": 2.0315467135356843e-05, "loss": 0.0319, "step": 13120 }, { "epoch": 6.100418410041841, "grad_norm": 1.078231692314148, "learning_rate": 2.0238341719220173e-05, "loss": 0.034, "step": 13122 }, { "epoch": 6.101348210134821, "grad_norm": 1.198488473892212, "learning_rate": 2.016126329872846e-05, "loss": 0.0459, "step": 13124 }, { "epoch": 6.102278010227801, "grad_norm": 1.3041963577270508, "learning_rate": 2.0084232634614513e-05, "loss": 0.0486, "step": 13126 }, { "epoch": 6.103207810320781, "grad_norm": 1.7090091705322266, "learning_rate": 2.0007250487139802e-05, "loss": 0.0562, "step": 13128 }, { "epoch": 6.104137610413761, "grad_norm": 1.155657410621643, "learning_rate": 1.9930317616087213e-05, "loss": 0.042, "step": 13130 }, { "epoch": 6.105067410506741, "grad_norm": 0.8386728763580322, "learning_rate": 1.9853434780752953e-05, "loss": 0.0341, "step": 13132 }, { "epoch": 6.105997210599721, "grad_norm": 1.0768616199493408, "learning_rate": 1.9776602739939656e-05, "loss": 0.0386, "step": 13134 }, { "epoch": 6.106927010692701, "grad_norm": 1.2748912572860718, "learning_rate": 1.969982225194863e-05, "loss": 0.0483, "step": 13136 }, { "epoch": 6.107856810785681, "grad_norm": 1.3845505714416504, "learning_rate": 1.9623094074572126e-05, "loss": 0.041, "step": 13138 }, { "epoch": 6.108786610878661, "grad_norm": 1.1454384326934814, "learning_rate": 1.9546418965086445e-05, "loss": 0.0526, "step": 13140 }, { "epoch": 6.109716410971641, "grad_norm": 1.3649134635925293, "learning_rate": 1.946979768024379e-05, "loss": 0.0445, "step": 13142 }, { "epoch": 6.110646211064621, "grad_norm": 1.5899869203567505, "learning_rate": 1.939323097626549e-05, "loss": 0.0641, "step": 13144 }, { "epoch": 6.111576011157601, "grad_norm": 0.9365867972373962, "learning_rate": 1.931671960883382e-05, "loss": 0.0629, "step": 13146 }, { "epoch": 6.112505811250581, "grad_norm": 1.2011761665344238, "learning_rate": 1.924026433308518e-05, "loss": 0.0481, "step": 13148 }, { "epoch": 6.1134356113435615, "grad_norm": 2.0893819332122803, "learning_rate": 1.916386590360236e-05, "loss": 0.0755, "step": 13150 }, { "epoch": 6.114365411436541, "grad_norm": 1.084583044052124, "learning_rate": 1.9087525074406838e-05, "loss": 0.045, "step": 13152 }, { "epoch": 6.115295211529521, "grad_norm": 1.1330134868621826, "learning_rate": 1.9011242598951956e-05, "loss": 0.0635, "step": 13154 }, { "epoch": 6.116225011622501, "grad_norm": 0.9065934419631958, "learning_rate": 1.893501923011478e-05, "loss": 0.043, "step": 13156 }, { "epoch": 6.117154811715481, "grad_norm": 1.229514718055725, "learning_rate": 1.885885572018935e-05, "loss": 0.0399, "step": 13158 }, { "epoch": 6.118084611808461, "grad_norm": 0.8626127243041992, "learning_rate": 1.878275282087869e-05, "loss": 0.0487, "step": 13160 }, { "epoch": 6.119014411901441, "grad_norm": 0.8463934659957886, "learning_rate": 1.8706711283287505e-05, "loss": 0.0189, "step": 13162 }, { "epoch": 6.119944211994421, "grad_norm": 0.9374286532402039, "learning_rate": 1.8630731857915426e-05, "loss": 0.033, "step": 13164 }, { "epoch": 6.120874012087401, "grad_norm": 1.1279456615447998, "learning_rate": 1.8554815294648446e-05, "loss": 0.053, "step": 13166 }, { "epoch": 6.1218038121803815, "grad_norm": 1.3347184658050537, "learning_rate": 1.8478962342752566e-05, "loss": 0.0406, "step": 13168 }, { "epoch": 6.122733612273361, "grad_norm": 0.721285879611969, "learning_rate": 1.8403173750865712e-05, "loss": 0.0323, "step": 13170 }, { "epoch": 6.123663412366342, "grad_norm": 1.0034668445587158, "learning_rate": 1.832745026699061e-05, "loss": 0.0312, "step": 13172 }, { "epoch": 6.124593212459321, "grad_norm": 2.9105327129364014, "learning_rate": 1.8251792638487556e-05, "loss": 0.0692, "step": 13174 }, { "epoch": 6.125523012552302, "grad_norm": 0.7293165922164917, "learning_rate": 1.8176201612066797e-05, "loss": 0.033, "step": 13176 }, { "epoch": 6.126452812645281, "grad_norm": 0.9135260581970215, "learning_rate": 1.810067793378133e-05, "loss": 0.0467, "step": 13178 }, { "epoch": 6.127382612738261, "grad_norm": 1.4599963426589966, "learning_rate": 1.8025222349019287e-05, "loss": 0.0652, "step": 13180 }, { "epoch": 6.128312412831241, "grad_norm": 1.205283761024475, "learning_rate": 1.7949835602496742e-05, "loss": 0.0313, "step": 13182 }, { "epoch": 6.129242212924221, "grad_norm": 0.8146945834159851, "learning_rate": 1.7874518438250624e-05, "loss": 0.0426, "step": 13184 }, { "epoch": 6.1301720130172015, "grad_norm": 0.6396738290786743, "learning_rate": 1.779927159963076e-05, "loss": 0.0242, "step": 13186 }, { "epoch": 6.131101813110181, "grad_norm": 1.128727674484253, "learning_rate": 1.7724095829293155e-05, "loss": 0.0611, "step": 13188 }, { "epoch": 6.132031613203162, "grad_norm": 2.0595967769622803, "learning_rate": 1.7648991869192314e-05, "loss": 0.0413, "step": 13190 }, { "epoch": 6.132961413296141, "grad_norm": 0.7434269785881042, "learning_rate": 1.757396046057409e-05, "loss": 0.0221, "step": 13192 }, { "epoch": 6.133891213389122, "grad_norm": 1.3250168561935425, "learning_rate": 1.74990023439681e-05, "loss": 0.0408, "step": 13194 }, { "epoch": 6.134821013482101, "grad_norm": 1.6306651830673218, "learning_rate": 1.7424118259180624e-05, "loss": 0.0511, "step": 13196 }, { "epoch": 6.135750813575082, "grad_norm": 1.8405256271362305, "learning_rate": 1.73493089452875e-05, "loss": 0.0717, "step": 13198 }, { "epoch": 6.136680613668061, "grad_norm": 0.9498974680900574, "learning_rate": 1.727457514062629e-05, "loss": 0.0566, "step": 13200 }, { "epoch": 6.137610413761041, "grad_norm": 0.7192453742027283, "learning_rate": 1.7199917582789565e-05, "loss": 0.0262, "step": 13202 }, { "epoch": 6.1385402138540215, "grad_norm": 1.0258151292800903, "learning_rate": 1.7125337008617362e-05, "loss": 0.0279, "step": 13204 }, { "epoch": 6.139470013947001, "grad_norm": 1.1941250562667847, "learning_rate": 1.7050834154189672e-05, "loss": 0.0396, "step": 13206 }, { "epoch": 6.140399814039982, "grad_norm": 1.0143848657608032, "learning_rate": 1.6976409754819754e-05, "loss": 0.0262, "step": 13208 }, { "epoch": 6.141329614132961, "grad_norm": 1.5423864126205444, "learning_rate": 1.690206454504622e-05, "loss": 0.0402, "step": 13210 }, { "epoch": 6.142259414225942, "grad_norm": 2.0843234062194824, "learning_rate": 1.682779925862644e-05, "loss": 0.0467, "step": 13212 }, { "epoch": 6.143189214318921, "grad_norm": 0.6658696532249451, "learning_rate": 1.6753614628528723e-05, "loss": 0.0245, "step": 13214 }, { "epoch": 6.144119014411902, "grad_norm": 1.2941607236862183, "learning_rate": 1.6679511386925255e-05, "loss": 0.038, "step": 13216 }, { "epoch": 6.145048814504881, "grad_norm": 1.5187844038009644, "learning_rate": 1.6605490265185448e-05, "loss": 0.0297, "step": 13218 }, { "epoch": 6.145978614597862, "grad_norm": 0.8502505421638489, "learning_rate": 1.6531551993867642e-05, "loss": 0.0364, "step": 13220 }, { "epoch": 6.1469084146908415, "grad_norm": 1.880949854850769, "learning_rate": 1.645769730271289e-05, "loss": 0.0448, "step": 13222 }, { "epoch": 6.147838214783821, "grad_norm": 1.2050482034683228, "learning_rate": 1.6383926920637093e-05, "loss": 0.0373, "step": 13224 }, { "epoch": 6.148768014876802, "grad_norm": 0.5342617034912109, "learning_rate": 1.6310241575724053e-05, "loss": 0.0344, "step": 13226 }, { "epoch": 6.149697814969781, "grad_norm": 1.5793380737304688, "learning_rate": 1.6236641995218554e-05, "loss": 0.0377, "step": 13228 }, { "epoch": 6.150627615062762, "grad_norm": 0.5217275619506836, "learning_rate": 1.6163128905518483e-05, "loss": 0.0199, "step": 13230 }, { "epoch": 6.151557415155741, "grad_norm": 0.9880577921867371, "learning_rate": 1.6089703032168686e-05, "loss": 0.049, "step": 13232 }, { "epoch": 6.152487215248722, "grad_norm": 1.5204843282699585, "learning_rate": 1.601636509985265e-05, "loss": 0.0497, "step": 13234 }, { "epoch": 6.153417015341701, "grad_norm": 1.428038477897644, "learning_rate": 1.5943115832386322e-05, "loss": 0.0504, "step": 13236 }, { "epoch": 6.154346815434682, "grad_norm": 0.5703763365745544, "learning_rate": 1.586995595271032e-05, "loss": 0.0271, "step": 13238 }, { "epoch": 6.1552766155276615, "grad_norm": 0.6559624075889587, "learning_rate": 1.579688618288303e-05, "loss": 0.0396, "step": 13240 }, { "epoch": 6.156206415620642, "grad_norm": 0.7976772785186768, "learning_rate": 1.5723907244073746e-05, "loss": 0.0378, "step": 13242 }, { "epoch": 6.157136215713622, "grad_norm": 2.0976483821868896, "learning_rate": 1.565101985655489e-05, "loss": 0.0306, "step": 13244 }, { "epoch": 6.158066015806601, "grad_norm": 0.8390588760375977, "learning_rate": 1.5578224739695884e-05, "loss": 0.031, "step": 13246 }, { "epoch": 6.158995815899582, "grad_norm": 0.9553635716438293, "learning_rate": 1.550552261195497e-05, "loss": 0.0302, "step": 13248 }, { "epoch": 6.159925615992561, "grad_norm": 1.8296383619308472, "learning_rate": 1.5432914190872716e-05, "loss": 0.0448, "step": 13250 }, { "epoch": 6.160855416085542, "grad_norm": 1.3372141122817993, "learning_rate": 1.536040019306509e-05, "loss": 0.0397, "step": 13252 }, { "epoch": 6.161785216178521, "grad_norm": 0.39880049228668213, "learning_rate": 1.5287981334215814e-05, "loss": 0.0221, "step": 13254 }, { "epoch": 6.162715016271502, "grad_norm": 0.9775078296661377, "learning_rate": 1.521565832906995e-05, "loss": 0.041, "step": 13256 }, { "epoch": 6.1636448163644815, "grad_norm": 0.7592629790306091, "learning_rate": 1.5143431891426198e-05, "loss": 0.0477, "step": 13258 }, { "epoch": 6.164574616457462, "grad_norm": 0.9615457057952881, "learning_rate": 1.5071302734130423e-05, "loss": 0.0215, "step": 13260 }, { "epoch": 6.165504416550442, "grad_norm": 0.7851611971855164, "learning_rate": 1.4999271569068363e-05, "loss": 0.0324, "step": 13262 }, { "epoch": 6.166434216643422, "grad_norm": 1.588639497756958, "learning_rate": 1.4927339107158377e-05, "loss": 0.0358, "step": 13264 }, { "epoch": 6.167364016736402, "grad_norm": 0.6235285401344299, "learning_rate": 1.485550605834499e-05, "loss": 0.0232, "step": 13266 }, { "epoch": 6.168293816829381, "grad_norm": 0.5778889656066895, "learning_rate": 1.4783773131591308e-05, "loss": 0.0315, "step": 13268 }, { "epoch": 6.169223616922362, "grad_norm": 1.010976791381836, "learning_rate": 1.4712141034872199e-05, "loss": 0.032, "step": 13270 }, { "epoch": 6.170153417015341, "grad_norm": 1.2210938930511475, "learning_rate": 1.4640610475167859e-05, "loss": 0.0301, "step": 13272 }, { "epoch": 6.171083217108322, "grad_norm": 0.9862406849861145, "learning_rate": 1.4569182158455794e-05, "loss": 0.0273, "step": 13274 }, { "epoch": 6.1720130172013015, "grad_norm": 1.1122740507125854, "learning_rate": 1.4497856789704814e-05, "loss": 0.0415, "step": 13276 }, { "epoch": 6.172942817294282, "grad_norm": 0.7897031307220459, "learning_rate": 1.4426635072867354e-05, "loss": 0.0274, "step": 13278 }, { "epoch": 6.173872617387262, "grad_norm": 1.0135903358459473, "learning_rate": 1.4355517710873163e-05, "loss": 0.0333, "step": 13280 }, { "epoch": 6.174802417480242, "grad_norm": 1.7388111352920532, "learning_rate": 1.428450540562182e-05, "loss": 0.0521, "step": 13282 }, { "epoch": 6.175732217573222, "grad_norm": 1.684861660003662, "learning_rate": 1.4213598857975939e-05, "loss": 0.047, "step": 13284 }, { "epoch": 6.176662017666202, "grad_norm": 1.707406759262085, "learning_rate": 1.4142798767754843e-05, "loss": 0.0586, "step": 13286 }, { "epoch": 6.177591817759182, "grad_norm": 1.5871778726577759, "learning_rate": 1.40721058337266e-05, "loss": 0.0281, "step": 13288 }, { "epoch": 6.178521617852161, "grad_norm": 1.1312438249588013, "learning_rate": 1.4001520753602087e-05, "loss": 0.0318, "step": 13290 }, { "epoch": 6.179451417945142, "grad_norm": 1.3134727478027344, "learning_rate": 1.3931044224027473e-05, "loss": 0.0462, "step": 13292 }, { "epoch": 6.1803812180381215, "grad_norm": 1.2513169050216675, "learning_rate": 1.3860676940577569e-05, "loss": 0.0303, "step": 13294 }, { "epoch": 6.181311018131102, "grad_norm": 2.0439748764038086, "learning_rate": 1.3790419597749212e-05, "loss": 0.0402, "step": 13296 }, { "epoch": 6.182240818224082, "grad_norm": 0.4215911626815796, "learning_rate": 1.3720272888953777e-05, "loss": 0.0183, "step": 13298 }, { "epoch": 6.183170618317062, "grad_norm": 1.4366532564163208, "learning_rate": 1.3650237506511279e-05, "loss": 0.0463, "step": 13300 }, { "epoch": 6.184100418410042, "grad_norm": 0.6666385531425476, "learning_rate": 1.3580314141642506e-05, "loss": 0.0436, "step": 13302 }, { "epoch": 6.185030218503022, "grad_norm": 0.9525788426399231, "learning_rate": 1.3510503484462761e-05, "loss": 0.0344, "step": 13304 }, { "epoch": 6.185960018596002, "grad_norm": 1.3069016933441162, "learning_rate": 1.3440806223975206e-05, "loss": 0.0543, "step": 13306 }, { "epoch": 6.186889818688982, "grad_norm": 0.9929739832878113, "learning_rate": 1.3371223048063509e-05, "loss": 0.0317, "step": 13308 }, { "epoch": 6.187819618781962, "grad_norm": 1.0519531965255737, "learning_rate": 1.3301754643485674e-05, "loss": 0.054, "step": 13310 }, { "epoch": 6.1887494188749415, "grad_norm": 1.050193190574646, "learning_rate": 1.323240169586658e-05, "loss": 0.0602, "step": 13312 }, { "epoch": 6.189679218967922, "grad_norm": 1.521438717842102, "learning_rate": 1.3163164889692138e-05, "loss": 0.0609, "step": 13314 }, { "epoch": 6.1906090190609016, "grad_norm": 0.9767948985099792, "learning_rate": 1.3094044908301507e-05, "loss": 0.0289, "step": 13316 }, { "epoch": 6.191538819153882, "grad_norm": 0.830549955368042, "learning_rate": 1.3025042433880926e-05, "loss": 0.0271, "step": 13318 }, { "epoch": 6.192468619246862, "grad_norm": 0.8380123972892761, "learning_rate": 1.2956158147457111e-05, "loss": 0.0416, "step": 13320 }, { "epoch": 6.193398419339842, "grad_norm": 0.8434740304946899, "learning_rate": 1.288739272889001e-05, "loss": 0.0297, "step": 13322 }, { "epoch": 6.194328219432822, "grad_norm": 1.7072944641113281, "learning_rate": 1.2818746856866695e-05, "loss": 0.0462, "step": 13324 }, { "epoch": 6.195258019525802, "grad_norm": 1.3900879621505737, "learning_rate": 1.2750221208894054e-05, "loss": 0.0375, "step": 13326 }, { "epoch": 6.196187819618782, "grad_norm": 0.7770211696624756, "learning_rate": 1.2681816461292645e-05, "loss": 0.0265, "step": 13328 }, { "epoch": 6.197117619711762, "grad_norm": 0.5494943261146545, "learning_rate": 1.261353328918979e-05, "loss": 0.0303, "step": 13330 }, { "epoch": 6.198047419804742, "grad_norm": 0.8953770399093628, "learning_rate": 1.2545372366512676e-05, "loss": 0.0271, "step": 13332 }, { "epoch": 6.198977219897722, "grad_norm": 0.9419543743133545, "learning_rate": 1.2477334365982243e-05, "loss": 0.0468, "step": 13334 }, { "epoch": 6.199907019990702, "grad_norm": 1.251715064048767, "learning_rate": 1.2409419959106011e-05, "loss": 0.0408, "step": 13336 }, { "epoch": 6.200836820083682, "grad_norm": 0.7710418105125427, "learning_rate": 1.2341629816171686e-05, "loss": 0.0384, "step": 13338 }, { "epoch": 6.201766620176662, "grad_norm": 0.7881470918655396, "learning_rate": 1.2273964606240682e-05, "loss": 0.0225, "step": 13340 }, { "epoch": 6.202696420269642, "grad_norm": 0.9503797292709351, "learning_rate": 1.2206424997141308e-05, "loss": 0.039, "step": 13342 }, { "epoch": 6.203626220362622, "grad_norm": 1.1250296831130981, "learning_rate": 1.2139011655462313e-05, "loss": 0.0325, "step": 13344 }, { "epoch": 6.204556020455602, "grad_norm": 0.8299534320831299, "learning_rate": 1.2071725246546087e-05, "loss": 0.0213, "step": 13346 }, { "epoch": 6.205485820548582, "grad_norm": 0.7586805820465088, "learning_rate": 1.2004566434482244e-05, "loss": 0.0377, "step": 13348 }, { "epoch": 6.206415620641562, "grad_norm": 1.0376719236373901, "learning_rate": 1.1937535882101303e-05, "loss": 0.0361, "step": 13350 }, { "epoch": 6.207345420734542, "grad_norm": 0.8414318561553955, "learning_rate": 1.1870634250967523e-05, "loss": 0.037, "step": 13352 }, { "epoch": 6.208275220827522, "grad_norm": 1.8440879583358765, "learning_rate": 1.1803862201373298e-05, "loss": 0.0559, "step": 13354 }, { "epoch": 6.209205020920502, "grad_norm": 1.101953387260437, "learning_rate": 1.1737220392331571e-05, "loss": 0.029, "step": 13356 }, { "epoch": 6.210134821013482, "grad_norm": 0.77997887134552, "learning_rate": 1.167070948157025e-05, "loss": 0.0191, "step": 13358 }, { "epoch": 6.211064621106462, "grad_norm": 1.0339266061782837, "learning_rate": 1.1604330125525091e-05, "loss": 0.0269, "step": 13360 }, { "epoch": 6.211994421199442, "grad_norm": 1.0214519500732422, "learning_rate": 1.1538082979333467e-05, "loss": 0.0383, "step": 13362 }, { "epoch": 6.212924221292422, "grad_norm": 2.210263967514038, "learning_rate": 1.1471968696828117e-05, "loss": 0.0673, "step": 13364 }, { "epoch": 6.213854021385402, "grad_norm": 0.7155078649520874, "learning_rate": 1.1405987930530094e-05, "loss": 0.0264, "step": 13366 }, { "epoch": 6.214783821478382, "grad_norm": 1.0634799003601074, "learning_rate": 1.1340141331643222e-05, "loss": 0.0477, "step": 13368 }, { "epoch": 6.215713621571362, "grad_norm": 0.9202408790588379, "learning_rate": 1.1274429550046697e-05, "loss": 0.0388, "step": 13370 }, { "epoch": 6.216643421664342, "grad_norm": 1.501988172531128, "learning_rate": 1.1208853234289202e-05, "loss": 0.0502, "step": 13372 }, { "epoch": 6.2175732217573225, "grad_norm": 1.174012541770935, "learning_rate": 1.1143413031582646e-05, "loss": 0.0309, "step": 13374 }, { "epoch": 6.218503021850302, "grad_norm": 0.6101492643356323, "learning_rate": 1.1078109587795266e-05, "loss": 0.0255, "step": 13376 }, { "epoch": 6.219432821943283, "grad_norm": 0.7045748829841614, "learning_rate": 1.1012943547445834e-05, "loss": 0.0361, "step": 13378 }, { "epoch": 6.220362622036262, "grad_norm": 0.7228851318359375, "learning_rate": 1.094791555369671e-05, "loss": 0.0186, "step": 13380 }, { "epoch": 6.221292422129242, "grad_norm": 1.247037410736084, "learning_rate": 1.0883026248348016e-05, "loss": 0.0443, "step": 13382 }, { "epoch": 6.222222222222222, "grad_norm": 0.9098255634307861, "learning_rate": 1.081827627183107e-05, "loss": 0.0341, "step": 13384 }, { "epoch": 6.223152022315202, "grad_norm": 1.0657179355621338, "learning_rate": 1.0753666263201853e-05, "loss": 0.0309, "step": 13386 }, { "epoch": 6.224081822408182, "grad_norm": 1.0619819164276123, "learning_rate": 1.0689196860135217e-05, "loss": 0.0212, "step": 13388 }, { "epoch": 6.225011622501162, "grad_norm": 0.681563138961792, "learning_rate": 1.0624868698918064e-05, "loss": 0.0399, "step": 13390 }, { "epoch": 6.2259414225941425, "grad_norm": 1.618045687675476, "learning_rate": 1.0560682414443307e-05, "loss": 0.0395, "step": 13392 }, { "epoch": 6.226871222687122, "grad_norm": 1.0310640335083008, "learning_rate": 1.0496638640203728e-05, "loss": 0.0543, "step": 13394 }, { "epoch": 6.227801022780103, "grad_norm": 1.5248435735702515, "learning_rate": 1.043273800828553e-05, "loss": 0.0339, "step": 13396 }, { "epoch": 6.228730822873082, "grad_norm": 0.4203420579433441, "learning_rate": 1.036898114936222e-05, "loss": 0.0313, "step": 13398 }, { "epoch": 6.229660622966063, "grad_norm": 1.2640302181243896, "learning_rate": 1.0305368692688112e-05, "loss": 0.0429, "step": 13400 }, { "epoch": 6.230590423059042, "grad_norm": 1.0135207176208496, "learning_rate": 1.0241901266092617e-05, "loss": 0.0415, "step": 13402 }, { "epoch": 6.231520223152022, "grad_norm": 0.7921913862228394, "learning_rate": 1.017857949597354e-05, "loss": 0.0248, "step": 13404 }, { "epoch": 6.232450023245002, "grad_norm": 0.7150092720985413, "learning_rate": 1.0115404007291115e-05, "loss": 0.0228, "step": 13406 }, { "epoch": 6.233379823337982, "grad_norm": 1.3255912065505981, "learning_rate": 1.0052375423561983e-05, "loss": 0.0379, "step": 13408 }, { "epoch": 6.2343096234309625, "grad_norm": 0.5995495915412903, "learning_rate": 9.989494366852824e-06, "loss": 0.0239, "step": 13410 }, { "epoch": 6.235239423523942, "grad_norm": 1.7396769523620605, "learning_rate": 9.926761457774348e-06, "loss": 0.0433, "step": 13412 }, { "epoch": 6.236169223616923, "grad_norm": 0.916858434677124, "learning_rate": 9.864177315474973e-06, "loss": 0.0308, "step": 13414 }, { "epoch": 6.237099023709902, "grad_norm": 1.7309414148330688, "learning_rate": 9.801742557634842e-06, "loss": 0.0419, "step": 13416 }, { "epoch": 6.238028823802883, "grad_norm": 1.3263626098632812, "learning_rate": 9.739457800459953e-06, "loss": 0.0649, "step": 13418 }, { "epoch": 6.238958623895862, "grad_norm": 0.6598104238510132, "learning_rate": 9.677323658675572e-06, "loss": 0.0244, "step": 13420 }, { "epoch": 6.239888423988843, "grad_norm": 0.42357632517814636, "learning_rate": 9.615340745520665e-06, "loss": 0.0421, "step": 13422 }, { "epoch": 6.240818224081822, "grad_norm": 0.6544501185417175, "learning_rate": 9.553509672741635e-06, "loss": 0.0202, "step": 13424 }, { "epoch": 6.241748024174802, "grad_norm": 0.8230735659599304, "learning_rate": 9.491831050586062e-06, "loss": 0.0272, "step": 13426 }, { "epoch": 6.2426778242677825, "grad_norm": 1.3265180587768555, "learning_rate": 9.430305487797186e-06, "loss": 0.0342, "step": 13428 }, { "epoch": 6.243607624360762, "grad_norm": 1.0199562311172485, "learning_rate": 9.36893359160734e-06, "loss": 0.0365, "step": 13430 }, { "epoch": 6.244537424453743, "grad_norm": 0.4201779067516327, "learning_rate": 9.307715967732494e-06, "loss": 0.0471, "step": 13432 }, { "epoch": 6.245467224546722, "grad_norm": 2.214818000793457, "learning_rate": 9.246653220365817e-06, "loss": 0.062, "step": 13434 }, { "epoch": 6.246397024639703, "grad_norm": 0.3848736882209778, "learning_rate": 9.185745952171836e-06, "loss": 0.0231, "step": 13436 }, { "epoch": 6.247326824732682, "grad_norm": 0.9985288381576538, "learning_rate": 9.124994764280968e-06, "loss": 0.0365, "step": 13438 }, { "epoch": 6.248256624825663, "grad_norm": 0.40129944682121277, "learning_rate": 9.064400256282711e-06, "loss": 0.0145, "step": 13440 }, { "epoch": 6.249186424918642, "grad_norm": 1.6195120811462402, "learning_rate": 9.003963026220533e-06, "loss": 0.0374, "step": 13442 }, { "epoch": 6.250116225011623, "grad_norm": 0.6028949022293091, "learning_rate": 8.943683670585353e-06, "loss": 0.0198, "step": 13444 }, { "epoch": 6.2510460251046025, "grad_norm": 0.7946553826332092, "learning_rate": 8.883562784310217e-06, "loss": 0.0291, "step": 13446 }, { "epoch": 6.251975825197582, "grad_norm": 0.8216686248779297, "learning_rate": 8.82360096076393e-06, "loss": 0.0338, "step": 13448 }, { "epoch": 6.252905625290563, "grad_norm": 1.2253133058547974, "learning_rate": 8.763798791745349e-06, "loss": 0.033, "step": 13450 }, { "epoch": 6.253835425383542, "grad_norm": 1.0015703439712524, "learning_rate": 8.704156867478009e-06, "loss": 0.0274, "step": 13452 }, { "epoch": 6.254765225476523, "grad_norm": 0.6878647804260254, "learning_rate": 8.644675776603424e-06, "loss": 0.0227, "step": 13454 }, { "epoch": 6.255695025569502, "grad_norm": 0.8336981534957886, "learning_rate": 8.585356106176075e-06, "loss": 0.0326, "step": 13456 }, { "epoch": 6.256624825662483, "grad_norm": 0.6330758929252625, "learning_rate": 8.526198441657101e-06, "loss": 0.0487, "step": 13458 }, { "epoch": 6.257554625755462, "grad_norm": 0.8177725672721863, "learning_rate": 8.4672033669087e-06, "loss": 0.0269, "step": 13460 }, { "epoch": 6.258484425848443, "grad_norm": 0.5348494052886963, "learning_rate": 8.408371464188503e-06, "loss": 0.0188, "step": 13462 }, { "epoch": 6.2594142259414225, "grad_norm": 1.0755860805511475, "learning_rate": 8.349703314143646e-06, "loss": 0.0277, "step": 13464 }, { "epoch": 6.260344026034403, "grad_norm": 0.8268308639526367, "learning_rate": 8.291199495805147e-06, "loss": 0.0194, "step": 13466 }, { "epoch": 6.261273826127383, "grad_norm": 0.5511892437934875, "learning_rate": 8.232860586582008e-06, "loss": 0.0386, "step": 13468 }, { "epoch": 6.262203626220362, "grad_norm": 1.1710851192474365, "learning_rate": 8.174687162255645e-06, "loss": 0.0442, "step": 13470 }, { "epoch": 6.263133426313343, "grad_norm": 0.37915313243865967, "learning_rate": 8.116679796974405e-06, "loss": 0.0298, "step": 13472 }, { "epoch": 6.264063226406322, "grad_norm": 0.9557525515556335, "learning_rate": 8.058839063247432e-06, "loss": 0.0201, "step": 13474 }, { "epoch": 6.264993026499303, "grad_norm": 0.8166242837905884, "learning_rate": 8.001165531939477e-06, "loss": 0.0332, "step": 13476 }, { "epoch": 6.265922826592282, "grad_norm": 1.2281765937805176, "learning_rate": 7.943659772265082e-06, "loss": 0.0256, "step": 13478 }, { "epoch": 6.266852626685263, "grad_norm": 1.0658708810806274, "learning_rate": 7.886322351782749e-06, "loss": 0.039, "step": 13480 }, { "epoch": 6.2677824267782425, "grad_norm": 1.0229144096374512, "learning_rate": 7.829153836389796e-06, "loss": 0.0379, "step": 13482 }, { "epoch": 6.268712226871223, "grad_norm": 0.500018298625946, "learning_rate": 7.77215479031626e-06, "loss": 0.0355, "step": 13484 }, { "epoch": 6.269642026964203, "grad_norm": 0.717290461063385, "learning_rate": 7.715325776119848e-06, "loss": 0.0344, "step": 13486 }, { "epoch": 6.270571827057183, "grad_norm": 1.1821860074996948, "learning_rate": 7.658667354679856e-06, "loss": 0.0227, "step": 13488 }, { "epoch": 6.271501627150163, "grad_norm": 1.221403956413269, "learning_rate": 7.602180085192095e-06, "loss": 0.0595, "step": 13490 }, { "epoch": 6.272431427243143, "grad_norm": 1.8797287940979004, "learning_rate": 7.545864525163171e-06, "loss": 0.0487, "step": 13492 }, { "epoch": 6.273361227336123, "grad_norm": 1.0577261447906494, "learning_rate": 7.489721230404804e-06, "loss": 0.0377, "step": 13494 }, { "epoch": 6.274291027429102, "grad_norm": 0.9412918090820312, "learning_rate": 7.433750755028765e-06, "loss": 0.029, "step": 13496 }, { "epoch": 6.275220827522083, "grad_norm": 0.7520031929016113, "learning_rate": 7.377953651440934e-06, "loss": 0.0274, "step": 13498 }, { "epoch": 6.2761506276150625, "grad_norm": 0.4046686887741089, "learning_rate": 7.322330470336315e-06, "loss": 0.0216, "step": 13500 }, { "epoch": 6.277080427708043, "grad_norm": 1.0868570804595947, "learning_rate": 7.266881760693191e-06, "loss": 0.0436, "step": 13502 }, { "epoch": 6.278010227801023, "grad_norm": 1.353055715560913, "learning_rate": 7.211608069767816e-06, "loss": 0.0521, "step": 13504 }, { "epoch": 6.278940027894003, "grad_norm": 1.2530739307403564, "learning_rate": 7.156509943089451e-06, "loss": 0.0368, "step": 13506 }, { "epoch": 6.279869827986983, "grad_norm": 1.0306291580200195, "learning_rate": 7.101587924454178e-06, "loss": 0.025, "step": 13508 }, { "epoch": 6.280799628079963, "grad_norm": 1.3023428916931152, "learning_rate": 7.046842555920271e-06, "loss": 0.0467, "step": 13510 }, { "epoch": 6.281729428172943, "grad_norm": 0.614233136177063, "learning_rate": 6.992274377802352e-06, "loss": 0.0329, "step": 13512 }, { "epoch": 6.282659228265922, "grad_norm": 0.7404088973999023, "learning_rate": 6.937883928666253e-06, "loss": 0.0192, "step": 13514 }, { "epoch": 6.283589028358903, "grad_norm": 1.0007575750350952, "learning_rate": 6.8836717453238674e-06, "loss": 0.0363, "step": 13516 }, { "epoch": 6.2845188284518825, "grad_norm": 0.5695383548736572, "learning_rate": 6.829638362827374e-06, "loss": 0.0239, "step": 13518 }, { "epoch": 6.285448628544863, "grad_norm": 0.6357758641242981, "learning_rate": 6.77578431446469e-06, "loss": 0.0192, "step": 13520 }, { "epoch": 6.286378428637843, "grad_norm": 1.0388195514678955, "learning_rate": 6.72211013175341e-06, "loss": 0.0459, "step": 13522 }, { "epoch": 6.287308228730823, "grad_norm": 1.6168456077575684, "learning_rate": 6.668616344435984e-06, "loss": 0.056, "step": 13524 }, { "epoch": 6.288238028823803, "grad_norm": 0.8308179974555969, "learning_rate": 6.615303480474624e-06, "loss": 0.0481, "step": 13526 }, { "epoch": 6.289167828916783, "grad_norm": 1.3948886394500732, "learning_rate": 6.562172066045646e-06, "loss": 0.025, "step": 13528 }, { "epoch": 6.290097629009763, "grad_norm": 0.6175724864006042, "learning_rate": 6.509222625534783e-06, "loss": 0.0428, "step": 13530 }, { "epoch": 6.291027429102743, "grad_norm": 0.48465731739997864, "learning_rate": 6.456455681531459e-06, "loss": 0.0375, "step": 13532 }, { "epoch": 6.291957229195723, "grad_norm": 1.127643346786499, "learning_rate": 6.403871754824349e-06, "loss": 0.0468, "step": 13534 }, { "epoch": 6.292887029288703, "grad_norm": 0.4838900566101074, "learning_rate": 6.351471364395455e-06, "loss": 0.0167, "step": 13536 }, { "epoch": 6.293816829381683, "grad_norm": 0.6661045551300049, "learning_rate": 6.29925502741542e-06, "loss": 0.0317, "step": 13538 }, { "epoch": 6.294746629474663, "grad_norm": 0.5044808387756348, "learning_rate": 6.247223259238526e-06, "loss": 0.0239, "step": 13540 }, { "epoch": 6.295676429567643, "grad_norm": 1.2606794834136963, "learning_rate": 6.195376573397204e-06, "loss": 0.0312, "step": 13542 }, { "epoch": 6.296606229660623, "grad_norm": 0.9668813943862915, "learning_rate": 6.143715481597425e-06, "loss": 0.0214, "step": 13544 }, { "epoch": 6.297536029753603, "grad_norm": 1.1644083261489868, "learning_rate": 6.092240493713197e-06, "loss": 0.0404, "step": 13546 }, { "epoch": 6.298465829846583, "grad_norm": 0.8874009847640991, "learning_rate": 6.0409521177819225e-06, "loss": 0.0324, "step": 13548 }, { "epoch": 6.299395629939563, "grad_norm": 0.744510293006897, "learning_rate": 5.989850859999227e-06, "loss": 0.0396, "step": 13550 }, { "epoch": 6.300325430032543, "grad_norm": 0.8810352683067322, "learning_rate": 5.938937224713778e-06, "loss": 0.0314, "step": 13552 }, { "epoch": 6.301255230125523, "grad_norm": 0.7791478633880615, "learning_rate": 5.888211714422717e-06, "loss": 0.0419, "step": 13554 }, { "epoch": 6.302185030218503, "grad_norm": 1.2819222211837769, "learning_rate": 5.837674829766293e-06, "loss": 0.0349, "step": 13556 }, { "epoch": 6.303114830311483, "grad_norm": 1.3568785190582275, "learning_rate": 5.7873270695230415e-06, "loss": 0.0371, "step": 13558 }, { "epoch": 6.304044630404463, "grad_norm": 0.7367105484008789, "learning_rate": 5.737168930605258e-06, "loss": 0.0272, "step": 13560 }, { "epoch": 6.304974430497443, "grad_norm": 1.4196946620941162, "learning_rate": 5.687200908053354e-06, "loss": 0.0395, "step": 13562 }, { "epoch": 6.305904230590423, "grad_norm": 0.9414608478546143, "learning_rate": 5.63742349503165e-06, "loss": 0.0316, "step": 13564 }, { "epoch": 6.306834030683403, "grad_norm": 0.6594295501708984, "learning_rate": 5.587837182823062e-06, "loss": 0.0227, "step": 13566 }, { "epoch": 6.307763830776383, "grad_norm": 1.3742738962173462, "learning_rate": 5.538442460824414e-06, "loss": 0.0458, "step": 13568 }, { "epoch": 6.308693630869363, "grad_norm": 1.4896308183670044, "learning_rate": 5.489239816541789e-06, "loss": 0.0375, "step": 13570 }, { "epoch": 6.309623430962343, "grad_norm": 1.2964552640914917, "learning_rate": 5.440229735585248e-06, "loss": 0.0537, "step": 13572 }, { "epoch": 6.310553231055323, "grad_norm": 1.502856969833374, "learning_rate": 5.391412701664731e-06, "loss": 0.05, "step": 13574 }, { "epoch": 6.3114830311483034, "grad_norm": 1.4290894269943237, "learning_rate": 5.342789196584487e-06, "loss": 0.0365, "step": 13576 }, { "epoch": 6.312412831241283, "grad_norm": 0.8304157853126526, "learning_rate": 5.29435970023899e-06, "loss": 0.0196, "step": 13578 }, { "epoch": 6.3133426313342635, "grad_norm": 0.8816526532173157, "learning_rate": 5.246124690607762e-06, "loss": 0.0288, "step": 13580 }, { "epoch": 6.314272431427243, "grad_norm": 1.967510461807251, "learning_rate": 5.19808464375082e-06, "loss": 0.0488, "step": 13582 }, { "epoch": 6.315202231520223, "grad_norm": 0.9509020447731018, "learning_rate": 5.150240033804144e-06, "loss": 0.0426, "step": 13584 }, { "epoch": 6.316132031613203, "grad_norm": 0.6455478668212891, "learning_rate": 5.1025913329745525e-06, "loss": 0.0288, "step": 13586 }, { "epoch": 6.317061831706183, "grad_norm": 0.7407081127166748, "learning_rate": 5.055139011535704e-06, "loss": 0.0316, "step": 13588 }, { "epoch": 6.317991631799163, "grad_norm": 0.9946966767311096, "learning_rate": 5.007883537822745e-06, "loss": 0.0291, "step": 13590 }, { "epoch": 6.318921431892143, "grad_norm": 0.3423328101634979, "learning_rate": 4.960825378228071e-06, "loss": 0.0096, "step": 13592 }, { "epoch": 6.319851231985123, "grad_norm": 0.8075205087661743, "learning_rate": 4.913964997196824e-06, "loss": 0.0423, "step": 13594 }, { "epoch": 6.320781032078103, "grad_norm": 0.36435893177986145, "learning_rate": 4.867302857221943e-06, "loss": 0.0222, "step": 13596 }, { "epoch": 6.3217108321710835, "grad_norm": 0.6722139120101929, "learning_rate": 4.820839418840013e-06, "loss": 0.0336, "step": 13598 }, { "epoch": 6.322640632264063, "grad_norm": 0.5937634110450745, "learning_rate": 4.774575140626312e-06, "loss": 0.0198, "step": 13600 }, { "epoch": 6.323570432357044, "grad_norm": 0.7864893674850464, "learning_rate": 4.7285104791906384e-06, "loss": 0.0373, "step": 13602 }, { "epoch": 6.324500232450023, "grad_norm": 1.0629911422729492, "learning_rate": 4.682645889172653e-06, "loss": 0.0192, "step": 13604 }, { "epoch": 6.325430032543003, "grad_norm": 1.7573086023330688, "learning_rate": 4.636981823237245e-06, "loss": 0.0466, "step": 13606 }, { "epoch": 6.326359832635983, "grad_norm": 1.2210406064987183, "learning_rate": 4.591518732070409e-06, "loss": 0.0383, "step": 13608 }, { "epoch": 6.327289632728963, "grad_norm": 1.0178145170211792, "learning_rate": 4.5462570643744505e-06, "loss": 0.0266, "step": 13610 }, { "epoch": 6.328219432821943, "grad_norm": 0.9002236723899841, "learning_rate": 4.501197266863707e-06, "loss": 0.0241, "step": 13612 }, { "epoch": 6.329149232914923, "grad_norm": 0.9093614220619202, "learning_rate": 4.456339784260236e-06, "loss": 0.0339, "step": 13614 }, { "epoch": 6.3300790330079035, "grad_norm": 0.4095686674118042, "learning_rate": 4.4116850592892835e-06, "loss": 0.0339, "step": 13616 }, { "epoch": 6.331008833100883, "grad_norm": 0.5828489065170288, "learning_rate": 4.3672335326750086e-06, "loss": 0.0407, "step": 13618 }, { "epoch": 6.331938633193864, "grad_norm": 0.8390126824378967, "learning_rate": 4.32298564313593e-06, "loss": 0.0234, "step": 13620 }, { "epoch": 6.332868433286843, "grad_norm": 0.8614372611045837, "learning_rate": 4.2789418273809565e-06, "loss": 0.0229, "step": 13622 }, { "epoch": 6.333798233379824, "grad_norm": 0.9208846688270569, "learning_rate": 4.235102520104715e-06, "loss": 0.0275, "step": 13624 }, { "epoch": 6.334728033472803, "grad_norm": 0.6749438643455505, "learning_rate": 4.191468153983423e-06, "loss": 0.0193, "step": 13626 }, { "epoch": 6.335657833565783, "grad_norm": 0.5770670771598816, "learning_rate": 4.148039159670709e-06, "loss": 0.0238, "step": 13628 }, { "epoch": 6.336587633658763, "grad_norm": 0.882300078868866, "learning_rate": 4.1048159657932085e-06, "loss": 0.0332, "step": 13630 }, { "epoch": 6.337517433751743, "grad_norm": 0.40317627787590027, "learning_rate": 4.061798998946449e-06, "loss": 0.0142, "step": 13632 }, { "epoch": 6.3384472338447235, "grad_norm": 1.5590147972106934, "learning_rate": 4.018988683690476e-06, "loss": 0.0495, "step": 13634 }, { "epoch": 6.339377033937703, "grad_norm": 0.6321404576301575, "learning_rate": 3.976385442545772e-06, "loss": 0.0201, "step": 13636 }, { "epoch": 6.340306834030684, "grad_norm": 1.1838172674179077, "learning_rate": 3.933989695989219e-06, "loss": 0.025, "step": 13638 }, { "epoch": 6.341236634123663, "grad_norm": 1.3278676271438599, "learning_rate": 3.8918018624495805e-06, "loss": 0.026, "step": 13640 }, { "epoch": 6.342166434216644, "grad_norm": 0.3413008749485016, "learning_rate": 3.849822358303927e-06, "loss": 0.0134, "step": 13642 }, { "epoch": 6.343096234309623, "grad_norm": 2.2080891132354736, "learning_rate": 3.8080515978729294e-06, "loss": 0.0526, "step": 13644 }, { "epoch": 6.344026034402604, "grad_norm": 0.5691603422164917, "learning_rate": 3.7664899934170734e-06, "loss": 0.0323, "step": 13646 }, { "epoch": 6.344955834495583, "grad_norm": 0.7961113452911377, "learning_rate": 3.7251379551327168e-06, "loss": 0.0342, "step": 13648 }, { "epoch": 6.345885634588564, "grad_norm": 1.4462610483169556, "learning_rate": 3.6839958911476864e-06, "loss": 0.0542, "step": 13650 }, { "epoch": 6.3468154346815435, "grad_norm": 1.297450065612793, "learning_rate": 3.6430642075176402e-06, "loss": 0.0381, "step": 13652 }, { "epoch": 6.347745234774523, "grad_norm": 0.8543696999549866, "learning_rate": 3.602343308221672e-06, "loss": 0.0223, "step": 13654 }, { "epoch": 6.348675034867504, "grad_norm": 0.9282964468002319, "learning_rate": 3.561833595158672e-06, "loss": 0.0376, "step": 13656 }, { "epoch": 6.349604834960483, "grad_norm": 0.6649394631385803, "learning_rate": 3.521535468143194e-06, "loss": 0.018, "step": 13658 }, { "epoch": 6.350534635053464, "grad_norm": 0.6833548545837402, "learning_rate": 3.4814493249013917e-06, "loss": 0.0272, "step": 13660 }, { "epoch": 6.351464435146443, "grad_norm": 0.5249447226524353, "learning_rate": 3.441575561067408e-06, "loss": 0.0199, "step": 13662 }, { "epoch": 6.352394235239424, "grad_norm": 1.1828912496566772, "learning_rate": 3.4019145701791065e-06, "loss": 0.0326, "step": 13664 }, { "epoch": 6.353324035332403, "grad_norm": 1.4206677675247192, "learning_rate": 3.362466743674538e-06, "loss": 0.056, "step": 13666 }, { "epoch": 6.354253835425384, "grad_norm": 0.7454712986946106, "learning_rate": 3.32323247088774e-06, "loss": 0.0218, "step": 13668 }, { "epoch": 6.3551836355183635, "grad_norm": 0.8071190118789673, "learning_rate": 3.2842121390451917e-06, "loss": 0.0235, "step": 13670 }, { "epoch": 6.356113435611343, "grad_norm": 0.7200167775154114, "learning_rate": 3.2454061332618573e-06, "loss": 0.0183, "step": 13672 }, { "epoch": 6.357043235704324, "grad_norm": 1.0987350940704346, "learning_rate": 3.206814836537255e-06, "loss": 0.0272, "step": 13674 }, { "epoch": 6.357973035797303, "grad_norm": 1.0637586116790771, "learning_rate": 3.1684386297519983e-06, "loss": 0.0236, "step": 13676 }, { "epoch": 6.358902835890284, "grad_norm": 0.7311254143714905, "learning_rate": 3.1302778916637065e-06, "loss": 0.0266, "step": 13678 }, { "epoch": 6.359832635983263, "grad_norm": 1.2171560525894165, "learning_rate": 3.092332998903418e-06, "loss": 0.0311, "step": 13680 }, { "epoch": 6.360762436076244, "grad_norm": 1.1408036947250366, "learning_rate": 3.0546043259719456e-06, "loss": 0.0203, "step": 13682 }, { "epoch": 6.361692236169223, "grad_norm": 0.973106861114502, "learning_rate": 3.017092245236063e-06, "loss": 0.0246, "step": 13684 }, { "epoch": 6.362622036262204, "grad_norm": 2.3512520790100098, "learning_rate": 2.979797126924901e-06, "loss": 0.0528, "step": 13686 }, { "epoch": 6.3635518363551835, "grad_norm": 0.6317188739776611, "learning_rate": 2.9427193391261835e-06, "loss": 0.0176, "step": 13688 }, { "epoch": 6.364481636448164, "grad_norm": 0.28661394119262695, "learning_rate": 2.9058592477826628e-06, "loss": 0.0223, "step": 13690 }, { "epoch": 6.365411436541144, "grad_norm": 0.9909365177154541, "learning_rate": 2.8692172166886405e-06, "loss": 0.0326, "step": 13692 }, { "epoch": 6.366341236634124, "grad_norm": 0.5207486748695374, "learning_rate": 2.832793607486089e-06, "loss": 0.0232, "step": 13694 }, { "epoch": 6.367271036727104, "grad_norm": 0.7590470910072327, "learning_rate": 2.796588779661371e-06, "loss": 0.0368, "step": 13696 }, { "epoch": 6.368200836820083, "grad_norm": 1.0002061128616333, "learning_rate": 2.7606030905415576e-06, "loss": 0.0203, "step": 13698 }, { "epoch": 6.369130636913064, "grad_norm": 1.3983689546585083, "learning_rate": 2.7248368952907913e-06, "loss": 0.0334, "step": 13700 }, { "epoch": 6.370060437006043, "grad_norm": 0.8046295642852783, "learning_rate": 2.689290546907064e-06, "loss": 0.0364, "step": 13702 }, { "epoch": 6.370990237099024, "grad_norm": 0.8172715306282043, "learning_rate": 2.6539643962183976e-06, "loss": 0.0309, "step": 13704 }, { "epoch": 6.3719200371920035, "grad_norm": 0.5469913482666016, "learning_rate": 2.6188587918797226e-06, "loss": 0.0145, "step": 13706 }, { "epoch": 6.372849837284984, "grad_norm": 1.378852367401123, "learning_rate": 2.5839740803691007e-06, "loss": 0.0357, "step": 13708 }, { "epoch": 6.373779637377964, "grad_norm": 0.7575341463088989, "learning_rate": 2.54931060598459e-06, "loss": 0.038, "step": 13710 }, { "epoch": 6.374709437470944, "grad_norm": 0.8203573226928711, "learning_rate": 2.5148687108407264e-06, "loss": 0.0249, "step": 13712 }, { "epoch": 6.375639237563924, "grad_norm": 0.4016629755496979, "learning_rate": 2.480648734865033e-06, "loss": 0.0154, "step": 13714 }, { "epoch": 6.376569037656903, "grad_norm": 0.9005280137062073, "learning_rate": 2.4466510157949354e-06, "loss": 0.0267, "step": 13716 }, { "epoch": 6.377498837749884, "grad_norm": 0.7175995707511902, "learning_rate": 2.4128758891741194e-06, "loss": 0.0271, "step": 13718 }, { "epoch": 6.378428637842863, "grad_norm": 1.4969955682754517, "learning_rate": 2.3793236883495233e-06, "loss": 0.0324, "step": 13720 }, { "epoch": 6.379358437935844, "grad_norm": 1.0756150484085083, "learning_rate": 2.345994744467785e-06, "loss": 0.0339, "step": 13722 }, { "epoch": 6.3802882380288235, "grad_norm": 0.8868936896324158, "learning_rate": 2.3128893864720546e-06, "loss": 0.0311, "step": 13724 }, { "epoch": 6.381218038121804, "grad_norm": 0.6111631989479065, "learning_rate": 2.280007941098996e-06, "loss": 0.0226, "step": 13726 }, { "epoch": 6.382147838214784, "grad_norm": 1.119671106338501, "learning_rate": 2.247350732875091e-06, "loss": 0.0434, "step": 13728 }, { "epoch": 6.383077638307764, "grad_norm": 0.8073377013206482, "learning_rate": 2.21491808411387e-06, "loss": 0.0242, "step": 13730 }, { "epoch": 6.384007438400744, "grad_norm": 1.0701980590820312, "learning_rate": 2.182710314912453e-06, "loss": 0.027, "step": 13732 }, { "epoch": 6.384937238493724, "grad_norm": 1.4915908575057983, "learning_rate": 2.1507277431484755e-06, "loss": 0.0271, "step": 13734 }, { "epoch": 6.385867038586704, "grad_norm": 0.4101608693599701, "learning_rate": 2.118970684477087e-06, "loss": 0.0268, "step": 13736 }, { "epoch": 6.386796838679684, "grad_norm": 1.074249267578125, "learning_rate": 2.087439452327525e-06, "loss": 0.0266, "step": 13738 }, { "epoch": 6.387726638772664, "grad_norm": 0.663081705570221, "learning_rate": 2.056134357900466e-06, "loss": 0.0218, "step": 13740 }, { "epoch": 6.3886564388656435, "grad_norm": 1.1439173221588135, "learning_rate": 2.0250557101644806e-06, "loss": 0.0565, "step": 13742 }, { "epoch": 6.389586238958624, "grad_norm": 0.7111268043518066, "learning_rate": 1.9942038158532403e-06, "loss": 0.0189, "step": 13744 }, { "epoch": 6.390516039051604, "grad_norm": 1.6730167865753174, "learning_rate": 1.9635789794625573e-06, "loss": 0.0341, "step": 13746 }, { "epoch": 6.391445839144584, "grad_norm": 1.0165928602218628, "learning_rate": 1.9331815032471302e-06, "loss": 0.0324, "step": 13748 }, { "epoch": 6.392375639237564, "grad_norm": 0.5917250514030457, "learning_rate": 1.9030116872178172e-06, "loss": 0.0147, "step": 13750 }, { "epoch": 6.393305439330544, "grad_norm": 0.9204508662223816, "learning_rate": 1.8730698291385206e-06, "loss": 0.0278, "step": 13752 }, { "epoch": 6.394235239423524, "grad_norm": 0.5106828212738037, "learning_rate": 1.8433562245233232e-06, "loss": 0.0211, "step": 13754 }, { "epoch": 6.395165039516504, "grad_norm": 0.5032594203948975, "learning_rate": 1.8138711666334762e-06, "loss": 0.026, "step": 13756 }, { "epoch": 6.396094839609484, "grad_norm": 0.6166914105415344, "learning_rate": 1.7846149464745606e-06, "loss": 0.0377, "step": 13758 }, { "epoch": 6.397024639702464, "grad_norm": 1.0364583730697632, "learning_rate": 1.7555878527937272e-06, "loss": 0.0269, "step": 13760 }, { "epoch": 6.397954439795444, "grad_norm": 0.754253089427948, "learning_rate": 1.726790172076606e-06, "loss": 0.0352, "step": 13762 }, { "epoch": 6.398884239888424, "grad_norm": 1.200046181678772, "learning_rate": 1.6982221885447096e-06, "loss": 0.0339, "step": 13764 }, { "epoch": 6.399814039981404, "grad_norm": 1.0516304969787598, "learning_rate": 1.6698841841525372e-06, "loss": 0.0202, "step": 13766 }, { "epoch": 6.400743840074384, "grad_norm": 0.8456166982650757, "learning_rate": 1.6417764385846859e-06, "loss": 0.0324, "step": 13768 }, { "epoch": 6.401673640167364, "grad_norm": 0.7263309359550476, "learning_rate": 1.6138992292533215e-06, "loss": 0.027, "step": 13770 }, { "epoch": 6.402603440260344, "grad_norm": 0.6187747716903687, "learning_rate": 1.5862528312951824e-06, "loss": 0.0369, "step": 13772 }, { "epoch": 6.403533240353324, "grad_norm": 0.4445933401584625, "learning_rate": 1.5588375175691179e-06, "loss": 0.0157, "step": 13774 }, { "epoch": 6.404463040446304, "grad_norm": 0.57606440782547, "learning_rate": 1.5316535586531742e-06, "loss": 0.0192, "step": 13776 }, { "epoch": 6.405392840539284, "grad_norm": 1.708191990852356, "learning_rate": 1.5047012228419903e-06, "loss": 0.0725, "step": 13778 }, { "epoch": 6.406322640632264, "grad_norm": 1.1540039777755737, "learning_rate": 1.4779807761443622e-06, "loss": 0.022, "step": 13780 }, { "epoch": 6.4072524407252445, "grad_norm": 1.004267692565918, "learning_rate": 1.4514924822802187e-06, "loss": 0.0379, "step": 13782 }, { "epoch": 6.408182240818224, "grad_norm": 0.9401696920394897, "learning_rate": 1.4252366026783912e-06, "loss": 0.0425, "step": 13784 }, { "epoch": 6.409112040911204, "grad_norm": 1.347021222114563, "learning_rate": 1.399213396473774e-06, "loss": 0.0223, "step": 13786 }, { "epoch": 6.410041841004184, "grad_norm": 1.021313190460205, "learning_rate": 1.3734231205048842e-06, "loss": 0.0369, "step": 13788 }, { "epoch": 6.410971641097164, "grad_norm": 1.2433373928070068, "learning_rate": 1.3478660293113887e-06, "loss": 0.0369, "step": 13790 }, { "epoch": 6.411901441190144, "grad_norm": 1.0997052192687988, "learning_rate": 1.3225423751313739e-06, "loss": 0.0206, "step": 13792 }, { "epoch": 6.412831241283124, "grad_norm": 0.6695942878723145, "learning_rate": 1.297452407899196e-06, "loss": 0.0272, "step": 13794 }, { "epoch": 6.413761041376104, "grad_norm": 0.6628016233444214, "learning_rate": 1.272596375242618e-06, "loss": 0.0213, "step": 13796 }, { "epoch": 6.414690841469084, "grad_norm": 1.0186450481414795, "learning_rate": 1.2479745224807074e-06, "loss": 0.0242, "step": 13798 }, { "epoch": 6.4156206415620645, "grad_norm": 0.48942989110946655, "learning_rate": 1.2235870926211754e-06, "loss": 0.0226, "step": 13800 }, { "epoch": 6.416550441655044, "grad_norm": 0.8685123324394226, "learning_rate": 1.199434326358087e-06, "loss": 0.0284, "step": 13802 }, { "epoch": 6.417480241748025, "grad_norm": 1.8132253885269165, "learning_rate": 1.1755164620695478e-06, "loss": 0.0522, "step": 13804 }, { "epoch": 6.418410041841004, "grad_norm": 0.4777073562145233, "learning_rate": 1.1518337358151388e-06, "loss": 0.0127, "step": 13806 }, { "epoch": 6.419339841933985, "grad_norm": 0.6835368275642395, "learning_rate": 1.128386381333921e-06, "loss": 0.0266, "step": 13808 }, { "epoch": 6.420269642026964, "grad_norm": 0.737064778804779, "learning_rate": 1.1051746300417559e-06, "loss": 0.0247, "step": 13810 }, { "epoch": 6.421199442119944, "grad_norm": 1.3063981533050537, "learning_rate": 1.0821987110292344e-06, "loss": 0.0384, "step": 13812 }, { "epoch": 6.422129242212924, "grad_norm": 0.8611590266227722, "learning_rate": 1.0594588510594563e-06, "loss": 0.0287, "step": 13814 }, { "epoch": 6.423059042305904, "grad_norm": 1.0958492755889893, "learning_rate": 1.0369552745656024e-06, "loss": 0.0263, "step": 13816 }, { "epoch": 6.4239888423988845, "grad_norm": 0.6166085004806519, "learning_rate": 1.0146882036489432e-06, "loss": 0.0279, "step": 13818 }, { "epoch": 6.424918642491864, "grad_norm": 0.96672523021698, "learning_rate": 9.926578580764274e-07, "loss": 0.0298, "step": 13820 }, { "epoch": 6.4258484425848446, "grad_norm": 0.6839366555213928, "learning_rate": 9.70864455278693e-07, "loss": 0.0219, "step": 13822 }, { "epoch": 6.426778242677824, "grad_norm": 0.5091580748558044, "learning_rate": 9.493082103478588e-07, "loss": 0.0183, "step": 13824 }, { "epoch": 6.427708042770805, "grad_norm": 0.567439615726471, "learning_rate": 9.279893360353053e-07, "loss": 0.0171, "step": 13826 }, { "epoch": 6.428637842863784, "grad_norm": 0.7729703187942505, "learning_rate": 9.069080427497645e-07, "loss": 0.0316, "step": 13828 }, { "epoch": 6.429567642956764, "grad_norm": 1.1361868381500244, "learning_rate": 8.860645385550692e-07, "loss": 0.0524, "step": 13830 }, { "epoch": 6.430497443049744, "grad_norm": 2.0134408473968506, "learning_rate": 8.654590291681631e-07, "loss": 0.0344, "step": 13832 }, { "epoch": 6.431427243142724, "grad_norm": 0.8463093638420105, "learning_rate": 8.450917179571323e-07, "loss": 0.0326, "step": 13834 }, { "epoch": 6.4323570432357045, "grad_norm": 1.0583077669143677, "learning_rate": 8.249628059391131e-07, "loss": 0.0437, "step": 13836 }, { "epoch": 6.433286843328684, "grad_norm": 1.2730058431625366, "learning_rate": 8.050724917783657e-07, "loss": 0.0392, "step": 13838 }, { "epoch": 6.4342166434216645, "grad_norm": 0.9954528212547302, "learning_rate": 7.854209717842141e-07, "loss": 0.0192, "step": 13840 }, { "epoch": 6.435146443514644, "grad_norm": 0.7017378211021423, "learning_rate": 7.660084399092736e-07, "loss": 0.022, "step": 13842 }, { "epoch": 6.436076243607625, "grad_norm": 0.9019339680671692, "learning_rate": 7.468350877473712e-07, "loss": 0.0391, "step": 13844 }, { "epoch": 6.437006043700604, "grad_norm": 0.5864753127098083, "learning_rate": 7.279011045317109e-07, "loss": 0.0197, "step": 13846 }, { "epoch": 6.437935843793585, "grad_norm": 0.6657459735870361, "learning_rate": 7.092066771331503e-07, "loss": 0.0151, "step": 13848 }, { "epoch": 6.438865643886564, "grad_norm": 1.0313973426818848, "learning_rate": 6.907519900580747e-07, "loss": 0.0338, "step": 13850 }, { "epoch": 6.439795443979545, "grad_norm": 0.4860657751560211, "learning_rate": 6.725372254468369e-07, "loss": 0.0239, "step": 13852 }, { "epoch": 6.4407252440725244, "grad_norm": 0.9984261393547058, "learning_rate": 6.545625630717893e-07, "loss": 0.0359, "step": 13854 }, { "epoch": 6.441655044165504, "grad_norm": 0.5714625120162964, "learning_rate": 6.368281803355747e-07, "loss": 0.0178, "step": 13856 }, { "epoch": 6.4425848442584845, "grad_norm": 0.8843308687210083, "learning_rate": 6.193342522694296e-07, "loss": 0.0252, "step": 13858 }, { "epoch": 6.443514644351464, "grad_norm": 0.7594922780990601, "learning_rate": 6.020809515313004e-07, "loss": 0.0429, "step": 13860 }, { "epoch": 6.444444444444445, "grad_norm": 0.247663214802742, "learning_rate": 5.85068448404383e-07, "loss": 0.0213, "step": 13862 }, { "epoch": 6.445374244537424, "grad_norm": 0.4108871817588806, "learning_rate": 5.682969107951715e-07, "loss": 0.0183, "step": 13864 }, { "epoch": 6.446304044630405, "grad_norm": 0.6928779482841492, "learning_rate": 5.517665042319516e-07, "loss": 0.0271, "step": 13866 }, { "epoch": 6.447233844723384, "grad_norm": 1.0902994871139526, "learning_rate": 5.354773918632069e-07, "loss": 0.0387, "step": 13868 }, { "epoch": 6.448163644816365, "grad_norm": 0.6458333134651184, "learning_rate": 5.194297344558565e-07, "loss": 0.0256, "step": 13870 }, { "epoch": 6.449093444909344, "grad_norm": 0.7248163223266602, "learning_rate": 5.036236903938399e-07, "loss": 0.0216, "step": 13872 }, { "epoch": 6.450023245002324, "grad_norm": 1.1304608583450317, "learning_rate": 4.880594156763927e-07, "loss": 0.0653, "step": 13874 }, { "epoch": 6.4509530450953045, "grad_norm": 0.41278645396232605, "learning_rate": 4.7273706391664277e-07, "loss": 0.02, "step": 13876 }, { "epoch": 6.451882845188284, "grad_norm": 0.751501739025116, "learning_rate": 4.576567863400333e-07, "loss": 0.0174, "step": 13878 }, { "epoch": 6.452812645281265, "grad_norm": 1.0652291774749756, "learning_rate": 4.428187317827799e-07, "loss": 0.0252, "step": 13880 }, { "epoch": 6.453742445374244, "grad_norm": 0.9535688161849976, "learning_rate": 4.28223046690527e-07, "loss": 0.0341, "step": 13882 }, { "epoch": 6.454672245467225, "grad_norm": 0.7943058013916016, "learning_rate": 4.138698751167577e-07, "loss": 0.0369, "step": 13884 }, { "epoch": 6.455602045560204, "grad_norm": 0.8174223303794861, "learning_rate": 3.99759358721514e-07, "loss": 0.0221, "step": 13886 }, { "epoch": 6.456531845653185, "grad_norm": 0.8113406896591187, "learning_rate": 3.858916367698649e-07, "loss": 0.02, "step": 13888 }, { "epoch": 6.457461645746164, "grad_norm": 1.5697393417358398, "learning_rate": 3.7226684613064597e-07, "loss": 0.0292, "step": 13890 }, { "epoch": 6.458391445839145, "grad_norm": 0.8334473371505737, "learning_rate": 3.588851212750526e-07, "loss": 0.0249, "step": 13892 }, { "epoch": 6.4593212459321245, "grad_norm": 0.8855062127113342, "learning_rate": 3.4574659427527413e-07, "loss": 0.0333, "step": 13894 }, { "epoch": 6.460251046025105, "grad_norm": 0.46637624502182007, "learning_rate": 3.3285139480330307e-07, "loss": 0.0214, "step": 13896 }, { "epoch": 6.461180846118085, "grad_norm": 0.4993949830532074, "learning_rate": 3.2019965012953084e-07, "loss": 0.0223, "step": 13898 }, { "epoch": 6.462110646211064, "grad_norm": 0.43256881833076477, "learning_rate": 3.077914851215627e-07, "loss": 0.0144, "step": 13900 }, { "epoch": 6.463040446304045, "grad_norm": 1.1179417371749878, "learning_rate": 2.9562702224298776e-07, "loss": 0.0316, "step": 13902 }, { "epoch": 6.463970246397024, "grad_norm": 1.0244008302688599, "learning_rate": 2.837063815521444e-07, "loss": 0.0258, "step": 13904 }, { "epoch": 6.464900046490005, "grad_norm": 0.48211634159088135, "learning_rate": 2.720296807009569e-07, "loss": 0.0282, "step": 13906 }, { "epoch": 6.465829846582984, "grad_norm": 0.8522055149078369, "learning_rate": 2.605970349337338e-07, "loss": 0.0623, "step": 13908 }, { "epoch": 6.466759646675965, "grad_norm": 0.9234235286712646, "learning_rate": 2.4940855708606317e-07, "loss": 0.0337, "step": 13910 }, { "epoch": 6.4676894467689445, "grad_norm": 0.6348243951797485, "learning_rate": 2.384643575837275e-07, "loss": 0.0242, "step": 13912 }, { "epoch": 6.468619246861925, "grad_norm": 1.251821756362915, "learning_rate": 2.2776454444153497e-07, "loss": 0.0442, "step": 13914 }, { "epoch": 6.469549046954905, "grad_norm": 0.5857343077659607, "learning_rate": 2.1730922326233422e-07, "loss": 0.0158, "step": 13916 }, { "epoch": 6.470478847047885, "grad_norm": 1.074257254600525, "learning_rate": 2.0709849723593202e-07, "loss": 0.0405, "step": 13918 }, { "epoch": 6.471408647140865, "grad_norm": 1.3600188493728638, "learning_rate": 1.9713246713805496e-07, "loss": 0.0315, "step": 13920 }, { "epoch": 6.472338447233844, "grad_norm": 0.9384903311729431, "learning_rate": 1.874112313294115e-07, "loss": 0.0268, "step": 13922 }, { "epoch": 6.473268247326825, "grad_norm": 1.0641858577728271, "learning_rate": 1.779348857546595e-07, "loss": 0.0269, "step": 13924 }, { "epoch": 6.474198047419804, "grad_norm": 0.8882509469985962, "learning_rate": 1.6870352394152055e-07, "loss": 0.0302, "step": 13926 }, { "epoch": 6.475127847512785, "grad_norm": 1.2498196363449097, "learning_rate": 1.5971723699978103e-07, "loss": 0.027, "step": 13928 }, { "epoch": 6.4760576476057645, "grad_norm": 1.5516101121902466, "learning_rate": 1.509761136205066e-07, "loss": 0.0767, "step": 13930 }, { "epoch": 6.476987447698745, "grad_norm": 1.0395727157592773, "learning_rate": 1.4248024007503178e-07, "loss": 0.0344, "step": 13932 }, { "epoch": 6.477917247791725, "grad_norm": 0.7483776807785034, "learning_rate": 1.3422970021419113e-07, "loss": 0.0306, "step": 13934 }, { "epoch": 6.478847047884705, "grad_norm": 0.9044315814971924, "learning_rate": 1.262245754675006e-07, "loss": 0.0306, "step": 13936 }, { "epoch": 6.479776847977685, "grad_norm": 0.5654864311218262, "learning_rate": 1.1846494484229138e-07, "loss": 0.0297, "step": 13938 }, { "epoch": 6.480706648070665, "grad_norm": 0.5629227757453918, "learning_rate": 1.109508849230051e-07, "loss": 0.0262, "step": 13940 }, { "epoch": 6.481636448163645, "grad_norm": 2.0869359970092773, "learning_rate": 1.0368246987035815e-07, "loss": 0.0416, "step": 13942 }, { "epoch": 6.482566248256624, "grad_norm": 0.9199849367141724, "learning_rate": 9.665977142068689e-08, "loss": 0.0224, "step": 13944 }, { "epoch": 6.483496048349605, "grad_norm": 1.717630386352539, "learning_rate": 8.988285888519252e-08, "loss": 0.0323, "step": 13946 }, { "epoch": 6.4844258484425845, "grad_norm": 0.6270055174827576, "learning_rate": 8.335179914925008e-08, "loss": 0.0184, "step": 13948 }, { "epoch": 6.485355648535565, "grad_norm": 0.6819263696670532, "learning_rate": 7.706665667180328e-08, "loss": 0.018, "step": 13950 }, { "epoch": 6.486285448628545, "grad_norm": 0.6034681797027588, "learning_rate": 7.102749348465683e-08, "loss": 0.0298, "step": 13952 }, { "epoch": 6.487215248721525, "grad_norm": 0.8593317270278931, "learning_rate": 6.523436919191017e-08, "loss": 0.0255, "step": 13954 }, { "epoch": 6.488145048814505, "grad_norm": 0.5901556015014648, "learning_rate": 5.968734096936903e-08, "loss": 0.0249, "step": 13956 }, { "epoch": 6.489074848907485, "grad_norm": 0.6365477442741394, "learning_rate": 5.438646356395987e-08, "loss": 0.0213, "step": 13958 }, { "epoch": 6.490004649000465, "grad_norm": 2.5142478942871094, "learning_rate": 4.933178929321077e-08, "loss": 0.0387, "step": 13960 }, { "epoch": 6.490934449093445, "grad_norm": 1.2578431367874146, "learning_rate": 4.4523368044710236e-08, "loss": 0.0647, "step": 13962 }, { "epoch": 6.491864249186425, "grad_norm": 0.7877565026283264, "learning_rate": 3.996124727562424e-08, "loss": 0.0367, "step": 13964 }, { "epoch": 6.492794049279405, "grad_norm": 0.7598152160644531, "learning_rate": 3.564547201226047e-08, "loss": 0.0177, "step": 13966 }, { "epoch": 6.493723849372385, "grad_norm": 0.6146913170814514, "learning_rate": 3.157608484956315e-08, "loss": 0.0275, "step": 13968 }, { "epoch": 6.494653649465365, "grad_norm": 0.39645835757255554, "learning_rate": 2.7753125950749492e-08, "loss": 0.0162, "step": 13970 }, { "epoch": 6.495583449558345, "grad_norm": 0.734908938407898, "learning_rate": 2.4176633046879436e-08, "loss": 0.0241, "step": 13972 }, { "epoch": 6.496513249651325, "grad_norm": 0.7191246747970581, "learning_rate": 2.084664143649762e-08, "loss": 0.0229, "step": 13974 }, { "epoch": 6.497443049744305, "grad_norm": 0.8202003240585327, "learning_rate": 1.776318398526979e-08, "loss": 0.0275, "step": 13976 }, { "epoch": 6.498372849837285, "grad_norm": 0.9786264896392822, "learning_rate": 1.4926291125677485e-08, "loss": 0.035, "step": 13978 }, { "epoch": 6.499302649930265, "grad_norm": 1.0944191217422485, "learning_rate": 1.2335990856712713e-08, "loss": 0.0225, "step": 13980 }, { "epoch": 6.500232450023245, "grad_norm": 1.8685845136642456, "learning_rate": 9.992308743586536e-09, "loss": 0.0557, "step": 13982 }, { "epoch": 6.501162250116225, "grad_norm": 1.2694199085235596, "learning_rate": 7.895267917498686e-09, "loss": 0.0346, "step": 13984 }, { "epoch": 6.502092050209205, "grad_norm": 0.7011920809745789, "learning_rate": 6.044889075398875e-09, "loss": 0.0203, "step": 13986 }, { "epoch": 6.503021850302185, "grad_norm": 0.7690691351890564, "learning_rate": 4.441190479775846e-09, "loss": 0.0236, "step": 13988 }, { "epoch": 6.503951650395165, "grad_norm": 0.6563759446144104, "learning_rate": 3.0841879584852907e-09, "loss": 0.0236, "step": 13990 }, { "epoch": 6.504881450488145, "grad_norm": 1.2793303728103638, "learning_rate": 1.973894904597196e-09, "loss": 0.0249, "step": 13992 }, { "epoch": 6.505811250581125, "grad_norm": 0.693101167678833, "learning_rate": 1.1103222762542881e-09, "loss": 0.0208, "step": 13994 }, { "epoch": 6.506741050674105, "grad_norm": 0.9328014850616455, "learning_rate": 4.93478596572114e-10, "loss": 0.0318, "step": 13996 }, { "epoch": 6.507670850767085, "grad_norm": 1.4150925874710083, "learning_rate": 1.233699535446713e-10, "loss": 0.0452, "step": 13998 }, { "epoch": 6.508600650860065, "grad_norm": 0.7473105192184448, "learning_rate": 0.0, "loss": 0.0241, "step": 14000 }, { "epoch": 6.508600650860065, "eval_cer": 0.12997240445694053, "eval_loss": 0.1919122040271759, "eval_runtime": 402.0102, "eval_samples_per_second": 31.576, "eval_steps_per_second": 0.988, "step": 14000 }, { "epoch": 6.509530450953045, "grad_norm": 0.6144935488700867, "learning_rate": 1.2336995354467197e-10, "loss": 0.0215, "step": 14002 }, { "epoch": 6.510460251046025, "grad_norm": 0.8227245807647705, "learning_rate": 4.934785965721167e-10, "loss": 0.0326, "step": 14004 }, { "epoch": 6.5113900511390055, "grad_norm": 0.9976989030838013, "learning_rate": 1.1103222762542941e-09, "loss": 0.034, "step": 14006 }, { "epoch": 6.512319851231985, "grad_norm": 0.8294063806533813, "learning_rate": 1.973894904597207e-09, "loss": 0.0244, "step": 14008 }, { "epoch": 6.513249651324966, "grad_norm": 1.1191943883895874, "learning_rate": 3.0841879584825317e-09, "loss": 0.0334, "step": 14010 }, { "epoch": 6.514179451417945, "grad_norm": 1.0674272775650024, "learning_rate": 4.441190479775869e-09, "loss": 0.0577, "step": 14012 }, { "epoch": 6.515109251510925, "grad_norm": 0.8930789232254028, "learning_rate": 6.0448890753989065e-09, "loss": 0.0522, "step": 14014 }, { "epoch": 6.516039051603905, "grad_norm": 1.1021497249603271, "learning_rate": 7.895267917501502e-09, "loss": 0.0331, "step": 14016 }, { "epoch": 6.516968851696885, "grad_norm": 0.9690963625907898, "learning_rate": 9.992308743586585e-09, "loss": 0.0435, "step": 14018 }, { "epoch": 6.517898651789865, "grad_norm": 0.6928367614746094, "learning_rate": 1.2335990856709996e-08, "loss": 0.016, "step": 14020 }, { "epoch": 6.518828451882845, "grad_norm": 0.2843431830406189, "learning_rate": 1.4926291125677558e-08, "loss": 0.0221, "step": 14022 }, { "epoch": 6.5197582519758255, "grad_norm": 1.4480785131454468, "learning_rate": 1.7763183985267107e-08, "loss": 0.0616, "step": 14024 }, { "epoch": 6.520688052068805, "grad_norm": 0.8709639310836792, "learning_rate": 2.084664143649773e-08, "loss": 0.0245, "step": 14026 }, { "epoch": 6.521617852161786, "grad_norm": 0.3051301836967468, "learning_rate": 2.417663304688234e-08, "loss": 0.0125, "step": 14028 }, { "epoch": 6.522547652254765, "grad_norm": 0.6367751955986023, "learning_rate": 2.7753125950749647e-08, "loss": 0.0354, "step": 14030 }, { "epoch": 6.523477452347745, "grad_norm": 1.2284351587295532, "learning_rate": 3.1576084849563324e-08, "loss": 0.022, "step": 14032 }, { "epoch": 6.524407252440725, "grad_norm": 0.8985521793365479, "learning_rate": 3.564547201225233e-08, "loss": 0.0479, "step": 14034 }, { "epoch": 6.525337052533705, "grad_norm": 0.5392189025878906, "learning_rate": 3.996124727562169e-08, "loss": 0.0214, "step": 14036 }, { "epoch": 6.526266852626685, "grad_norm": 0.7428489327430725, "learning_rate": 4.4523368044707715e-08, "loss": 0.0381, "step": 14038 }, { "epoch": 6.527196652719665, "grad_norm": 0.5190829038619995, "learning_rate": 4.9331789293208273e-08, "loss": 0.0269, "step": 14040 }, { "epoch": 6.5281264528126455, "grad_norm": 0.5951483249664307, "learning_rate": 5.438646356396573e-08, "loss": 0.0298, "step": 14042 }, { "epoch": 6.529056252905625, "grad_norm": 1.151092529296875, "learning_rate": 5.968734096936661e-08, "loss": 0.0313, "step": 14044 }, { "epoch": 6.529986052998606, "grad_norm": 0.7979297637939453, "learning_rate": 6.523436919190778e-08, "loss": 0.0196, "step": 14046 }, { "epoch": 6.530915853091585, "grad_norm": 0.8685993552207947, "learning_rate": 7.102749348464894e-08, "loss": 0.0279, "step": 14048 }, { "epoch": 6.531845653184566, "grad_norm": 0.536936342716217, "learning_rate": 7.706665667179264e-08, "loss": 0.0215, "step": 14050 }, { "epoch": 6.532775453277545, "grad_norm": 1.8384287357330322, "learning_rate": 8.335179914925613e-08, "loss": 0.0364, "step": 14052 }, { "epoch": 6.533705253370526, "grad_norm": 0.318080872297287, "learning_rate": 8.988285888518752e-08, "loss": 0.0251, "step": 14054 }, { "epoch": 6.534635053463505, "grad_norm": 0.5383972525596619, "learning_rate": 9.665977142069024e-08, "loss": 0.0221, "step": 14056 }, { "epoch": 6.535564853556485, "grad_norm": 0.7950648069381714, "learning_rate": 1.0368246987035598e-07, "loss": 0.0306, "step": 14058 }, { "epoch": 6.5364946536494655, "grad_norm": 0.5725477933883667, "learning_rate": 1.1095088492300018e-07, "loss": 0.0224, "step": 14060 }, { "epoch": 6.537424453742445, "grad_norm": 1.2185051441192627, "learning_rate": 1.1846494484228928e-07, "loss": 0.0322, "step": 14062 }, { "epoch": 6.538354253835426, "grad_norm": 0.6448500156402588, "learning_rate": 1.2622457546748744e-07, "loss": 0.0278, "step": 14064 }, { "epoch": 6.539284053928405, "grad_norm": 0.5002498626708984, "learning_rate": 1.3422970021419468e-07, "loss": 0.0133, "step": 14066 }, { "epoch": 6.540213854021386, "grad_norm": 0.24687559902668, "learning_rate": 1.4248024007502704e-07, "loss": 0.0116, "step": 14068 }, { "epoch": 6.541143654114365, "grad_norm": 0.7535180449485779, "learning_rate": 1.5097611362051024e-07, "loss": 0.0232, "step": 14070 }, { "epoch": 6.542073454207346, "grad_norm": 0.9156087636947632, "learning_rate": 1.5971723699978746e-07, "loss": 0.063, "step": 14072 }, { "epoch": 6.543003254300325, "grad_norm": 0.6221912503242493, "learning_rate": 1.6870352394150758e-07, "loss": 0.016, "step": 14074 }, { "epoch": 6.543933054393305, "grad_norm": 1.2521607875823975, "learning_rate": 1.7793488575465492e-07, "loss": 0.0336, "step": 14076 }, { "epoch": 6.5448628544862855, "grad_norm": 0.7126548290252686, "learning_rate": 1.874112313293959e-07, "loss": 0.0443, "step": 14078 }, { "epoch": 6.545792654579266, "grad_norm": 1.279240369796753, "learning_rate": 1.971324671380616e-07, "loss": 0.0356, "step": 14080 }, { "epoch": 6.546722454672246, "grad_norm": 0.7074787020683289, "learning_rate": 2.0709849723592763e-07, "loss": 0.0114, "step": 14082 }, { "epoch": 6.547652254765225, "grad_norm": 0.6870935559272766, "learning_rate": 2.173092232623299e-07, "loss": 0.0291, "step": 14084 }, { "epoch": 6.548582054858206, "grad_norm": 0.39381641149520874, "learning_rate": 2.2776454444153068e-07, "loss": 0.0238, "step": 14086 }, { "epoch": 6.549511854951185, "grad_norm": 0.8311089873313904, "learning_rate": 2.3846435758371217e-07, "loss": 0.0309, "step": 14088 }, { "epoch": 6.550441655044166, "grad_norm": 0.7000072002410889, "learning_rate": 2.4940855708605904e-07, "loss": 0.026, "step": 14090 }, { "epoch": 6.551371455137145, "grad_norm": 0.9759277701377869, "learning_rate": 2.605970349337158e-07, "loss": 0.0339, "step": 14092 }, { "epoch": 6.552301255230126, "grad_norm": 0.9869343638420105, "learning_rate": 2.7202968070095003e-07, "loss": 0.0322, "step": 14094 }, { "epoch": 6.5532310553231055, "grad_norm": 0.6936774849891663, "learning_rate": 2.837063815521515e-07, "loss": 0.0371, "step": 14096 }, { "epoch": 6.554160855416086, "grad_norm": 0.45786187052726746, "learning_rate": 2.9562702224298104e-07, "loss": 0.0257, "step": 14098 }, { "epoch": 6.5550906555090656, "grad_norm": 0.5266798138618469, "learning_rate": 3.077914851215561e-07, "loss": 0.0172, "step": 14100 }, { "epoch": 6.556020455602045, "grad_norm": 0.5070422291755676, "learning_rate": 3.201996501295132e-07, "loss": 0.0187, "step": 14102 }, { "epoch": 6.556950255695026, "grad_norm": 0.5560793280601501, "learning_rate": 3.3285139480329666e-07, "loss": 0.0345, "step": 14104 }, { "epoch": 6.557880055788005, "grad_norm": 0.4225059449672699, "learning_rate": 3.457465942752706e-07, "loss": 0.0197, "step": 14106 }, { "epoch": 6.558809855880986, "grad_norm": 0.9360178709030151, "learning_rate": 3.5888512127504636e-07, "loss": 0.0331, "step": 14108 }, { "epoch": 6.559739655973965, "grad_norm": 0.8869208097457886, "learning_rate": 3.722668461306564e-07, "loss": 0.0248, "step": 14110 }, { "epoch": 6.560669456066946, "grad_norm": 0.6517983675003052, "learning_rate": 3.858916367698615e-07, "loss": 0.0317, "step": 14112 }, { "epoch": 6.5615992561599255, "grad_norm": 1.0124170780181885, "learning_rate": 3.99759358721508e-07, "loss": 0.0195, "step": 14114 }, { "epoch": 6.562529056252906, "grad_norm": 1.1739009618759155, "learning_rate": 4.138698751167517e-07, "loss": 0.056, "step": 14116 }, { "epoch": 6.5634588563458856, "grad_norm": 0.6592795848846436, "learning_rate": 4.2822304669050437e-07, "loss": 0.0354, "step": 14118 }, { "epoch": 6.564388656438865, "grad_norm": 0.8802260160446167, "learning_rate": 4.42818731782774e-07, "loss": 0.0293, "step": 14120 }, { "epoch": 6.565318456531846, "grad_norm": 0.5891916751861572, "learning_rate": 4.5765678634002753e-07, "loss": 0.0303, "step": 14122 }, { "epoch": 6.566248256624826, "grad_norm": 1.2761880159378052, "learning_rate": 4.727370639166539e-07, "loss": 0.0302, "step": 14124 }, { "epoch": 6.567178056717806, "grad_norm": 0.8530398607254028, "learning_rate": 4.880594156763872e-07, "loss": 0.0283, "step": 14126 }, { "epoch": 6.568107856810785, "grad_norm": 0.46353429555892944, "learning_rate": 5.036236903938152e-07, "loss": 0.0188, "step": 14128 }, { "epoch": 6.569037656903766, "grad_norm": 0.7489776611328125, "learning_rate": 5.194297344558513e-07, "loss": 0.0477, "step": 14130 }, { "epoch": 6.5699674569967454, "grad_norm": 1.0622631311416626, "learning_rate": 5.354773918631823e-07, "loss": 0.0239, "step": 14132 }, { "epoch": 6.570897257089726, "grad_norm": 1.1210488080978394, "learning_rate": 5.51766504231966e-07, "loss": 0.0334, "step": 14134 }, { "epoch": 6.5718270571827055, "grad_norm": 0.5725510716438293, "learning_rate": 5.682969107951638e-07, "loss": 0.0192, "step": 14136 }, { "epoch": 6.572756857275686, "grad_norm": 0.7820074558258057, "learning_rate": 5.850684484043753e-07, "loss": 0.0269, "step": 14138 }, { "epoch": 6.573686657368666, "grad_norm": 0.8289169073104858, "learning_rate": 6.020809515313151e-07, "loss": 0.0249, "step": 14140 }, { "epoch": 6.574616457461646, "grad_norm": 1.3132638931274414, "learning_rate": 6.193342522694027e-07, "loss": 0.0391, "step": 14142 }, { "epoch": 6.575546257554626, "grad_norm": 0.6400314569473267, "learning_rate": 6.368281803355673e-07, "loss": 0.0294, "step": 14144 }, { "epoch": 6.576476057647605, "grad_norm": 0.9456989765167236, "learning_rate": 6.545625630717627e-07, "loss": 0.0193, "step": 14146 }, { "epoch": 6.577405857740586, "grad_norm": 0.32885506749153137, "learning_rate": 6.725372254468521e-07, "loss": 0.0159, "step": 14148 }, { "epoch": 6.578335657833565, "grad_norm": 0.8855543732643127, "learning_rate": 6.907519900580901e-07, "loss": 0.0305, "step": 14150 }, { "epoch": 6.579265457926546, "grad_norm": 1.4805618524551392, "learning_rate": 7.092066771331436e-07, "loss": 0.044, "step": 14152 }, { "epoch": 6.5801952580195255, "grad_norm": 0.6124271154403687, "learning_rate": 7.279011045317266e-07, "loss": 0.0336, "step": 14154 }, { "epoch": 6.581125058112506, "grad_norm": 0.7579219937324524, "learning_rate": 7.468350877473426e-07, "loss": 0.0394, "step": 14156 }, { "epoch": 6.582054858205486, "grad_norm": 0.3584092855453491, "learning_rate": 7.660084399092675e-07, "loss": 0.0124, "step": 14158 }, { "epoch": 6.582984658298466, "grad_norm": 0.9918620586395264, "learning_rate": 7.85420971784208e-07, "loss": 0.0312, "step": 14160 }, { "epoch": 6.583914458391446, "grad_norm": 0.517218291759491, "learning_rate": 8.050724917783596e-07, "loss": 0.0237, "step": 14162 }, { "epoch": 6.584844258484426, "grad_norm": 1.4598957300186157, "learning_rate": 8.249628059391294e-07, "loss": 0.039, "step": 14164 }, { "epoch": 6.585774058577406, "grad_norm": 0.6581509709358215, "learning_rate": 8.450917179571265e-07, "loss": 0.024, "step": 14166 }, { "epoch": 6.586703858670386, "grad_norm": 0.7653487920761108, "learning_rate": 8.654590291681574e-07, "loss": 0.0255, "step": 14168 }, { "epoch": 6.587633658763366, "grad_norm": 1.1008294820785522, "learning_rate": 8.860645385550386e-07, "loss": 0.0439, "step": 14170 }, { "epoch": 6.5885634588563455, "grad_norm": 0.6974859833717346, "learning_rate": 9.069080427497367e-07, "loss": 0.029, "step": 14172 }, { "epoch": 6.589493258949326, "grad_norm": 0.9378176331520081, "learning_rate": 9.279893360352999e-07, "loss": 0.0287, "step": 14174 }, { "epoch": 6.590423059042306, "grad_norm": 0.9754680395126343, "learning_rate": 9.493082103478535e-07, "loss": 0.0482, "step": 14176 }, { "epoch": 6.591352859135286, "grad_norm": 0.6777422428131104, "learning_rate": 9.70864455278713e-07, "loss": 0.0206, "step": 14178 }, { "epoch": 6.592282659228266, "grad_norm": 0.7730860114097595, "learning_rate": 9.926578580764198e-07, "loss": 0.0295, "step": 14180 }, { "epoch": 6.593212459321246, "grad_norm": 1.564176082611084, "learning_rate": 1.0146882036489133e-06, "loss": 0.0316, "step": 14182 }, { "epoch": 6.594142259414226, "grad_norm": 0.818019449710846, "learning_rate": 1.0369552745655977e-06, "loss": 0.0251, "step": 14184 }, { "epoch": 6.595072059507206, "grad_norm": 0.8987934589385986, "learning_rate": 1.0594588510594241e-06, "loss": 0.0455, "step": 14186 }, { "epoch": 6.596001859600186, "grad_norm": 0.7710734605789185, "learning_rate": 1.0821987110292301e-06, "loss": 0.0441, "step": 14188 }, { "epoch": 6.5969316596931655, "grad_norm": 0.9373701810836792, "learning_rate": 1.105174630041749e-06, "loss": 0.0321, "step": 14190 }, { "epoch": 6.597861459786146, "grad_norm": 0.5557774305343628, "learning_rate": 1.1283863813339397e-06, "loss": 0.0159, "step": 14192 }, { "epoch": 6.598791259879126, "grad_norm": 0.5753014087677002, "learning_rate": 1.1518337358151604e-06, "loss": 0.021, "step": 14194 }, { "epoch": 6.599721059972106, "grad_norm": 0.3547399342060089, "learning_rate": 1.1755164620695143e-06, "loss": 0.0198, "step": 14196 }, { "epoch": 6.600650860065086, "grad_norm": 1.4977086782455444, "learning_rate": 1.1994343263580812e-06, "loss": 0.0322, "step": 14198 }, { "epoch": 6.601580660158066, "grad_norm": 0.6502998471260071, "learning_rate": 1.2235870926211422e-06, "loss": 0.0456, "step": 14200 }, { "epoch": 6.602510460251046, "grad_norm": 0.8081398606300354, "learning_rate": 1.2479745224807023e-06, "loss": 0.0217, "step": 14202 }, { "epoch": 6.603440260344026, "grad_norm": 0.49826905131340027, "learning_rate": 1.2725963752426436e-06, "loss": 0.0295, "step": 14204 }, { "epoch": 6.604370060437006, "grad_norm": 0.5093134641647339, "learning_rate": 1.2974524078991914e-06, "loss": 0.026, "step": 14206 }, { "epoch": 6.605299860529986, "grad_norm": 1.4217299222946167, "learning_rate": 1.3225423751313974e-06, "loss": 0.0327, "step": 14208 }, { "epoch": 6.606229660622966, "grad_norm": 1.3054471015930176, "learning_rate": 1.3478660293113544e-06, "loss": 0.0472, "step": 14210 }, { "epoch": 6.6071594607159465, "grad_norm": 0.49748682975769043, "learning_rate": 1.3734231205048804e-06, "loss": 0.0367, "step": 14212 }, { "epoch": 6.608089260808926, "grad_norm": 1.3391027450561523, "learning_rate": 1.3992133964737428e-06, "loss": 0.0344, "step": 14214 }, { "epoch": 6.609019060901906, "grad_norm": 0.6650235652923584, "learning_rate": 1.4252366026783575e-06, "loss": 0.0277, "step": 14216 }, { "epoch": 6.609948860994886, "grad_norm": 0.7266335487365723, "learning_rate": 1.4514924822802462e-06, "loss": 0.028, "step": 14218 }, { "epoch": 6.610878661087866, "grad_norm": 0.6156625151634216, "learning_rate": 1.4779807761443592e-06, "loss": 0.0205, "step": 14220 }, { "epoch": 6.611808461180846, "grad_norm": 0.6685687899589539, "learning_rate": 1.5047012228420157e-06, "loss": 0.0349, "step": 14222 }, { "epoch": 6.612738261273826, "grad_norm": 0.7853866815567017, "learning_rate": 1.5316535586531386e-06, "loss": 0.024, "step": 14224 }, { "epoch": 6.613668061366806, "grad_norm": 0.9108678102493286, "learning_rate": 1.5588375175690827e-06, "loss": 0.0225, "step": 14226 }, { "epoch": 6.614597861459786, "grad_norm": 0.4075510799884796, "learning_rate": 1.5862528312951809e-06, "loss": 0.0193, "step": 14228 }, { "epoch": 6.6155276615527665, "grad_norm": 0.5863857269287109, "learning_rate": 1.6138992292533204e-06, "loss": 0.0293, "step": 14230 }, { "epoch": 6.616457461645746, "grad_norm": 0.9822931289672852, "learning_rate": 1.6417764385847157e-06, "loss": 0.0214, "step": 14232 }, { "epoch": 6.617387261738726, "grad_norm": 0.842465877532959, "learning_rate": 1.669884184152534e-06, "loss": 0.0654, "step": 14234 }, { "epoch": 6.618317061831706, "grad_norm": 0.9563736319541931, "learning_rate": 1.6982221885447401e-06, "loss": 0.0202, "step": 14236 }, { "epoch": 6.619246861924686, "grad_norm": 0.8466838598251343, "learning_rate": 1.7267901720766034e-06, "loss": 0.0274, "step": 14238 }, { "epoch": 6.620176662017666, "grad_norm": 0.819032609462738, "learning_rate": 1.7555878527936944e-06, "loss": 0.0202, "step": 14240 }, { "epoch": 6.621106462110646, "grad_norm": 0.37811920046806335, "learning_rate": 1.7846149464745583e-06, "loss": 0.0165, "step": 14242 }, { "epoch": 6.622036262203626, "grad_norm": 0.363825261592865, "learning_rate": 1.8138711666334743e-06, "loss": 0.0291, "step": 14244 }, { "epoch": 6.622966062296606, "grad_norm": 0.8846033215522766, "learning_rate": 1.8433562245233546e-06, "loss": 0.035, "step": 14246 }, { "epoch": 6.6238958623895865, "grad_norm": 0.6129227876663208, "learning_rate": 1.8730698291385522e-06, "loss": 0.0157, "step": 14248 }, { "epoch": 6.624825662482566, "grad_norm": 0.36470332741737366, "learning_rate": 1.9030116872178156e-06, "loss": 0.0226, "step": 14250 }, { "epoch": 6.625755462575547, "grad_norm": 0.24176444113254547, "learning_rate": 1.933181503247128e-06, "loss": 0.0124, "step": 14252 }, { "epoch": 6.626685262668526, "grad_norm": 0.6087048649787903, "learning_rate": 1.9635789794625188e-06, "loss": 0.0296, "step": 14254 }, { "epoch": 6.627615062761507, "grad_norm": 0.8498103618621826, "learning_rate": 1.9942038158532378e-06, "loss": 0.0253, "step": 14256 }, { "epoch": 6.628544862854486, "grad_norm": 1.4832417964935303, "learning_rate": 2.025055710164478e-06, "loss": 0.05, "step": 14258 }, { "epoch": 6.629474662947466, "grad_norm": 0.733991265296936, "learning_rate": 2.056134357900464e-06, "loss": 0.0344, "step": 14260 }, { "epoch": 6.630404463040446, "grad_norm": 0.8291684985160828, "learning_rate": 2.0874394523275587e-06, "loss": 0.019, "step": 14262 }, { "epoch": 6.631334263133426, "grad_norm": 0.8940747380256653, "learning_rate": 2.118970684477049e-06, "loss": 0.0255, "step": 14264 }, { "epoch": 6.6322640632264065, "grad_norm": 1.4689807891845703, "learning_rate": 2.1507277431484742e-06, "loss": 0.0551, "step": 14266 }, { "epoch": 6.633193863319386, "grad_norm": 0.8781888484954834, "learning_rate": 2.182710314912413e-06, "loss": 0.0357, "step": 14268 }, { "epoch": 6.634123663412367, "grad_norm": 0.7796184420585632, "learning_rate": 2.2149180841138333e-06, "loss": 0.0197, "step": 14270 }, { "epoch": 6.635053463505346, "grad_norm": 1.2516019344329834, "learning_rate": 2.2473507328751242e-06, "loss": 0.034, "step": 14272 }, { "epoch": 6.635983263598327, "grad_norm": 0.844143271446228, "learning_rate": 2.280007941098996e-06, "loss": 0.0321, "step": 14274 }, { "epoch": 6.636913063691306, "grad_norm": 0.6950188279151917, "learning_rate": 2.3128893864720906e-06, "loss": 0.0268, "step": 14276 }, { "epoch": 6.637842863784286, "grad_norm": 1.6467851400375366, "learning_rate": 2.3459947444677465e-06, "loss": 0.0445, "step": 14278 }, { "epoch": 6.638772663877266, "grad_norm": 1.0307338237762451, "learning_rate": 2.3793236883495216e-06, "loss": 0.0364, "step": 14280 }, { "epoch": 6.639702463970247, "grad_norm": 0.6118257641792297, "learning_rate": 2.412875889174118e-06, "loss": 0.0274, "step": 14282 }, { "epoch": 6.6406322640632265, "grad_norm": 0.4988762140274048, "learning_rate": 2.446651015794899e-06, "loss": 0.0279, "step": 14284 }, { "epoch": 6.641562064156206, "grad_norm": 1.69290292263031, "learning_rate": 2.4806487348650713e-06, "loss": 0.0275, "step": 14286 }, { "epoch": 6.642491864249187, "grad_norm": 1.199186086654663, "learning_rate": 2.5148687108407264e-06, "loss": 0.0348, "step": 14288 }, { "epoch": 6.643421664342166, "grad_norm": 1.3336397409439087, "learning_rate": 2.549310605984629e-06, "loss": 0.0338, "step": 14290 }, { "epoch": 6.644351464435147, "grad_norm": 0.5547077059745789, "learning_rate": 2.583974080369101e-06, "loss": 0.0196, "step": 14292 }, { "epoch": 6.645281264528126, "grad_norm": 1.0844253301620483, "learning_rate": 2.618858791879684e-06, "loss": 0.0379, "step": 14294 }, { "epoch": 6.646211064621107, "grad_norm": 1.2179381847381592, "learning_rate": 2.6539643962183985e-06, "loss": 0.0308, "step": 14296 }, { "epoch": 6.647140864714086, "grad_norm": 0.6930427551269531, "learning_rate": 2.6892905469070232e-06, "loss": 0.0303, "step": 14298 }, { "epoch": 6.648070664807067, "grad_norm": 0.4792950451374054, "learning_rate": 2.7248368952908337e-06, "loss": 0.0264, "step": 14300 }, { "epoch": 6.6490004649000465, "grad_norm": 0.5107163786888123, "learning_rate": 2.760603090541559e-06, "loss": 0.0208, "step": 14302 }, { "epoch": 6.649930264993026, "grad_norm": 0.9545860886573792, "learning_rate": 2.7965887796613723e-06, "loss": 0.0183, "step": 14304 }, { "epoch": 6.650860065086007, "grad_norm": 1.0611449480056763, "learning_rate": 2.8327936074860906e-06, "loss": 0.0208, "step": 14306 }, { "epoch": 6.651789865178986, "grad_norm": 0.7507985234260559, "learning_rate": 2.8692172166886007e-06, "loss": 0.0204, "step": 14308 }, { "epoch": 6.652719665271967, "grad_norm": 0.3586903512477875, "learning_rate": 2.9058592477826623e-06, "loss": 0.0137, "step": 14310 }, { "epoch": 6.653649465364946, "grad_norm": 1.001948595046997, "learning_rate": 2.942719339126145e-06, "loss": 0.0259, "step": 14312 }, { "epoch": 6.654579265457927, "grad_norm": 0.3708464801311493, "learning_rate": 2.9797971269249043e-06, "loss": 0.0205, "step": 14314 }, { "epoch": 6.655509065550906, "grad_norm": 0.9640244841575623, "learning_rate": 3.0170922452361075e-06, "loss": 0.0237, "step": 14316 }, { "epoch": 6.656438865643887, "grad_norm": 0.4554334878921509, "learning_rate": 3.054604325971949e-06, "loss": 0.0211, "step": 14318 }, { "epoch": 6.6573686657368665, "grad_norm": 1.2054091691970825, "learning_rate": 3.0923329989034187e-06, "loss": 0.0323, "step": 14320 }, { "epoch": 6.658298465829847, "grad_norm": 0.38908833265304565, "learning_rate": 3.130277891663666e-06, "loss": 0.0194, "step": 14322 }, { "epoch": 6.659228265922827, "grad_norm": 1.0480791330337524, "learning_rate": 3.1684386297519987e-06, "loss": 0.0313, "step": 14324 }, { "epoch": 6.660158066015807, "grad_norm": 0.7637245655059814, "learning_rate": 3.206814836537303e-06, "loss": 0.0244, "step": 14326 }, { "epoch": 6.661087866108787, "grad_norm": 0.6983271241188049, "learning_rate": 3.245406133261858e-06, "loss": 0.0235, "step": 14328 }, { "epoch": 6.662017666201766, "grad_norm": 0.8540825247764587, "learning_rate": 3.2842121390452374e-06, "loss": 0.0366, "step": 14330 }, { "epoch": 6.662947466294747, "grad_norm": 0.3909684121608734, "learning_rate": 3.323232470887745e-06, "loss": 0.0212, "step": 14332 }, { "epoch": 6.663877266387726, "grad_norm": 0.9973958134651184, "learning_rate": 3.3624667436745404e-06, "loss": 0.0327, "step": 14334 }, { "epoch": 6.664807066480707, "grad_norm": 0.7614860534667969, "learning_rate": 3.4019145701791086e-06, "loss": 0.0346, "step": 14336 }, { "epoch": 6.6657368665736865, "grad_norm": 0.64188152551651, "learning_rate": 3.4415755610673658e-06, "loss": 0.0251, "step": 14338 }, { "epoch": 6.666666666666667, "grad_norm": 0.9441412687301636, "learning_rate": 3.4814493249014413e-06, "loss": 0.034, "step": 14340 }, { "epoch": 6.667596466759647, "grad_norm": 1.4116109609603882, "learning_rate": 3.5215354681431964e-06, "loss": 0.025, "step": 14342 }, { "epoch": 6.668526266852627, "grad_norm": 1.03644859790802, "learning_rate": 3.5618335951587195e-06, "loss": 0.0406, "step": 14344 }, { "epoch": 6.669456066945607, "grad_norm": 0.9072498083114624, "learning_rate": 3.602343308221675e-06, "loss": 0.0191, "step": 14346 }, { "epoch": 6.670385867038586, "grad_norm": 1.092732548713684, "learning_rate": 3.643064207517596e-06, "loss": 0.0419, "step": 14348 }, { "epoch": 6.671315667131567, "grad_norm": 0.933952808380127, "learning_rate": 3.68399589114769e-06, "loss": 0.0298, "step": 14350 }, { "epoch": 6.672245467224546, "grad_norm": 0.8200474381446838, "learning_rate": 3.7251379551326744e-06, "loss": 0.034, "step": 14352 }, { "epoch": 6.673175267317527, "grad_norm": 0.5034893155097961, "learning_rate": 3.766489993417126e-06, "loss": 0.022, "step": 14354 }, { "epoch": 6.6741050674105065, "grad_norm": 0.5685073137283325, "learning_rate": 3.8080515978729328e-06, "loss": 0.0173, "step": 14356 }, { "epoch": 6.675034867503487, "grad_norm": 0.4364009499549866, "learning_rate": 3.849822358303981e-06, "loss": 0.0217, "step": 14358 }, { "epoch": 6.675964667596467, "grad_norm": 0.8195444941520691, "learning_rate": 3.8918018624496304e-06, "loss": 0.0247, "step": 14360 }, { "epoch": 6.676894467689447, "grad_norm": 1.1331111192703247, "learning_rate": 3.933989695989176e-06, "loss": 0.0399, "step": 14362 }, { "epoch": 6.677824267782427, "grad_norm": 0.8159966468811035, "learning_rate": 3.9763854425457764e-06, "loss": 0.0212, "step": 14364 }, { "epoch": 6.678754067875407, "grad_norm": 0.8024634718894958, "learning_rate": 4.018988683690436e-06, "loss": 0.0226, "step": 14366 }, { "epoch": 6.679683867968387, "grad_norm": 1.2417265176773071, "learning_rate": 4.0617989989465066e-06, "loss": 0.0511, "step": 14368 }, { "epoch": 6.680613668061367, "grad_norm": 0.9039382338523865, "learning_rate": 4.104815965793263e-06, "loss": 0.0217, "step": 14370 }, { "epoch": 6.681543468154347, "grad_norm": 0.5520707368850708, "learning_rate": 4.148039159670714e-06, "loss": 0.0243, "step": 14372 }, { "epoch": 6.6824732682473265, "grad_norm": 1.1136122941970825, "learning_rate": 4.19146815398343e-06, "loss": 0.0296, "step": 14374 }, { "epoch": 6.683403068340307, "grad_norm": 0.9233624935150146, "learning_rate": 4.235102520104672e-06, "loss": 0.0244, "step": 14376 }, { "epoch": 6.684332868433287, "grad_norm": 0.32156726717948914, "learning_rate": 4.278941827380961e-06, "loss": 0.0157, "step": 14378 }, { "epoch": 6.685262668526267, "grad_norm": 0.9951450228691101, "learning_rate": 4.322985643135935e-06, "loss": 0.0151, "step": 14380 }, { "epoch": 6.686192468619247, "grad_norm": 0.5072375535964966, "learning_rate": 4.367233532675013e-06, "loss": 0.031, "step": 14382 }, { "epoch": 6.687122268712227, "grad_norm": 0.5307641625404358, "learning_rate": 4.411685059289342e-06, "loss": 0.0398, "step": 14384 }, { "epoch": 6.688052068805207, "grad_norm": 1.349746823310852, "learning_rate": 4.456339784260242e-06, "loss": 0.022, "step": 14386 }, { "epoch": 6.688981868898187, "grad_norm": 1.0635583400726318, "learning_rate": 4.501197266863715e-06, "loss": 0.0386, "step": 14388 }, { "epoch": 6.689911668991167, "grad_norm": 1.0967622995376587, "learning_rate": 4.546257064374408e-06, "loss": 0.039, "step": 14390 }, { "epoch": 6.6908414690841465, "grad_norm": 0.6955051422119141, "learning_rate": 4.591518732070367e-06, "loss": 0.0289, "step": 14392 }, { "epoch": 6.691771269177127, "grad_norm": 0.6471487283706665, "learning_rate": 4.6369818232372536e-06, "loss": 0.0158, "step": 14394 }, { "epoch": 6.6927010692701066, "grad_norm": 0.6037565469741821, "learning_rate": 4.682645889172662e-06, "loss": 0.0317, "step": 14396 }, { "epoch": 6.693630869363087, "grad_norm": 0.6244372725486755, "learning_rate": 4.728510479190698e-06, "loss": 0.0202, "step": 14398 }, { "epoch": 6.694560669456067, "grad_norm": 0.6231850385665894, "learning_rate": 4.774575140626322e-06, "loss": 0.0318, "step": 14400 }, { "epoch": 6.695490469549047, "grad_norm": 0.7300328612327576, "learning_rate": 4.820839418840022e-06, "loss": 0.0168, "step": 14402 }, { "epoch": 6.696420269642027, "grad_norm": 0.5628653168678284, "learning_rate": 4.867302857221953e-06, "loss": 0.0173, "step": 14404 }, { "epoch": 6.697350069735007, "grad_norm": 0.3782132565975189, "learning_rate": 4.9139649971967825e-06, "loss": 0.0216, "step": 14406 }, { "epoch": 6.698279869827987, "grad_norm": 1.6678893566131592, "learning_rate": 4.960825378228083e-06, "loss": 0.0376, "step": 14408 }, { "epoch": 6.699209669920967, "grad_norm": 0.6567463278770447, "learning_rate": 5.007883537822757e-06, "loss": 0.0243, "step": 14410 }, { "epoch": 6.700139470013947, "grad_norm": 0.8908796906471252, "learning_rate": 5.055139011535769e-06, "loss": 0.0215, "step": 14412 }, { "epoch": 6.701069270106927, "grad_norm": 0.7683044075965881, "learning_rate": 5.102591332974618e-06, "loss": 0.0303, "step": 14414 }, { "epoch": 6.701999070199907, "grad_norm": 0.9795831441879272, "learning_rate": 5.150240033804102e-06, "loss": 0.0216, "step": 14416 }, { "epoch": 6.702928870292887, "grad_norm": 0.625929057598114, "learning_rate": 5.198084643750834e-06, "loss": 0.0177, "step": 14418 }, { "epoch": 6.703858670385867, "grad_norm": 0.8839271664619446, "learning_rate": 5.2461246906077225e-06, "loss": 0.0236, "step": 14420 }, { "epoch": 6.704788470478847, "grad_norm": 0.3284202218055725, "learning_rate": 5.29435970023906e-06, "loss": 0.0208, "step": 14422 }, { "epoch": 6.705718270571827, "grad_norm": 1.0138850212097168, "learning_rate": 5.342789196584558e-06, "loss": 0.036, "step": 14424 }, { "epoch": 6.706648070664807, "grad_norm": 0.7100760340690613, "learning_rate": 5.391412701664747e-06, "loss": 0.0268, "step": 14426 }, { "epoch": 6.707577870757787, "grad_norm": 1.2603518962860107, "learning_rate": 5.44022973558532e-06, "loss": 0.0284, "step": 14428 }, { "epoch": 6.708507670850767, "grad_norm": 0.4436134994029999, "learning_rate": 5.48923981654175e-06, "loss": 0.0146, "step": 14430 }, { "epoch": 6.709437470943747, "grad_norm": 0.8768236637115479, "learning_rate": 5.538442460824434e-06, "loss": 0.0328, "step": 14432 }, { "epoch": 6.710367271036727, "grad_norm": 0.45839107036590576, "learning_rate": 5.587837182823023e-06, "loss": 0.0268, "step": 14434 }, { "epoch": 6.711297071129707, "grad_norm": 0.5776904821395874, "learning_rate": 5.637423495031667e-06, "loss": 0.0175, "step": 14436 }, { "epoch": 6.712226871222687, "grad_norm": 0.7420940399169922, "learning_rate": 5.687200908053426e-06, "loss": 0.0271, "step": 14438 }, { "epoch": 6.713156671315668, "grad_norm": 0.36937037110328674, "learning_rate": 5.737168930605276e-06, "loss": 0.0181, "step": 14440 }, { "epoch": 6.714086471408647, "grad_norm": 0.6584226489067078, "learning_rate": 5.787327069523117e-06, "loss": 0.0194, "step": 14442 }, { "epoch": 6.715016271501627, "grad_norm": 0.9009031057357788, "learning_rate": 5.837674829766256e-06, "loss": 0.0286, "step": 14444 }, { "epoch": 6.715946071594607, "grad_norm": 0.9588241577148438, "learning_rate": 5.888211714422735e-06, "loss": 0.0253, "step": 14446 }, { "epoch": 6.716875871687587, "grad_norm": 0.7972764372825623, "learning_rate": 5.938937224713797e-06, "loss": 0.0196, "step": 14448 }, { "epoch": 6.717805671780567, "grad_norm": 0.9743324518203735, "learning_rate": 5.989850859999246e-06, "loss": 0.0285, "step": 14450 }, { "epoch": 6.718735471873547, "grad_norm": 0.5902631878852844, "learning_rate": 6.040952117782001e-06, "loss": 0.0182, "step": 14452 }, { "epoch": 6.7196652719665275, "grad_norm": 0.5211553573608398, "learning_rate": 6.092240493713217e-06, "loss": 0.0267, "step": 14454 }, { "epoch": 6.720595072059507, "grad_norm": 0.8247618079185486, "learning_rate": 6.143715481597446e-06, "loss": 0.029, "step": 14456 }, { "epoch": 6.721524872152488, "grad_norm": 0.8691372871398926, "learning_rate": 6.195376573397223e-06, "loss": 0.0381, "step": 14458 }, { "epoch": 6.722454672245467, "grad_norm": 0.8292721509933472, "learning_rate": 6.247223259238487e-06, "loss": 0.041, "step": 14460 }, { "epoch": 6.723384472338447, "grad_norm": 1.121852993965149, "learning_rate": 6.29925502741544e-06, "loss": 0.0317, "step": 14462 }, { "epoch": 6.724314272431427, "grad_norm": 0.9728794693946838, "learning_rate": 6.351471364395478e-06, "loss": 0.0323, "step": 14464 }, { "epoch": 6.725244072524407, "grad_norm": 1.1571944952011108, "learning_rate": 6.403871754824431e-06, "loss": 0.0431, "step": 14466 }, { "epoch": 6.726173872617387, "grad_norm": 0.4759596288204193, "learning_rate": 6.456455681531543e-06, "loss": 0.0203, "step": 14468 }, { "epoch": 6.727103672710367, "grad_norm": 0.7325693368911743, "learning_rate": 6.5092226255347485e-06, "loss": 0.0296, "step": 14470 }, { "epoch": 6.7280334728033475, "grad_norm": 0.8985444903373718, "learning_rate": 6.562172066045669e-06, "loss": 0.0683, "step": 14472 }, { "epoch": 6.728963272896327, "grad_norm": 0.42397236824035645, "learning_rate": 6.615303480474586e-06, "loss": 0.0168, "step": 14474 }, { "epoch": 6.729893072989308, "grad_norm": 0.519446611404419, "learning_rate": 6.668616344436011e-06, "loss": 0.0217, "step": 14476 }, { "epoch": 6.730822873082287, "grad_norm": 0.4261282682418823, "learning_rate": 6.722110131753436e-06, "loss": 0.028, "step": 14478 }, { "epoch": 6.731752673175267, "grad_norm": 0.5926581621170044, "learning_rate": 6.775784314464715e-06, "loss": 0.0305, "step": 14480 }, { "epoch": 6.732682473268247, "grad_norm": 0.9910712838172913, "learning_rate": 6.82963836282746e-06, "loss": 0.0237, "step": 14482 }, { "epoch": 6.733612273361228, "grad_norm": 0.706656277179718, "learning_rate": 6.883671745323833e-06, "loss": 0.0146, "step": 14484 }, { "epoch": 6.734542073454207, "grad_norm": 0.7612806558609009, "learning_rate": 6.93788392866628e-06, "loss": 0.0377, "step": 14486 }, { "epoch": 6.735471873547187, "grad_norm": 0.5168070197105408, "learning_rate": 6.992274377802321e-06, "loss": 0.0314, "step": 14488 }, { "epoch": 6.7364016736401675, "grad_norm": 0.6637610197067261, "learning_rate": 7.046842555920299e-06, "loss": 0.0226, "step": 14490 }, { "epoch": 6.737331473733147, "grad_norm": 1.5080112218856812, "learning_rate": 7.101587924454267e-06, "loss": 0.052, "step": 14492 }, { "epoch": 6.738261273826128, "grad_norm": 0.8496905565261841, "learning_rate": 7.15650994308948e-06, "loss": 0.0177, "step": 14494 }, { "epoch": 6.739191073919107, "grad_norm": 1.1740084886550903, "learning_rate": 7.211608069767907e-06, "loss": 0.0276, "step": 14496 }, { "epoch": 6.740120874012088, "grad_norm": 0.5215067863464355, "learning_rate": 7.2668817606931615e-06, "loss": 0.0234, "step": 14498 }, { "epoch": 6.741050674105067, "grad_norm": 0.7973103523254395, "learning_rate": 7.322330470336348e-06, "loss": 0.0358, "step": 14500 }, { "epoch": 6.741980474198048, "grad_norm": 0.7381361722946167, "learning_rate": 7.377953651440969e-06, "loss": 0.033, "step": 14502 }, { "epoch": 6.742910274291027, "grad_norm": 0.4943796396255493, "learning_rate": 7.433750755028741e-06, "loss": 0.0138, "step": 14504 }, { "epoch": 6.743840074384007, "grad_norm": 0.4671924412250519, "learning_rate": 7.489721230404904e-06, "loss": 0.0183, "step": 14506 }, { "epoch": 6.7447698744769875, "grad_norm": 0.9887598156929016, "learning_rate": 7.545864525163207e-06, "loss": 0.0315, "step": 14508 }, { "epoch": 6.745699674569967, "grad_norm": 1.2703335285186768, "learning_rate": 7.602180085192196e-06, "loss": 0.0251, "step": 14510 }, { "epoch": 6.746629474662948, "grad_norm": 0.5735373497009277, "learning_rate": 7.658667354679895e-06, "loss": 0.0258, "step": 14512 }, { "epoch": 6.747559274755927, "grad_norm": 1.058873176574707, "learning_rate": 7.715325776119824e-06, "loss": 0.052, "step": 14514 }, { "epoch": 6.748489074848908, "grad_norm": 0.6445603966712952, "learning_rate": 7.772154790316301e-06, "loss": 0.0153, "step": 14516 }, { "epoch": 6.749418874941887, "grad_norm": 1.2859139442443848, "learning_rate": 7.829153836389834e-06, "loss": 0.0427, "step": 14518 }, { "epoch": 6.750348675034868, "grad_norm": 0.4678875505924225, "learning_rate": 7.886322351782854e-06, "loss": 0.0131, "step": 14520 }, { "epoch": 6.751278475127847, "grad_norm": 0.6027785539627075, "learning_rate": 7.943659772265125e-06, "loss": 0.0181, "step": 14522 }, { "epoch": 6.752208275220828, "grad_norm": 0.9021539092063904, "learning_rate": 8.00116553193952e-06, "loss": 0.0368, "step": 14524 }, { "epoch": 6.7531380753138075, "grad_norm": 0.6430683732032776, "learning_rate": 8.058839063247472e-06, "loss": 0.0191, "step": 14526 }, { "epoch": 6.754067875406788, "grad_norm": 0.5707961916923523, "learning_rate": 8.11667979697438e-06, "loss": 0.0216, "step": 14528 }, { "epoch": 6.754997675499768, "grad_norm": 0.7970477342605591, "learning_rate": 8.174687162255688e-06, "loss": 0.0217, "step": 14530 }, { "epoch": 6.755927475592747, "grad_norm": 1.2554593086242676, "learning_rate": 8.232860586582052e-06, "loss": 0.0273, "step": 14532 }, { "epoch": 6.756857275685728, "grad_norm": 0.6465139389038086, "learning_rate": 8.291199495805254e-06, "loss": 0.0201, "step": 14534 }, { "epoch": 6.757787075778707, "grad_norm": 0.6674253940582275, "learning_rate": 8.349703314143755e-06, "loss": 0.0312, "step": 14536 }, { "epoch": 6.758716875871688, "grad_norm": 0.7374978065490723, "learning_rate": 8.408371464188546e-06, "loss": 0.0289, "step": 14538 }, { "epoch": 6.759646675964667, "grad_norm": 0.45992913842201233, "learning_rate": 8.46720336690874e-06, "loss": 0.04, "step": 14540 }, { "epoch": 6.760576476057648, "grad_norm": 0.5936574339866638, "learning_rate": 8.526198441657076e-06, "loss": 0.0291, "step": 14542 }, { "epoch": 6.7615062761506275, "grad_norm": 1.0568445920944214, "learning_rate": 8.585356106176119e-06, "loss": 0.0296, "step": 14544 }, { "epoch": 6.762436076243608, "grad_norm": 0.8644788265228271, "learning_rate": 8.644675776603532e-06, "loss": 0.0282, "step": 14546 }, { "epoch": 6.763365876336588, "grad_norm": 1.0226056575775146, "learning_rate": 8.704156867478056e-06, "loss": 0.0225, "step": 14548 }, { "epoch": 6.764295676429567, "grad_norm": 0.6027012467384338, "learning_rate": 8.763798791745462e-06, "loss": 0.0126, "step": 14550 }, { "epoch": 6.765225476522548, "grad_norm": 0.9239317774772644, "learning_rate": 8.823600960763912e-06, "loss": 0.0234, "step": 14552 }, { "epoch": 6.766155276615527, "grad_norm": 0.7892206907272339, "learning_rate": 8.883562784310264e-06, "loss": 0.0183, "step": 14554 }, { "epoch": 6.767085076708508, "grad_norm": 0.6459301710128784, "learning_rate": 8.9436836705854e-06, "loss": 0.0256, "step": 14556 }, { "epoch": 6.768014876801487, "grad_norm": 0.7850602269172668, "learning_rate": 9.003963026220509e-06, "loss": 0.0338, "step": 14558 }, { "epoch": 6.768944676894468, "grad_norm": 0.5347840785980225, "learning_rate": 9.064400256282828e-06, "loss": 0.0139, "step": 14560 }, { "epoch": 6.7698744769874475, "grad_norm": 0.5754089951515198, "learning_rate": 9.124994764281015e-06, "loss": 0.0203, "step": 14562 }, { "epoch": 6.770804277080428, "grad_norm": 1.1119239330291748, "learning_rate": 9.185745952171951e-06, "loss": 0.0457, "step": 14564 }, { "epoch": 6.771734077173408, "grad_norm": 0.7217116951942444, "learning_rate": 9.246653220365795e-06, "loss": 0.0295, "step": 14566 }, { "epoch": 6.772663877266388, "grad_norm": 0.638994038105011, "learning_rate": 9.307715967732472e-06, "loss": 0.0303, "step": 14568 }, { "epoch": 6.773593677359368, "grad_norm": 0.9207333326339722, "learning_rate": 9.368933591607387e-06, "loss": 0.0541, "step": 14570 }, { "epoch": 6.774523477452348, "grad_norm": 0.6059748530387878, "learning_rate": 9.430305487797162e-06, "loss": 0.0127, "step": 14572 }, { "epoch": 6.775453277545328, "grad_norm": 0.7257763147354126, "learning_rate": 9.491831050586177e-06, "loss": 0.0242, "step": 14574 }, { "epoch": 6.776383077638307, "grad_norm": 1.3272119760513306, "learning_rate": 9.55350967274168e-06, "loss": 0.0248, "step": 14576 }, { "epoch": 6.777312877731288, "grad_norm": 0.535622775554657, "learning_rate": 9.61534074552078e-06, "loss": 0.0244, "step": 14578 }, { "epoch": 6.7782426778242675, "grad_norm": 0.623672366142273, "learning_rate": 9.67732365867562e-06, "loss": 0.0198, "step": 14580 }, { "epoch": 6.779172477917248, "grad_norm": 1.0393770933151245, "learning_rate": 9.73945780045993e-06, "loss": 0.0285, "step": 14582 }, { "epoch": 6.780102278010228, "grad_norm": 1.106995701789856, "learning_rate": 9.80174255763489e-06, "loss": 0.0235, "step": 14584 }, { "epoch": 6.781032078103208, "grad_norm": 1.1796619892120361, "learning_rate": 9.864177315474951e-06, "loss": 0.0282, "step": 14586 }, { "epoch": 6.781961878196188, "grad_norm": 0.31030213832855225, "learning_rate": 9.926761457774472e-06, "loss": 0.0277, "step": 14588 }, { "epoch": 6.782891678289168, "grad_norm": 1.5731194019317627, "learning_rate": 9.989494366852948e-06, "loss": 0.0486, "step": 14590 }, { "epoch": 6.783821478382148, "grad_norm": 0.9104661345481873, "learning_rate": 1.005237542356204e-05, "loss": 0.028, "step": 14592 }, { "epoch": 6.784751278475127, "grad_norm": 0.3055681884288788, "learning_rate": 1.011540400729117e-05, "loss": 0.0155, "step": 14594 }, { "epoch": 6.785681078568108, "grad_norm": 0.9206215739250183, "learning_rate": 1.0178579495973522e-05, "loss": 0.0241, "step": 14596 }, { "epoch": 6.786610878661088, "grad_norm": 0.5141895413398743, "learning_rate": 1.0241901266092675e-05, "loss": 0.0136, "step": 14598 }, { "epoch": 6.787540678754068, "grad_norm": 0.5228453874588013, "learning_rate": 1.0305368692688171e-05, "loss": 0.0244, "step": 14600 }, { "epoch": 6.788470478847048, "grad_norm": 0.9090437293052673, "learning_rate": 1.0368981149362276e-05, "loss": 0.0253, "step": 14602 }, { "epoch": 6.789400278940028, "grad_norm": 1.3930295705795288, "learning_rate": 1.043273800828566e-05, "loss": 0.0299, "step": 14604 }, { "epoch": 6.790330079033008, "grad_norm": 0.562197208404541, "learning_rate": 1.0496638640203786e-05, "loss": 0.0183, "step": 14606 }, { "epoch": 6.791259879125988, "grad_norm": 0.952340304851532, "learning_rate": 1.0560682414443363e-05, "loss": 0.0236, "step": 14608 }, { "epoch": 6.792189679218968, "grad_norm": 0.3947238326072693, "learning_rate": 1.0624868698918048e-05, "loss": 0.0217, "step": 14610 }, { "epoch": 6.793119479311948, "grad_norm": 0.9890720248222351, "learning_rate": 1.0689196860135204e-05, "loss": 0.034, "step": 14612 }, { "epoch": 6.794049279404928, "grad_norm": 1.0947364568710327, "learning_rate": 1.0753666263201985e-05, "loss": 0.041, "step": 14614 }, { "epoch": 6.794979079497908, "grad_norm": 0.7323158979415894, "learning_rate": 1.0818276271831125e-05, "loss": 0.027, "step": 14616 }, { "epoch": 6.795908879590888, "grad_norm": 0.5122412443161011, "learning_rate": 1.0883026248348144e-05, "loss": 0.0158, "step": 14618 }, { "epoch": 6.796838679683868, "grad_norm": 1.2009401321411133, "learning_rate": 1.0947915553696762e-05, "loss": 0.0426, "step": 14620 }, { "epoch": 6.797768479776848, "grad_norm": 0.8540225625038147, "learning_rate": 1.1012943547445883e-05, "loss": 0.027, "step": 14622 }, { "epoch": 6.798698279869828, "grad_norm": 0.8717135190963745, "learning_rate": 1.1078109587795318e-05, "loss": 0.0316, "step": 14624 }, { "epoch": 6.799628079962808, "grad_norm": 0.7171940207481384, "learning_rate": 1.1143413031582626e-05, "loss": 0.0361, "step": 14626 }, { "epoch": 6.800557880055788, "grad_norm": 1.2984797954559326, "learning_rate": 1.1208853234289327e-05, "loss": 0.0413, "step": 14628 }, { "epoch": 6.801487680148768, "grad_norm": 0.9009943604469299, "learning_rate": 1.127442955004675e-05, "loss": 0.0469, "step": 14630 }, { "epoch": 6.802417480241748, "grad_norm": 0.8503722548484802, "learning_rate": 1.1340141331643349e-05, "loss": 0.0349, "step": 14632 }, { "epoch": 6.803347280334728, "grad_norm": 0.6077072024345398, "learning_rate": 1.1405987930530221e-05, "loss": 0.0181, "step": 14634 }, { "epoch": 6.804277080427708, "grad_norm": 0.7808938026428223, "learning_rate": 1.1471968696828097e-05, "loss": 0.0282, "step": 14636 }, { "epoch": 6.805206880520688, "grad_norm": 0.7730957865715027, "learning_rate": 1.153808297933352e-05, "loss": 0.028, "step": 14638 }, { "epoch": 6.806136680613668, "grad_norm": 0.602616012096405, "learning_rate": 1.1604330125525069e-05, "loss": 0.0203, "step": 14640 }, { "epoch": 6.8070664807066485, "grad_norm": 0.6700547337532043, "learning_rate": 1.167070948157038e-05, "loss": 0.0216, "step": 14642 }, { "epoch": 6.807996280799628, "grad_norm": 0.7150000333786011, "learning_rate": 1.1737220392331704e-05, "loss": 0.0227, "step": 14644 }, { "epoch": 6.808926080892608, "grad_norm": 1.1584351062774658, "learning_rate": 1.1803862201373352e-05, "loss": 0.0318, "step": 14646 }, { "epoch": 6.809855880985588, "grad_norm": 0.4270252585411072, "learning_rate": 1.1870634250967655e-05, "loss": 0.0189, "step": 14648 }, { "epoch": 6.810785681078568, "grad_norm": 0.7542041540145874, "learning_rate": 1.1937535882101283e-05, "loss": 0.0187, "step": 14650 }, { "epoch": 6.811715481171548, "grad_norm": 1.2832458019256592, "learning_rate": 1.2004566434482305e-05, "loss": 0.0436, "step": 14652 }, { "epoch": 6.812645281264528, "grad_norm": 0.8192248940467834, "learning_rate": 1.2071725246546066e-05, "loss": 0.029, "step": 14654 }, { "epoch": 6.8135750813575084, "grad_norm": 1.6235238313674927, "learning_rate": 1.2139011655462369e-05, "loss": 0.0329, "step": 14656 }, { "epoch": 6.814504881450488, "grad_norm": 0.4575323164463043, "learning_rate": 1.2206424997141439e-05, "loss": 0.0285, "step": 14658 }, { "epoch": 6.8154346815434685, "grad_norm": 1.3112595081329346, "learning_rate": 1.2273964606240738e-05, "loss": 0.0362, "step": 14660 }, { "epoch": 6.816364481636448, "grad_norm": 0.9148069024085999, "learning_rate": 1.2341629816171742e-05, "loss": 0.032, "step": 14662 }, { "epoch": 6.817294281729428, "grad_norm": 0.6766182780265808, "learning_rate": 1.2409419959105991e-05, "loss": 0.0194, "step": 14664 }, { "epoch": 6.818224081822408, "grad_norm": 0.4843279719352722, "learning_rate": 1.2477334365982302e-05, "loss": 0.0195, "step": 14666 }, { "epoch": 6.819153881915388, "grad_norm": 1.8049589395523071, "learning_rate": 1.2545372366512735e-05, "loss": 0.0532, "step": 14668 }, { "epoch": 6.820083682008368, "grad_norm": 0.6040574312210083, "learning_rate": 1.2613533289189854e-05, "loss": 0.0266, "step": 14670 }, { "epoch": 6.821013482101348, "grad_norm": 0.7564893960952759, "learning_rate": 1.268181646129279e-05, "loss": 0.0196, "step": 14672 }, { "epoch": 6.821943282194328, "grad_norm": 0.7459097504615784, "learning_rate": 1.2750221208894118e-05, "loss": 0.0276, "step": 14674 }, { "epoch": 6.822873082287308, "grad_norm": 0.5827462673187256, "learning_rate": 1.281874685686676e-05, "loss": 0.0305, "step": 14676 }, { "epoch": 6.8238028823802885, "grad_norm": 0.6189590096473694, "learning_rate": 1.2887392728890074e-05, "loss": 0.0271, "step": 14678 }, { "epoch": 6.824732682473268, "grad_norm": 1.1168253421783447, "learning_rate": 1.2956158147457096e-05, "loss": 0.0557, "step": 14680 }, { "epoch": 6.825662482566249, "grad_norm": 0.9821563959121704, "learning_rate": 1.3025042433880992e-05, "loss": 0.027, "step": 14682 }, { "epoch": 6.826592282659228, "grad_norm": 1.1097643375396729, "learning_rate": 1.3094044908301574e-05, "loss": 0.0273, "step": 14684 }, { "epoch": 6.827522082752209, "grad_norm": 2.1412274837493896, "learning_rate": 1.3163164889692286e-05, "loss": 0.0374, "step": 14686 }, { "epoch": 6.828451882845188, "grad_norm": 1.2352863550186157, "learning_rate": 1.3232401695866735e-05, "loss": 0.0403, "step": 14688 }, { "epoch": 6.829381682938168, "grad_norm": 1.0831722021102905, "learning_rate": 1.3301754643485673e-05, "loss": 0.028, "step": 14690 }, { "epoch": 6.830311483031148, "grad_norm": 0.9032544493675232, "learning_rate": 1.3371223048063583e-05, "loss": 0.0283, "step": 14692 }, { "epoch": 6.831241283124128, "grad_norm": 1.1758431196212769, "learning_rate": 1.3440806223975201e-05, "loss": 0.063, "step": 14694 }, { "epoch": 6.8321710832171085, "grad_norm": 0.9756811261177063, "learning_rate": 1.3510503484462834e-05, "loss": 0.0374, "step": 14696 }, { "epoch": 6.833100883310088, "grad_norm": 1.2143921852111816, "learning_rate": 1.3580314141642584e-05, "loss": 0.0319, "step": 14698 }, { "epoch": 6.834030683403069, "grad_norm": 0.4195149540901184, "learning_rate": 1.3650237506511355e-05, "loss": 0.0184, "step": 14700 }, { "epoch": 6.834960483496048, "grad_norm": 0.7253996133804321, "learning_rate": 1.372027288895393e-05, "loss": 0.0222, "step": 14702 }, { "epoch": 6.835890283589029, "grad_norm": 0.5142528414726257, "learning_rate": 1.3790419597749206e-05, "loss": 0.018, "step": 14704 }, { "epoch": 6.836820083682008, "grad_norm": 0.9477583169937134, "learning_rate": 1.3860676940577642e-05, "loss": 0.0243, "step": 14706 }, { "epoch": 6.837749883774988, "grad_norm": 0.9005760550498962, "learning_rate": 1.393104422402747e-05, "loss": 0.031, "step": 14708 }, { "epoch": 6.838679683867968, "grad_norm": 0.8938680291175842, "learning_rate": 1.4001520753602243e-05, "loss": 0.0395, "step": 14710 }, { "epoch": 6.839609483960948, "grad_norm": 0.7058882117271423, "learning_rate": 1.4072105833726753e-05, "loss": 0.0329, "step": 14712 }, { "epoch": 6.8405392840539285, "grad_norm": 0.5880693793296814, "learning_rate": 1.4142798767754918e-05, "loss": 0.0236, "step": 14714 }, { "epoch": 6.841469084146908, "grad_norm": 0.8043079972267151, "learning_rate": 1.4213598857976091e-05, "loss": 0.0416, "step": 14716 }, { "epoch": 6.842398884239889, "grad_norm": 1.309188723564148, "learning_rate": 1.4284505405621814e-05, "loss": 0.0406, "step": 14718 }, { "epoch": 6.843328684332868, "grad_norm": 1.2859598398208618, "learning_rate": 1.4355517710873241e-05, "loss": 0.036, "step": 14720 }, { "epoch": 6.844258484425849, "grad_norm": 0.7394227385520935, "learning_rate": 1.4426635072867431e-05, "loss": 0.0165, "step": 14722 }, { "epoch": 6.845188284518828, "grad_norm": 0.7123878598213196, "learning_rate": 1.4497856789704894e-05, "loss": 0.0334, "step": 14724 }, { "epoch": 6.846118084611809, "grad_norm": 1.1948153972625732, "learning_rate": 1.4569182158455951e-05, "loss": 0.0259, "step": 14726 }, { "epoch": 6.847047884704788, "grad_norm": 0.7879899740219116, "learning_rate": 1.4640610475167935e-05, "loss": 0.0253, "step": 14728 }, { "epoch": 6.847977684797769, "grad_norm": 0.7466092109680176, "learning_rate": 1.4712141034872356e-05, "loss": 0.0398, "step": 14730 }, { "epoch": 6.8489074848907485, "grad_norm": 0.8731971383094788, "learning_rate": 1.4783773131591307e-05, "loss": 0.034, "step": 14732 }, { "epoch": 6.849837284983728, "grad_norm": 1.5666770935058594, "learning_rate": 1.4855506058344988e-05, "loss": 0.0573, "step": 14734 }, { "epoch": 6.850767085076709, "grad_norm": 1.5606698989868164, "learning_rate": 1.4927339107158459e-05, "loss": 0.0345, "step": 14736 }, { "epoch": 6.851696885169688, "grad_norm": 0.8689563274383545, "learning_rate": 1.4999271569068445e-05, "loss": 0.0355, "step": 14738 }, { "epoch": 6.852626685262669, "grad_norm": 0.46263545751571655, "learning_rate": 1.5071302734130582e-05, "loss": 0.0183, "step": 14740 }, { "epoch": 6.853556485355648, "grad_norm": 1.110274076461792, "learning_rate": 1.514343189142628e-05, "loss": 0.0307, "step": 14742 }, { "epoch": 6.854486285448629, "grad_norm": 0.7424903512001038, "learning_rate": 1.5215658329069948e-05, "loss": 0.0242, "step": 14744 }, { "epoch": 6.855416085541608, "grad_norm": 0.34995579719543457, "learning_rate": 1.5287981334215892e-05, "loss": 0.014, "step": 14746 }, { "epoch": 6.856345885634589, "grad_norm": 1.074371099472046, "learning_rate": 1.5360400193065087e-05, "loss": 0.0232, "step": 14748 }, { "epoch": 6.8572756857275685, "grad_norm": 1.6725504398345947, "learning_rate": 1.5432914190872794e-05, "loss": 0.0348, "step": 14750 }, { "epoch": 6.858205485820548, "grad_norm": 0.6212953925132751, "learning_rate": 1.5505522611955046e-05, "loss": 0.0175, "step": 14752 }, { "epoch": 6.859135285913529, "grad_norm": 0.5910730361938477, "learning_rate": 1.5578224739696043e-05, "loss": 0.0251, "step": 14754 }, { "epoch": 6.860065086006508, "grad_norm": 0.7167070508003235, "learning_rate": 1.5651019856555056e-05, "loss": 0.0249, "step": 14756 }, { "epoch": 6.860994886099489, "grad_norm": 1.0072015523910522, "learning_rate": 1.5723907244073743e-05, "loss": 0.0301, "step": 14758 }, { "epoch": 6.861924686192468, "grad_norm": 0.5120880603790283, "learning_rate": 1.5796886182883107e-05, "loss": 0.036, "step": 14760 }, { "epoch": 6.862854486285449, "grad_norm": 0.8593647480010986, "learning_rate": 1.586995595271031e-05, "loss": 0.0297, "step": 14762 }, { "epoch": 6.863784286378428, "grad_norm": 1.2750898599624634, "learning_rate": 1.59431158323864e-05, "loss": 0.0416, "step": 14764 }, { "epoch": 6.864714086471409, "grad_norm": 0.7226830720901489, "learning_rate": 1.6016365099852813e-05, "loss": 0.0343, "step": 14766 }, { "epoch": 6.8656438865643885, "grad_norm": 0.9648162722587585, "learning_rate": 1.6089703032168767e-05, "loss": 0.0327, "step": 14768 }, { "epoch": 6.866573686657369, "grad_norm": 0.761206865310669, "learning_rate": 1.6163128905518645e-05, "loss": 0.0344, "step": 14770 }, { "epoch": 6.867503486750349, "grad_norm": 0.40153810381889343, "learning_rate": 1.6236641995218548e-05, "loss": 0.0205, "step": 14772 }, { "epoch": 6.868433286843329, "grad_norm": 1.184215784072876, "learning_rate": 1.6310241575724138e-05, "loss": 0.0549, "step": 14774 }, { "epoch": 6.869363086936309, "grad_norm": 1.0161796808242798, "learning_rate": 1.6383926920637087e-05, "loss": 0.0236, "step": 14776 }, { "epoch": 6.870292887029288, "grad_norm": 1.1727455854415894, "learning_rate": 1.6457697302712884e-05, "loss": 0.032, "step": 14778 }, { "epoch": 6.871222687122269, "grad_norm": 0.6253501176834106, "learning_rate": 1.6531551993867805e-05, "loss": 0.0293, "step": 14780 }, { "epoch": 6.872152487215248, "grad_norm": 0.8807229399681091, "learning_rate": 1.660549026518553e-05, "loss": 0.0285, "step": 14782 }, { "epoch": 6.873082287308229, "grad_norm": 1.372992753982544, "learning_rate": 1.6679511386925415e-05, "loss": 0.0764, "step": 14784 }, { "epoch": 6.8740120874012085, "grad_norm": 0.5967235565185547, "learning_rate": 1.6753614628528716e-05, "loss": 0.0379, "step": 14786 }, { "epoch": 6.874941887494189, "grad_norm": 0.9274300932884216, "learning_rate": 1.6827799258626432e-05, "loss": 0.0289, "step": 14788 }, { "epoch": 6.875871687587169, "grad_norm": 0.45490318536758423, "learning_rate": 1.6902064545046298e-05, "loss": 0.0264, "step": 14790 }, { "epoch": 6.876801487680149, "grad_norm": 0.6999660134315491, "learning_rate": 1.697640975481975e-05, "loss": 0.0312, "step": 14792 }, { "epoch": 6.877731287773129, "grad_norm": 1.1411073207855225, "learning_rate": 1.705083415418983e-05, "loss": 0.0315, "step": 14794 }, { "epoch": 6.878661087866108, "grad_norm": 0.8167783617973328, "learning_rate": 1.712533700861744e-05, "loss": 0.0311, "step": 14796 }, { "epoch": 6.879590887959089, "grad_norm": 1.1138988733291626, "learning_rate": 1.7199917582789724e-05, "loss": 0.0365, "step": 14798 }, { "epoch": 6.880520688052069, "grad_norm": 1.3363211154937744, "learning_rate": 1.7274575140626365e-05, "loss": 0.042, "step": 14800 }, { "epoch": 6.881450488145049, "grad_norm": 1.0922613143920898, "learning_rate": 1.7349308945287487e-05, "loss": 0.0352, "step": 14802 }, { "epoch": 6.8823802882380285, "grad_norm": 1.0946394205093384, "learning_rate": 1.7424118259180695e-05, "loss": 0.0435, "step": 14804 }, { "epoch": 6.883310088331009, "grad_norm": 0.8676357269287109, "learning_rate": 1.7499002343968172e-05, "loss": 0.0276, "step": 14806 }, { "epoch": 6.884239888423989, "grad_norm": 0.8217958807945251, "learning_rate": 1.757396046057424e-05, "loss": 0.0381, "step": 14808 }, { "epoch": 6.885169688516969, "grad_norm": 1.0138541460037231, "learning_rate": 1.7648991869192466e-05, "loss": 0.0377, "step": 14810 }, { "epoch": 6.886099488609949, "grad_norm": 1.6098483800888062, "learning_rate": 1.7724095829293223e-05, "loss": 0.0517, "step": 14812 }, { "epoch": 6.887029288702929, "grad_norm": 0.6146007776260376, "learning_rate": 1.7799271599630827e-05, "loss": 0.0337, "step": 14814 }, { "epoch": 6.887959088795909, "grad_norm": 0.8816133141517639, "learning_rate": 1.78745184382506e-05, "loss": 0.0247, "step": 14816 }, { "epoch": 6.888888888888889, "grad_norm": 0.965253472328186, "learning_rate": 1.7949835602496807e-05, "loss": 0.0358, "step": 14818 }, { "epoch": 6.889818688981869, "grad_norm": 0.5898703932762146, "learning_rate": 1.8025222349019345e-05, "loss": 0.0256, "step": 14820 }, { "epoch": 6.8907484890748485, "grad_norm": 0.5830801129341125, "learning_rate": 1.8100677933781384e-05, "loss": 0.03, "step": 14822 }, { "epoch": 6.891678289167829, "grad_norm": 1.135762095451355, "learning_rate": 1.817620161206694e-05, "loss": 0.0425, "step": 14824 }, { "epoch": 6.892608089260809, "grad_norm": 1.2242182493209839, "learning_rate": 1.8251792638487613e-05, "loss": 0.0372, "step": 14826 }, { "epoch": 6.893537889353789, "grad_norm": 0.3522123396396637, "learning_rate": 1.832745026699067e-05, "loss": 0.0221, "step": 14828 }, { "epoch": 6.894467689446769, "grad_norm": 1.007181167602539, "learning_rate": 1.840317375086569e-05, "loss": 0.0231, "step": 14830 }, { "epoch": 6.895397489539749, "grad_norm": 1.1261019706726074, "learning_rate": 1.8478962342752546e-05, "loss": 0.0253, "step": 14832 }, { "epoch": 6.896327289632729, "grad_norm": 1.5305203199386597, "learning_rate": 1.8554815294648598e-05, "loss": 0.0343, "step": 14834 }, { "epoch": 6.897257089725709, "grad_norm": 0.4997195303440094, "learning_rate": 1.863073185791549e-05, "loss": 0.0278, "step": 14836 }, { "epoch": 6.898186889818689, "grad_norm": 1.2038706541061401, "learning_rate": 1.8706711283287657e-05, "loss": 0.0258, "step": 14838 }, { "epoch": 6.899116689911669, "grad_norm": 1.2087149620056152, "learning_rate": 1.8782752820878668e-05, "loss": 0.035, "step": 14840 }, { "epoch": 6.900046490004649, "grad_norm": 0.7896910905838013, "learning_rate": 1.8858855720189418e-05, "loss": 0.0279, "step": 14842 }, { "epoch": 6.9009762900976295, "grad_norm": 0.7883797287940979, "learning_rate": 1.8935019230114843e-05, "loss": 0.0246, "step": 14844 }, { "epoch": 6.901906090190609, "grad_norm": 1.7958440780639648, "learning_rate": 1.9011242598951932e-05, "loss": 0.0465, "step": 14846 }, { "epoch": 6.902835890283589, "grad_norm": 1.4617947340011597, "learning_rate": 1.9087525074406987e-05, "loss": 0.0356, "step": 14848 }, { "epoch": 6.903765690376569, "grad_norm": 1.3202333450317383, "learning_rate": 1.916386590360242e-05, "loss": 0.0244, "step": 14850 }, { "epoch": 6.904695490469549, "grad_norm": 1.2924894094467163, "learning_rate": 1.9240264333085326e-05, "loss": 0.0343, "step": 14852 }, { "epoch": 6.905625290562529, "grad_norm": 0.5249738097190857, "learning_rate": 1.9316719608833882e-05, "loss": 0.0176, "step": 14854 }, { "epoch": 6.906555090655509, "grad_norm": 1.955054521560669, "learning_rate": 1.9393230976265467e-05, "loss": 0.0351, "step": 14856 }, { "epoch": 6.907484890748489, "grad_norm": 1.0142247676849365, "learning_rate": 1.9469797680243855e-05, "loss": 0.0412, "step": 14858 }, { "epoch": 6.908414690841469, "grad_norm": 0.5102016925811768, "learning_rate": 1.9546418965086422e-05, "loss": 0.0378, "step": 14860 }, { "epoch": 6.9093444909344495, "grad_norm": 1.2323079109191895, "learning_rate": 1.9623094074572275e-05, "loss": 0.0355, "step": 14862 }, { "epoch": 6.910274291027429, "grad_norm": 1.4910950660705566, "learning_rate": 1.9699822251948694e-05, "loss": 0.0423, "step": 14864 }, { "epoch": 6.911204091120409, "grad_norm": 1.3452991247177124, "learning_rate": 1.977660273993972e-05, "loss": 0.0358, "step": 14866 }, { "epoch": 6.912133891213389, "grad_norm": 0.9025667309761047, "learning_rate": 1.9853434780753017e-05, "loss": 0.0472, "step": 14868 }, { "epoch": 6.913063691306369, "grad_norm": 0.5692058205604553, "learning_rate": 1.9930317616087193e-05, "loss": 0.0391, "step": 14870 }, { "epoch": 6.913993491399349, "grad_norm": 1.7056126594543457, "learning_rate": 2.0007250487139867e-05, "loss": 0.0609, "step": 14872 }, { "epoch": 6.914923291492329, "grad_norm": 1.127834677696228, "learning_rate": 2.0084232634614493e-05, "loss": 0.0186, "step": 14874 }, { "epoch": 6.915853091585309, "grad_norm": 0.8795637488365173, "learning_rate": 2.0161263298728525e-05, "loss": 0.0392, "step": 14876 }, { "epoch": 6.916782891678289, "grad_norm": 0.6978120803833008, "learning_rate": 2.0238341719220322e-05, "loss": 0.022, "step": 14878 }, { "epoch": 6.9177126917712695, "grad_norm": 1.1069129705429077, "learning_rate": 2.0315467135356907e-05, "loss": 0.0402, "step": 14880 }, { "epoch": 6.918642491864249, "grad_norm": 0.6748161315917969, "learning_rate": 2.0392638785941725e-05, "loss": 0.0225, "step": 14882 }, { "epoch": 6.91957229195723, "grad_norm": 0.764909565448761, "learning_rate": 2.0469855909321574e-05, "loss": 0.0254, "step": 14884 }, { "epoch": 6.920502092050209, "grad_norm": 0.9278630614280701, "learning_rate": 2.0547117743394792e-05, "loss": 0.0378, "step": 14886 }, { "epoch": 6.92143189214319, "grad_norm": 1.443016767501831, "learning_rate": 2.0624423525618098e-05, "loss": 0.0492, "step": 14888 }, { "epoch": 6.922361692236169, "grad_norm": 0.7282733917236328, "learning_rate": 2.0701772493014797e-05, "loss": 0.0238, "step": 14890 }, { "epoch": 6.923291492329149, "grad_norm": 0.9115960001945496, "learning_rate": 2.077916388218173e-05, "loss": 0.0347, "step": 14892 }, { "epoch": 6.924221292422129, "grad_norm": 0.9575152397155762, "learning_rate": 2.0856596929297033e-05, "loss": 0.0348, "step": 14894 }, { "epoch": 6.925151092515109, "grad_norm": 2.290935516357422, "learning_rate": 2.0934070870127976e-05, "loss": 0.0586, "step": 14896 }, { "epoch": 6.9260808926080895, "grad_norm": 0.8987970352172852, "learning_rate": 2.101158494003786e-05, "loss": 0.0246, "step": 14898 }, { "epoch": 6.927010692701069, "grad_norm": 0.7406635284423828, "learning_rate": 2.10891383739942e-05, "loss": 0.0558, "step": 14900 }, { "epoch": 6.9279404927940496, "grad_norm": 0.9285454750061035, "learning_rate": 2.1166730406576083e-05, "loss": 0.0314, "step": 14902 }, { "epoch": 6.928870292887029, "grad_norm": 0.883547306060791, "learning_rate": 2.124436027198112e-05, "loss": 0.0367, "step": 14904 }, { "epoch": 6.92980009298001, "grad_norm": 1.6306084394454956, "learning_rate": 2.132202720403415e-05, "loss": 0.0294, "step": 14906 }, { "epoch": 6.930729893072989, "grad_norm": 0.9946677088737488, "learning_rate": 2.1399730436193734e-05, "loss": 0.0383, "step": 14908 }, { "epoch": 6.931659693165969, "grad_norm": 1.0056196451187134, "learning_rate": 2.1477469201560422e-05, "loss": 0.0347, "step": 14910 }, { "epoch": 6.932589493258949, "grad_norm": 0.5451403260231018, "learning_rate": 2.1555242732884075e-05, "loss": 0.0264, "step": 14912 }, { "epoch": 6.933519293351929, "grad_norm": 1.7575527429580688, "learning_rate": 2.1633050262571162e-05, "loss": 0.0518, "step": 14914 }, { "epoch": 6.9344490934449095, "grad_norm": 0.7502812743186951, "learning_rate": 2.1710891022693037e-05, "loss": 0.0278, "step": 14916 }, { "epoch": 6.935378893537889, "grad_norm": 1.133156418800354, "learning_rate": 2.178876424499248e-05, "loss": 0.0387, "step": 14918 }, { "epoch": 6.9363086936308695, "grad_norm": 1.2370566129684448, "learning_rate": 2.1866669160892394e-05, "loss": 0.0315, "step": 14920 }, { "epoch": 6.937238493723849, "grad_norm": 0.9063189625740051, "learning_rate": 2.1944605001502806e-05, "loss": 0.017, "step": 14922 }, { "epoch": 6.93816829381683, "grad_norm": 1.5440316200256348, "learning_rate": 2.202257099762825e-05, "loss": 0.0376, "step": 14924 }, { "epoch": 6.939098093909809, "grad_norm": 0.8068605065345764, "learning_rate": 2.2100566379775997e-05, "loss": 0.0215, "step": 14926 }, { "epoch": 6.94002789400279, "grad_norm": 1.046044945716858, "learning_rate": 2.2178590378162942e-05, "loss": 0.06, "step": 14928 }, { "epoch": 6.940957694095769, "grad_norm": 1.4887906312942505, "learning_rate": 2.225664222272398e-05, "loss": 0.0275, "step": 14930 }, { "epoch": 6.94188749418875, "grad_norm": 1.3602721691131592, "learning_rate": 2.233472114311857e-05, "loss": 0.0358, "step": 14932 }, { "epoch": 6.9428172942817294, "grad_norm": 1.9047119617462158, "learning_rate": 2.2412826368739454e-05, "loss": 0.0637, "step": 14934 }, { "epoch": 6.943747094374709, "grad_norm": 0.8657906651496887, "learning_rate": 2.249095712871968e-05, "loss": 0.0286, "step": 14936 }, { "epoch": 6.9446768944676895, "grad_norm": 1.2362322807312012, "learning_rate": 2.256911265194002e-05, "loss": 0.0292, "step": 14938 }, { "epoch": 6.945606694560669, "grad_norm": 1.5061925649642944, "learning_rate": 2.2647292167037188e-05, "loss": 0.0495, "step": 14940 }, { "epoch": 6.94653649465365, "grad_norm": 1.8195587396621704, "learning_rate": 2.272549490241075e-05, "loss": 0.0386, "step": 14942 }, { "epoch": 6.947466294746629, "grad_norm": 1.1908153295516968, "learning_rate": 2.2803720086231462e-05, "loss": 0.0389, "step": 14944 }, { "epoch": 6.94839609483961, "grad_norm": 1.1325668096542358, "learning_rate": 2.2881966946448245e-05, "loss": 0.0349, "step": 14946 }, { "epoch": 6.949325894932589, "grad_norm": 2.4940314292907715, "learning_rate": 2.2960234710796094e-05, "loss": 0.057, "step": 14948 }, { "epoch": 6.95025569502557, "grad_norm": 2.4192306995391846, "learning_rate": 2.3038522606803948e-05, "loss": 0.0516, "step": 14950 }, { "epoch": 6.951185495118549, "grad_norm": 1.245465874671936, "learning_rate": 2.311682986180171e-05, "loss": 0.0387, "step": 14952 }, { "epoch": 6.952115295211529, "grad_norm": 0.7405454516410828, "learning_rate": 2.3195155702928463e-05, "loss": 0.0373, "step": 14954 }, { "epoch": 6.9530450953045095, "grad_norm": 0.624129593372345, "learning_rate": 2.3273499357139872e-05, "loss": 0.0365, "step": 14956 }, { "epoch": 6.95397489539749, "grad_norm": 1.056316614151001, "learning_rate": 2.335186005121561e-05, "loss": 0.0368, "step": 14958 }, { "epoch": 6.95490469549047, "grad_norm": 1.0039260387420654, "learning_rate": 2.343023701176726e-05, "loss": 0.034, "step": 14960 }, { "epoch": 6.955834495583449, "grad_norm": 0.8972709774971008, "learning_rate": 2.3508629465245775e-05, "loss": 0.0563, "step": 14962 }, { "epoch": 6.95676429567643, "grad_norm": 1.1770890951156616, "learning_rate": 2.3587036637949378e-05, "loss": 0.0355, "step": 14964 }, { "epoch": 6.957694095769409, "grad_norm": 0.8338500261306763, "learning_rate": 2.3665457756031022e-05, "loss": 0.045, "step": 14966 }, { "epoch": 6.95862389586239, "grad_norm": 1.6286488771438599, "learning_rate": 2.3743892045505747e-05, "loss": 0.0441, "step": 14968 }, { "epoch": 6.959553695955369, "grad_norm": 1.633887529373169, "learning_rate": 2.3822338732258958e-05, "loss": 0.0328, "step": 14970 }, { "epoch": 6.96048349604835, "grad_norm": 0.9893823862075806, "learning_rate": 2.3900797042053436e-05, "loss": 0.0193, "step": 14972 }, { "epoch": 6.9614132961413295, "grad_norm": 1.8623391389846802, "learning_rate": 2.3979266200537347e-05, "loss": 0.0548, "step": 14974 }, { "epoch": 6.96234309623431, "grad_norm": 1.302681565284729, "learning_rate": 2.4057745433251682e-05, "loss": 0.0348, "step": 14976 }, { "epoch": 6.96327289632729, "grad_norm": 1.1657074689865112, "learning_rate": 2.4136233965638188e-05, "loss": 0.0472, "step": 14978 }, { "epoch": 6.964202696420269, "grad_norm": 2.081526517868042, "learning_rate": 2.4214731023046827e-05, "loss": 0.0427, "step": 14980 }, { "epoch": 6.96513249651325, "grad_norm": 1.529523253440857, "learning_rate": 2.4293235830743154e-05, "loss": 0.0462, "step": 14982 }, { "epoch": 6.966062296606229, "grad_norm": 0.9899202585220337, "learning_rate": 2.4371747613916583e-05, "loss": 0.026, "step": 14984 }, { "epoch": 6.96699209669921, "grad_norm": 0.7704739570617676, "learning_rate": 2.445026559768743e-05, "loss": 0.0385, "step": 14986 }, { "epoch": 6.967921896792189, "grad_norm": 1.0761144161224365, "learning_rate": 2.45287890071148e-05, "loss": 0.0358, "step": 14988 }, { "epoch": 6.96885169688517, "grad_norm": 0.4978483021259308, "learning_rate": 2.4607317067204533e-05, "loss": 0.0148, "step": 14990 }, { "epoch": 6.9697814969781495, "grad_norm": 0.6271532773971558, "learning_rate": 2.4685849002916186e-05, "loss": 0.0377, "step": 14992 }, { "epoch": 6.97071129707113, "grad_norm": 0.8574469089508057, "learning_rate": 2.4764384039171444e-05, "loss": 0.0423, "step": 14994 }, { "epoch": 6.97164109716411, "grad_norm": 1.2553319931030273, "learning_rate": 2.484292140086102e-05, "loss": 0.0404, "step": 14996 }, { "epoch": 6.972570897257089, "grad_norm": 0.6641249060630798, "learning_rate": 2.4921460312852953e-05, "loss": 0.03, "step": 14998 }, { "epoch": 6.97350069735007, "grad_norm": 1.0427449941635132, "learning_rate": 2.500000000000007e-05, "loss": 0.0383, "step": 15000 }, { "epoch": 6.97350069735007, "eval_cer": 0.14832289169051485, "eval_loss": 0.22978059947490692, "eval_runtime": 406.9141, "eval_samples_per_second": 31.196, "eval_steps_per_second": 0.976, "step": 15000 }, { "epoch": 6.97443049744305, "grad_norm": 0.6852913498878479, "learning_rate": 2.507853968714701e-05, "loss": 0.0218, "step": 15002 }, { "epoch": 6.97536029753603, "grad_norm": 0.6043956279754639, "learning_rate": 2.5157078599139035e-05, "loss": 0.0273, "step": 15004 }, { "epoch": 6.976290097629009, "grad_norm": 0.5735668540000916, "learning_rate": 2.5235615960828605e-05, "loss": 0.0318, "step": 15006 }, { "epoch": 6.97721989772199, "grad_norm": 1.1465792655944824, "learning_rate": 2.5314150997083772e-05, "loss": 0.0331, "step": 15008 }, { "epoch": 6.9781496978149695, "grad_norm": 2.1564085483551025, "learning_rate": 2.5392682932795507e-05, "loss": 0.073, "step": 15010 }, { "epoch": 6.97907949790795, "grad_norm": 0.48165324330329895, "learning_rate": 2.547121099288524e-05, "loss": 0.0339, "step": 15012 }, { "epoch": 6.98000929800093, "grad_norm": 1.1748589277267456, "learning_rate": 2.55497344023127e-05, "loss": 0.0365, "step": 15014 }, { "epoch": 6.98093909809391, "grad_norm": 1.396898627281189, "learning_rate": 2.5628252386083457e-05, "loss": 0.032, "step": 15016 }, { "epoch": 6.98186889818689, "grad_norm": 1.865855097770691, "learning_rate": 2.5706764169256896e-05, "loss": 0.053, "step": 15018 }, { "epoch": 6.98279869827987, "grad_norm": 1.5471843481063843, "learning_rate": 2.578526897695322e-05, "loss": 0.0289, "step": 15020 }, { "epoch": 6.98372849837285, "grad_norm": 1.1096336841583252, "learning_rate": 2.5863766034361774e-05, "loss": 0.0257, "step": 15022 }, { "epoch": 6.984658298465829, "grad_norm": 1.7573853731155396, "learning_rate": 2.594225456674837e-05, "loss": 0.0547, "step": 15024 }, { "epoch": 6.98558809855881, "grad_norm": 1.274198055267334, "learning_rate": 2.60207337994628e-05, "loss": 0.0544, "step": 15026 }, { "epoch": 6.9865178986517895, "grad_norm": 1.5689551830291748, "learning_rate": 2.609920295794672e-05, "loss": 0.0551, "step": 15028 }, { "epoch": 6.98744769874477, "grad_norm": 0.836762547492981, "learning_rate": 2.6177661267741113e-05, "loss": 0.0495, "step": 15030 }, { "epoch": 6.98837749883775, "grad_norm": 1.2740501165390015, "learning_rate": 2.6256107954494245e-05, "loss": 0.03, "step": 15032 }, { "epoch": 6.98930729893073, "grad_norm": 0.8721797466278076, "learning_rate": 2.6334542243969055e-05, "loss": 0.0281, "step": 15034 }, { "epoch": 6.99023709902371, "grad_norm": 1.9411046504974365, "learning_rate": 2.6412963362050608e-05, "loss": 0.0345, "step": 15036 }, { "epoch": 6.99116689911669, "grad_norm": 0.5703341364860535, "learning_rate": 2.64913705347543e-05, "loss": 0.0146, "step": 15038 }, { "epoch": 6.99209669920967, "grad_norm": 1.1236315965652466, "learning_rate": 2.656976298823291e-05, "loss": 0.0389, "step": 15040 }, { "epoch": 6.99302649930265, "grad_norm": 0.6835329532623291, "learning_rate": 2.6648139948784474e-05, "loss": 0.0206, "step": 15042 }, { "epoch": 6.99395629939563, "grad_norm": 1.5584698915481567, "learning_rate": 2.672650064286021e-05, "loss": 0.0253, "step": 15044 }, { "epoch": 6.99488609948861, "grad_norm": 1.6129192113876343, "learning_rate": 2.6804844297071532e-05, "loss": 0.0591, "step": 15046 }, { "epoch": 6.99581589958159, "grad_norm": 1.2058541774749756, "learning_rate": 2.6883170138198374e-05, "loss": 0.0463, "step": 15048 }, { "epoch": 6.99674569967457, "grad_norm": 0.5152899026870728, "learning_rate": 2.6961477393196133e-05, "loss": 0.0226, "step": 15050 }, { "epoch": 6.99767549976755, "grad_norm": 1.7923368215560913, "learning_rate": 2.7039765289203898e-05, "loss": 0.0713, "step": 15052 }, { "epoch": 6.99860529986053, "grad_norm": 1.1311370134353638, "learning_rate": 2.711803305355192e-05, "loss": 0.0413, "step": 15054 }, { "epoch": 6.99953509995351, "grad_norm": 0.5987065434455872, "learning_rate": 2.719627991376861e-05, "loss": 0.0284, "step": 15056 }, { "epoch": 7.00046490004649, "grad_norm": 0.6228224039077759, "learning_rate": 2.7274505097589324e-05, "loss": 0.0322, "step": 15058 }, { "epoch": 7.00139470013947, "grad_norm": 0.7019197940826416, "learning_rate": 2.7352707832962882e-05, "loss": 0.0287, "step": 15060 }, { "epoch": 7.00232450023245, "grad_norm": 1.0318067073822021, "learning_rate": 2.7430887348060048e-05, "loss": 0.0354, "step": 15062 }, { "epoch": 7.00325430032543, "grad_norm": 0.5266805291175842, "learning_rate": 2.7509042871280385e-05, "loss": 0.0252, "step": 15064 }, { "epoch": 7.00418410041841, "grad_norm": 0.5959828495979309, "learning_rate": 2.7587173631260532e-05, "loss": 0.0281, "step": 15066 }, { "epoch": 7.0051139005113905, "grad_norm": 0.5124279260635376, "learning_rate": 2.76652788568816e-05, "loss": 0.0176, "step": 15068 }, { "epoch": 7.00604370060437, "grad_norm": 1.8212331533432007, "learning_rate": 2.7743357777276184e-05, "loss": 0.0366, "step": 15070 }, { "epoch": 7.00697350069735, "grad_norm": 0.8681917786598206, "learning_rate": 2.7821409621837128e-05, "loss": 0.035, "step": 15072 }, { "epoch": 7.00790330079033, "grad_norm": 0.9533523917198181, "learning_rate": 2.7899433620224066e-05, "loss": 0.0325, "step": 15074 }, { "epoch": 7.00883310088331, "grad_norm": 0.7739330530166626, "learning_rate": 2.7977429002371726e-05, "loss": 0.0215, "step": 15076 }, { "epoch": 7.00976290097629, "grad_norm": 1.4876395463943481, "learning_rate": 2.805539499849726e-05, "loss": 0.0403, "step": 15078 }, { "epoch": 7.01069270106927, "grad_norm": 1.6156013011932373, "learning_rate": 2.813333083910758e-05, "loss": 0.0363, "step": 15080 }, { "epoch": 7.01162250116225, "grad_norm": 0.8083267211914062, "learning_rate": 2.8211235755007674e-05, "loss": 0.0271, "step": 15082 }, { "epoch": 7.01255230125523, "grad_norm": 0.44860291481018066, "learning_rate": 2.8289108977307114e-05, "loss": 0.0208, "step": 15084 }, { "epoch": 7.0134821013482105, "grad_norm": 0.861574649810791, "learning_rate": 2.8366949737428824e-05, "loss": 0.026, "step": 15086 }, { "epoch": 7.01441190144119, "grad_norm": 0.8205621242523193, "learning_rate": 2.8444757267116002e-05, "loss": 0.0237, "step": 15088 }, { "epoch": 7.015341701534171, "grad_norm": 0.7784242630004883, "learning_rate": 2.852253079843957e-05, "loss": 0.0238, "step": 15090 }, { "epoch": 7.01627150162715, "grad_norm": 1.7172701358795166, "learning_rate": 2.8600269563806343e-05, "loss": 0.0325, "step": 15092 }, { "epoch": 7.01720130172013, "grad_norm": 1.434105396270752, "learning_rate": 2.8677972795966014e-05, "loss": 0.0348, "step": 15094 }, { "epoch": 7.01813110181311, "grad_norm": 1.0741311311721802, "learning_rate": 2.8755639728018953e-05, "loss": 0.052, "step": 15096 }, { "epoch": 7.01906090190609, "grad_norm": 3.013857841491699, "learning_rate": 2.883326959342409e-05, "loss": 0.0449, "step": 15098 }, { "epoch": 7.01999070199907, "grad_norm": 1.0459595918655396, "learning_rate": 2.891086162600579e-05, "loss": 0.0312, "step": 15100 }, { "epoch": 7.02092050209205, "grad_norm": 1.5531738996505737, "learning_rate": 2.8988415059962217e-05, "loss": 0.028, "step": 15102 }, { "epoch": 7.0218503021850305, "grad_norm": 0.7356158494949341, "learning_rate": 2.9065929129872104e-05, "loss": 0.0689, "step": 15104 }, { "epoch": 7.02278010227801, "grad_norm": 0.5271821022033691, "learning_rate": 2.9143403070703047e-05, "loss": 0.0268, "step": 15106 }, { "epoch": 7.023709902370991, "grad_norm": 0.5033504962921143, "learning_rate": 2.9220836117818442e-05, "loss": 0.0265, "step": 15108 }, { "epoch": 7.02463970246397, "grad_norm": 1.4033676385879517, "learning_rate": 2.9298227506985283e-05, "loss": 0.0407, "step": 15110 }, { "epoch": 7.025569502556951, "grad_norm": 1.014350175857544, "learning_rate": 2.937557647438199e-05, "loss": 0.0267, "step": 15112 }, { "epoch": 7.02649930264993, "grad_norm": 1.0809569358825684, "learning_rate": 2.9452882256605292e-05, "loss": 0.0179, "step": 15114 }, { "epoch": 7.02742910274291, "grad_norm": 1.4833470582962036, "learning_rate": 2.9530144090678513e-05, "loss": 0.0397, "step": 15116 }, { "epoch": 7.02835890283589, "grad_norm": 1.2845851182937622, "learning_rate": 2.9607361214058362e-05, "loss": 0.0388, "step": 15118 }, { "epoch": 7.02928870292887, "grad_norm": 0.8581060171127319, "learning_rate": 2.9684532864643092e-05, "loss": 0.0221, "step": 15120 }, { "epoch": 7.0302185030218505, "grad_norm": 1.1451184749603271, "learning_rate": 2.9761658280779853e-05, "loss": 0.0492, "step": 15122 }, { "epoch": 7.03114830311483, "grad_norm": 1.8900964260101318, "learning_rate": 2.983873670127157e-05, "loss": 0.0535, "step": 15124 }, { "epoch": 7.032078103207811, "grad_norm": 0.6773242950439453, "learning_rate": 2.99157673653856e-05, "loss": 0.0237, "step": 15126 }, { "epoch": 7.03300790330079, "grad_norm": 0.6877291798591614, "learning_rate": 2.9992749512860224e-05, "loss": 0.0275, "step": 15128 }, { "epoch": 7.033937703393771, "grad_norm": 2.283932685852051, "learning_rate": 3.006968238391281e-05, "loss": 0.0436, "step": 15130 }, { "epoch": 7.03486750348675, "grad_norm": 1.6376622915267944, "learning_rate": 3.0146565219247073e-05, "loss": 0.0418, "step": 15132 }, { "epoch": 7.035797303579731, "grad_norm": 0.916925847530365, "learning_rate": 3.0223397260060278e-05, "loss": 0.0275, "step": 15134 }, { "epoch": 7.03672710367271, "grad_norm": 0.6798664331436157, "learning_rate": 3.030017774805148e-05, "loss": 0.0368, "step": 15136 }, { "epoch": 7.03765690376569, "grad_norm": 0.554677426815033, "learning_rate": 3.0376905925427897e-05, "loss": 0.0278, "step": 15138 }, { "epoch": 7.0385867038586705, "grad_norm": 1.11789071559906, "learning_rate": 3.0453581034913584e-05, "loss": 0.0231, "step": 15140 }, { "epoch": 7.03951650395165, "grad_norm": 1.29378342628479, "learning_rate": 3.053020231975624e-05, "loss": 0.0596, "step": 15142 }, { "epoch": 7.040446304044631, "grad_norm": 1.0129940509796143, "learning_rate": 3.060676902373455e-05, "loss": 0.0255, "step": 15144 }, { "epoch": 7.04137610413761, "grad_norm": 0.9444379806518555, "learning_rate": 3.068328039116621e-05, "loss": 0.0322, "step": 15146 }, { "epoch": 7.042305904230591, "grad_norm": 0.9321597814559937, "learning_rate": 3.075973566691477e-05, "loss": 0.0295, "step": 15148 }, { "epoch": 7.04323570432357, "grad_norm": 2.19793963432312, "learning_rate": 3.083613409639777e-05, "loss": 0.0395, "step": 15150 }, { "epoch": 7.044165504416551, "grad_norm": 1.3479799032211304, "learning_rate": 3.09124749255932e-05, "loss": 0.029, "step": 15152 }, { "epoch": 7.04509530450953, "grad_norm": 1.6118022203445435, "learning_rate": 3.098875740104808e-05, "loss": 0.0716, "step": 15154 }, { "epoch": 7.046025104602511, "grad_norm": 2.1188230514526367, "learning_rate": 3.106498076988525e-05, "loss": 0.0497, "step": 15156 }, { "epoch": 7.0469549046954905, "grad_norm": 1.80829656124115, "learning_rate": 3.114114427981068e-05, "loss": 0.0449, "step": 15158 }, { "epoch": 7.04788470478847, "grad_norm": 0.6550396084785461, "learning_rate": 3.121724717912143e-05, "loss": 0.0201, "step": 15160 }, { "epoch": 7.048814504881451, "grad_norm": 0.7597762942314148, "learning_rate": 3.1293288716712444e-05, "loss": 0.0265, "step": 15162 }, { "epoch": 7.04974430497443, "grad_norm": 0.5644457936286926, "learning_rate": 3.136926814208461e-05, "loss": 0.0222, "step": 15164 }, { "epoch": 7.050674105067411, "grad_norm": 0.8080119490623474, "learning_rate": 3.14451847053516e-05, "loss": 0.0278, "step": 15166 }, { "epoch": 7.05160390516039, "grad_norm": 1.2635443210601807, "learning_rate": 3.152103765724748e-05, "loss": 0.0504, "step": 15168 }, { "epoch": 7.052533705253371, "grad_norm": 1.12166428565979, "learning_rate": 3.159682624913442e-05, "loss": 0.0557, "step": 15170 }, { "epoch": 7.05346350534635, "grad_norm": 0.5345950126647949, "learning_rate": 3.1672549733009444e-05, "loss": 0.0281, "step": 15172 }, { "epoch": 7.054393305439331, "grad_norm": 1.7309401035308838, "learning_rate": 3.174820736151242e-05, "loss": 0.0385, "step": 15174 }, { "epoch": 7.0553231055323105, "grad_norm": 1.2576230764389038, "learning_rate": 3.182379838793318e-05, "loss": 0.033, "step": 15176 }, { "epoch": 7.056252905625291, "grad_norm": 0.47953152656555176, "learning_rate": 3.189932206621873e-05, "loss": 0.039, "step": 15178 }, { "epoch": 7.0571827057182706, "grad_norm": 2.0418591499328613, "learning_rate": 3.197477765098086e-05, "loss": 0.0651, "step": 15180 }, { "epoch": 7.05811250581125, "grad_norm": 1.5279603004455566, "learning_rate": 3.2050164397503314e-05, "loss": 0.0605, "step": 15182 }, { "epoch": 7.059042305904231, "grad_norm": 0.6458685398101807, "learning_rate": 3.2125481561749436e-05, "loss": 0.0212, "step": 15184 }, { "epoch": 7.05997210599721, "grad_norm": 1.1903095245361328, "learning_rate": 3.2200728400369294e-05, "loss": 0.0331, "step": 15186 }, { "epoch": 7.060901906090191, "grad_norm": 1.212339162826538, "learning_rate": 3.22759041707068e-05, "loss": 0.046, "step": 15188 }, { "epoch": 7.06183170618317, "grad_norm": 1.0682389736175537, "learning_rate": 3.235100813080773e-05, "loss": 0.0362, "step": 15190 }, { "epoch": 7.062761506276151, "grad_norm": 1.5836797952651978, "learning_rate": 3.2426039539425944e-05, "loss": 0.0502, "step": 15192 }, { "epoch": 7.0636913063691305, "grad_norm": 1.8933147192001343, "learning_rate": 3.250099765603201e-05, "loss": 0.0551, "step": 15194 }, { "epoch": 7.064621106462111, "grad_norm": 1.4898207187652588, "learning_rate": 3.257588174081941e-05, "loss": 0.0563, "step": 15196 }, { "epoch": 7.0655509065550905, "grad_norm": 0.490057110786438, "learning_rate": 3.265069105471252e-05, "loss": 0.0233, "step": 15198 }, { "epoch": 7.066480706648071, "grad_norm": 1.3582345247268677, "learning_rate": 3.2725424859373725e-05, "loss": 0.0441, "step": 15200 }, { "epoch": 7.067410506741051, "grad_norm": 1.5634739398956299, "learning_rate": 3.280008241721036e-05, "loss": 0.0465, "step": 15202 }, { "epoch": 7.06834030683403, "grad_norm": 1.1348872184753418, "learning_rate": 3.287466299138273e-05, "loss": 0.0243, "step": 15204 }, { "epoch": 7.069270106927011, "grad_norm": 1.5858793258666992, "learning_rate": 3.294916584581034e-05, "loss": 0.0352, "step": 15206 }, { "epoch": 7.07019990701999, "grad_norm": 1.5110454559326172, "learning_rate": 3.302359024518026e-05, "loss": 0.0489, "step": 15208 }, { "epoch": 7.071129707112971, "grad_norm": 1.8503668308258057, "learning_rate": 3.309793545495379e-05, "loss": 0.0318, "step": 15210 }, { "epoch": 7.0720595072059504, "grad_norm": 0.5583415627479553, "learning_rate": 3.3172200741373563e-05, "loss": 0.0275, "step": 15212 }, { "epoch": 7.072989307298931, "grad_norm": 0.6317014694213867, "learning_rate": 3.324638537147137e-05, "loss": 0.0374, "step": 15214 }, { "epoch": 7.0739191073919105, "grad_norm": 0.8680753111839294, "learning_rate": 3.332048861307466e-05, "loss": 0.046, "step": 15216 }, { "epoch": 7.074848907484891, "grad_norm": 1.7221705913543701, "learning_rate": 3.339450973481455e-05, "loss": 0.0372, "step": 15218 }, { "epoch": 7.075778707577871, "grad_norm": 1.4904918670654297, "learning_rate": 3.346844800613236e-05, "loss": 0.0265, "step": 15220 }, { "epoch": 7.076708507670851, "grad_norm": 0.6828353404998779, "learning_rate": 3.354230269728711e-05, "loss": 0.037, "step": 15222 }, { "epoch": 7.077638307763831, "grad_norm": 0.92682945728302, "learning_rate": 3.3616073079362994e-05, "loss": 0.0351, "step": 15224 }, { "epoch": 7.07856810785681, "grad_norm": 1.1195651292800903, "learning_rate": 3.368975842427594e-05, "loss": 0.0438, "step": 15226 }, { "epoch": 7.079497907949791, "grad_norm": 1.6703146696090698, "learning_rate": 3.376335800478144e-05, "loss": 0.0444, "step": 15228 }, { "epoch": 7.08042770804277, "grad_norm": 1.372338891029358, "learning_rate": 3.383687109448143e-05, "loss": 0.0466, "step": 15230 }, { "epoch": 7.081357508135751, "grad_norm": 0.452753484249115, "learning_rate": 3.3910296967831306e-05, "loss": 0.0495, "step": 15232 }, { "epoch": 7.0822873082287305, "grad_norm": 0.8275772333145142, "learning_rate": 3.3983634900147345e-05, "loss": 0.0215, "step": 15234 }, { "epoch": 7.083217108321711, "grad_norm": 2.632277727127075, "learning_rate": 3.405688416761368e-05, "loss": 0.0643, "step": 15236 }, { "epoch": 7.084146908414691, "grad_norm": 1.6750733852386475, "learning_rate": 3.4130044047289775e-05, "loss": 0.0535, "step": 15238 }, { "epoch": 7.085076708507671, "grad_norm": 1.366640329360962, "learning_rate": 3.4203113817116984e-05, "loss": 0.0345, "step": 15240 }, { "epoch": 7.086006508600651, "grad_norm": 1.4489659070968628, "learning_rate": 3.4276092755926267e-05, "loss": 0.0407, "step": 15242 }, { "epoch": 7.086936308693631, "grad_norm": 1.4317823648452759, "learning_rate": 3.4348980143445034e-05, "loss": 0.0406, "step": 15244 }, { "epoch": 7.087866108786611, "grad_norm": 0.9442710876464844, "learning_rate": 3.442177526030412e-05, "loss": 0.0423, "step": 15246 }, { "epoch": 7.088795908879591, "grad_norm": 2.0252678394317627, "learning_rate": 3.4494477388045116e-05, "loss": 0.0663, "step": 15248 }, { "epoch": 7.089725708972571, "grad_norm": 1.0307893753051758, "learning_rate": 3.456708580912728e-05, "loss": 0.0318, "step": 15250 }, { "epoch": 7.0906555090655505, "grad_norm": 1.1877058744430542, "learning_rate": 3.46395998069349e-05, "loss": 0.0327, "step": 15252 }, { "epoch": 7.091585309158531, "grad_norm": 1.5067445039749146, "learning_rate": 3.471201866578418e-05, "loss": 0.0324, "step": 15254 }, { "epoch": 7.092515109251511, "grad_norm": 1.0174680948257446, "learning_rate": 3.4784341670930044e-05, "loss": 0.0312, "step": 15256 }, { "epoch": 7.093444909344491, "grad_norm": 1.8152955770492554, "learning_rate": 3.4856568108573795e-05, "loss": 0.0393, "step": 15258 }, { "epoch": 7.094374709437471, "grad_norm": 1.5825351476669312, "learning_rate": 3.492869726586957e-05, "loss": 0.042, "step": 15260 }, { "epoch": 7.095304509530451, "grad_norm": 1.6344319581985474, "learning_rate": 3.500072843093164e-05, "loss": 0.0416, "step": 15262 }, { "epoch": 7.096234309623431, "grad_norm": 1.5721079111099243, "learning_rate": 3.5072660892841625e-05, "loss": 0.0518, "step": 15264 }, { "epoch": 7.097164109716411, "grad_norm": 1.3846031427383423, "learning_rate": 3.5144493941655014e-05, "loss": 0.0621, "step": 15266 }, { "epoch": 7.098093909809391, "grad_norm": 1.1136224269866943, "learning_rate": 3.521622686840878e-05, "loss": 0.0427, "step": 15268 }, { "epoch": 7.099023709902371, "grad_norm": 0.9557772278785706, "learning_rate": 3.5287858965127734e-05, "loss": 0.0523, "step": 15270 }, { "epoch": 7.099953509995351, "grad_norm": 0.881682276725769, "learning_rate": 3.535938952483208e-05, "loss": 0.0343, "step": 15272 }, { "epoch": 7.100883310088331, "grad_norm": 0.9313231110572815, "learning_rate": 3.543081784154423e-05, "loss": 0.0432, "step": 15274 }, { "epoch": 7.101813110181311, "grad_norm": 1.2020269632339478, "learning_rate": 3.550214321029521e-05, "loss": 0.0357, "step": 15276 }, { "epoch": 7.102742910274291, "grad_norm": 0.8438595533370972, "learning_rate": 3.557336492713267e-05, "loss": 0.0373, "step": 15278 }, { "epoch": 7.103672710367271, "grad_norm": 1.231661319732666, "learning_rate": 3.564448228912686e-05, "loss": 0.038, "step": 15280 }, { "epoch": 7.104602510460251, "grad_norm": 1.3759297132492065, "learning_rate": 3.571549459437829e-05, "loss": 0.0473, "step": 15282 }, { "epoch": 7.105532310553231, "grad_norm": 1.6624306440353394, "learning_rate": 3.578640114202401e-05, "loss": 0.0448, "step": 15284 }, { "epoch": 7.106462110646211, "grad_norm": 1.5505510568618774, "learning_rate": 3.58572012322451e-05, "loss": 0.0415, "step": 15286 }, { "epoch": 7.107391910739191, "grad_norm": 1.2323191165924072, "learning_rate": 3.592789416627343e-05, "loss": 0.0349, "step": 15288 }, { "epoch": 7.108321710832171, "grad_norm": 1.1476138830184937, "learning_rate": 3.599847924639794e-05, "loss": 0.0333, "step": 15290 }, { "epoch": 7.1092515109251515, "grad_norm": 1.7120131254196167, "learning_rate": 3.6068955775972636e-05, "loss": 0.0442, "step": 15292 }, { "epoch": 7.110181311018131, "grad_norm": 1.111267328262329, "learning_rate": 3.613932305942246e-05, "loss": 0.0411, "step": 15294 }, { "epoch": 7.111111111111111, "grad_norm": 1.705585241317749, "learning_rate": 3.6209580402250815e-05, "loss": 0.068, "step": 15296 }, { "epoch": 7.112040911204091, "grad_norm": 1.044176697731018, "learning_rate": 3.627972711104617e-05, "loss": 0.0325, "step": 15298 }, { "epoch": 7.112970711297071, "grad_norm": 1.6845011711120605, "learning_rate": 3.634976249348875e-05, "loss": 0.0698, "step": 15300 }, { "epoch": 7.113900511390051, "grad_norm": 1.2364166975021362, "learning_rate": 3.64196858583576e-05, "loss": 0.0376, "step": 15302 }, { "epoch": 7.114830311483031, "grad_norm": 1.5269358158111572, "learning_rate": 3.6489496515537265e-05, "loss": 0.043, "step": 15304 }, { "epoch": 7.115760111576011, "grad_norm": 1.4613336324691772, "learning_rate": 3.6559193776024815e-05, "loss": 0.0839, "step": 15306 }, { "epoch": 7.116689911668991, "grad_norm": 0.6148922443389893, "learning_rate": 3.6628776951936506e-05, "loss": 0.04, "step": 15308 }, { "epoch": 7.1176197117619715, "grad_norm": 1.6452922821044922, "learning_rate": 3.6698245356514335e-05, "loss": 0.0599, "step": 15310 }, { "epoch": 7.118549511854951, "grad_norm": 1.279009222984314, "learning_rate": 3.676759830413336e-05, "loss": 0.0464, "step": 15312 }, { "epoch": 7.119479311947932, "grad_norm": 1.1568071842193604, "learning_rate": 3.683683511030788e-05, "loss": 0.0292, "step": 15314 }, { "epoch": 7.120409112040911, "grad_norm": 1.0663104057312012, "learning_rate": 3.690595509169851e-05, "loss": 0.0418, "step": 15316 }, { "epoch": 7.121338912133891, "grad_norm": 0.510381817817688, "learning_rate": 3.697495756611909e-05, "loss": 0.0257, "step": 15318 }, { "epoch": 7.122268712226871, "grad_norm": 1.8561534881591797, "learning_rate": 3.70438418525429e-05, "loss": 0.054, "step": 15320 }, { "epoch": 7.123198512319851, "grad_norm": 0.9313971996307373, "learning_rate": 3.7112607271110005e-05, "loss": 0.031, "step": 15322 }, { "epoch": 7.124128312412831, "grad_norm": 1.6837302446365356, "learning_rate": 3.718125314313332e-05, "loss": 0.0502, "step": 15324 }, { "epoch": 7.125058112505811, "grad_norm": 1.877713680267334, "learning_rate": 3.7249778791105974e-05, "loss": 0.0651, "step": 15326 }, { "epoch": 7.1259879125987915, "grad_norm": 0.3686792552471161, "learning_rate": 3.731818353870738e-05, "loss": 0.0288, "step": 15328 }, { "epoch": 7.126917712691771, "grad_norm": 1.316874623298645, "learning_rate": 3.7386466710810234e-05, "loss": 0.0533, "step": 15330 }, { "epoch": 7.127847512784752, "grad_norm": 2.363616704940796, "learning_rate": 3.7454627633487355e-05, "loss": 0.0621, "step": 15332 }, { "epoch": 7.128777312877731, "grad_norm": 0.8992049694061279, "learning_rate": 3.7522665634017785e-05, "loss": 0.0316, "step": 15334 }, { "epoch": 7.129707112970712, "grad_norm": 1.5135773420333862, "learning_rate": 3.7590580040894084e-05, "loss": 0.0365, "step": 15336 }, { "epoch": 7.130636913063691, "grad_norm": 1.5137070417404175, "learning_rate": 3.765837018382834e-05, "loss": 0.0374, "step": 15338 }, { "epoch": 7.131566713156671, "grad_norm": 1.0469787120819092, "learning_rate": 3.7726035393759264e-05, "loss": 0.0376, "step": 15340 }, { "epoch": 7.132496513249651, "grad_norm": 1.2023286819458008, "learning_rate": 3.779357500285872e-05, "loss": 0.0515, "step": 15342 }, { "epoch": 7.133426313342631, "grad_norm": 1.2302874326705933, "learning_rate": 3.7860988344537715e-05, "loss": 0.0384, "step": 15344 }, { "epoch": 7.1343561134356115, "grad_norm": 2.6830224990844727, "learning_rate": 3.7928274753454024e-05, "loss": 0.0627, "step": 15346 }, { "epoch": 7.135285913528591, "grad_norm": 0.7415525317192078, "learning_rate": 3.799543356551778e-05, "loss": 0.0237, "step": 15348 }, { "epoch": 7.136215713621572, "grad_norm": 1.6325721740722656, "learning_rate": 3.8062464117898724e-05, "loss": 0.0488, "step": 15350 }, { "epoch": 7.137145513714551, "grad_norm": 2.2071080207824707, "learning_rate": 3.8129365749032433e-05, "loss": 0.0706, "step": 15352 }, { "epoch": 7.138075313807532, "grad_norm": 2.219780921936035, "learning_rate": 3.8196137798626654e-05, "loss": 0.0593, "step": 15354 }, { "epoch": 7.139005113900511, "grad_norm": 1.7322282791137695, "learning_rate": 3.8262779607668455e-05, "loss": 0.079, "step": 15356 }, { "epoch": 7.139934913993492, "grad_norm": 0.8223041296005249, "learning_rate": 3.832929051842977e-05, "loss": 0.0484, "step": 15358 }, { "epoch": 7.140864714086471, "grad_norm": 1.404931664466858, "learning_rate": 3.8395669874474935e-05, "loss": 0.047, "step": 15360 }, { "epoch": 7.141794514179451, "grad_norm": 1.224783182144165, "learning_rate": 3.846191702066656e-05, "loss": 0.0713, "step": 15362 }, { "epoch": 7.1427243142724315, "grad_norm": 1.9693355560302734, "learning_rate": 3.852803130317191e-05, "loss": 0.0628, "step": 15364 }, { "epoch": 7.143654114365411, "grad_norm": 1.2601679563522339, "learning_rate": 3.859401206946986e-05, "loss": 0.0261, "step": 15366 }, { "epoch": 7.144583914458392, "grad_norm": 2.9159138202667236, "learning_rate": 3.865985866835673e-05, "loss": 0.1104, "step": 15368 }, { "epoch": 7.145513714551371, "grad_norm": 2.2282490730285645, "learning_rate": 3.872557044995341e-05, "loss": 0.0592, "step": 15370 }, { "epoch": 7.146443514644352, "grad_norm": 0.6973653435707092, "learning_rate": 3.879114676571083e-05, "loss": 0.0255, "step": 15372 }, { "epoch": 7.147373314737331, "grad_norm": 1.6536107063293457, "learning_rate": 3.885658696841739e-05, "loss": 0.0615, "step": 15374 }, { "epoch": 7.148303114830312, "grad_norm": 1.694772720336914, "learning_rate": 3.892189041220477e-05, "loss": 0.0447, "step": 15376 }, { "epoch": 7.149232914923291, "grad_norm": 2.504002809524536, "learning_rate": 3.89870564525542e-05, "loss": 0.0818, "step": 15378 }, { "epoch": 7.150162715016272, "grad_norm": 1.3879696130752563, "learning_rate": 3.9052084446303315e-05, "loss": 0.0572, "step": 15380 }, { "epoch": 7.1510925151092515, "grad_norm": 1.1328380107879639, "learning_rate": 3.911697375165201e-05, "loss": 0.0468, "step": 15382 }, { "epoch": 7.152022315202231, "grad_norm": 1.4893487691879272, "learning_rate": 3.918172372816895e-05, "loss": 0.0626, "step": 15384 }, { "epoch": 7.152952115295212, "grad_norm": 0.6072794198989868, "learning_rate": 3.924633373679816e-05, "loss": 0.0471, "step": 15386 }, { "epoch": 7.153881915388191, "grad_norm": 1.8918073177337646, "learning_rate": 3.93108031398648e-05, "loss": 0.051, "step": 15388 }, { "epoch": 7.154811715481172, "grad_norm": 1.7038817405700684, "learning_rate": 3.937513130108203e-05, "loss": 0.0576, "step": 15390 }, { "epoch": 7.155741515574151, "grad_norm": 1.1028915643692017, "learning_rate": 3.943931758555671e-05, "loss": 0.0544, "step": 15392 }, { "epoch": 7.156671315667132, "grad_norm": 1.9037914276123047, "learning_rate": 3.950336135979621e-05, "loss": 0.0507, "step": 15394 }, { "epoch": 7.157601115760111, "grad_norm": 1.4015852212905884, "learning_rate": 3.956726199171448e-05, "loss": 0.0465, "step": 15396 }, { "epoch": 7.158530915853092, "grad_norm": 1.2435671091079712, "learning_rate": 3.963101885063779e-05, "loss": 0.0636, "step": 15398 }, { "epoch": 7.1594607159460715, "grad_norm": 2.74474835395813, "learning_rate": 3.96946313073119e-05, "loss": 0.072, "step": 15400 }, { "epoch": 7.160390516039052, "grad_norm": 1.431689739227295, "learning_rate": 3.97580987339074e-05, "loss": 0.0405, "step": 15402 }, { "epoch": 7.161320316132032, "grad_norm": 1.4382551908493042, "learning_rate": 3.982142050402654e-05, "loss": 0.0532, "step": 15404 }, { "epoch": 7.162250116225012, "grad_norm": 1.2911664247512817, "learning_rate": 3.988459599270889e-05, "loss": 0.0387, "step": 15406 }, { "epoch": 7.163179916317992, "grad_norm": 2.010803699493408, "learning_rate": 3.9947624576437955e-05, "loss": 0.0634, "step": 15408 }, { "epoch": 7.164109716410971, "grad_norm": 1.9443926811218262, "learning_rate": 4.001050563314718e-05, "loss": 0.0488, "step": 15410 }, { "epoch": 7.165039516503952, "grad_norm": 1.923764944076538, "learning_rate": 4.007323854222565e-05, "loss": 0.0595, "step": 15412 }, { "epoch": 7.165969316596931, "grad_norm": 1.7106351852416992, "learning_rate": 4.0135822684525105e-05, "loss": 0.0597, "step": 15414 }, { "epoch": 7.166899116689912, "grad_norm": 0.8266811370849609, "learning_rate": 4.019825744236517e-05, "loss": 0.0497, "step": 15416 }, { "epoch": 7.1678289167828915, "grad_norm": 3.042996644973755, "learning_rate": 4.026054219954006e-05, "loss": 0.0839, "step": 15418 }, { "epoch": 7.168758716875872, "grad_norm": 1.4488850831985474, "learning_rate": 4.0322676341324456e-05, "loss": 0.0459, "step": 15420 }, { "epoch": 7.169688516968852, "grad_norm": 1.40945565700531, "learning_rate": 4.038465925447929e-05, "loss": 0.0577, "step": 15422 }, { "epoch": 7.170618317061832, "grad_norm": 2.2706055641174316, "learning_rate": 4.044649032725847e-05, "loss": 0.048, "step": 15424 }, { "epoch": 7.171548117154812, "grad_norm": 1.1678553819656372, "learning_rate": 4.0508168949413974e-05, "loss": 0.0457, "step": 15426 }, { "epoch": 7.172477917247791, "grad_norm": 1.695186734199524, "learning_rate": 4.056969451220285e-05, "loss": 0.0452, "step": 15428 }, { "epoch": 7.173407717340772, "grad_norm": 1.9876755475997925, "learning_rate": 4.06310664083927e-05, "loss": 0.0481, "step": 15430 }, { "epoch": 7.174337517433751, "grad_norm": 2.1412079334259033, "learning_rate": 4.0692284032267544e-05, "loss": 0.0544, "step": 15432 }, { "epoch": 7.175267317526732, "grad_norm": 1.5377964973449707, "learning_rate": 4.0753346779634286e-05, "loss": 0.0463, "step": 15434 }, { "epoch": 7.1761971176197115, "grad_norm": 2.0471889972686768, "learning_rate": 4.081425404782813e-05, "loss": 0.0498, "step": 15436 }, { "epoch": 7.177126917712692, "grad_norm": 1.687208652496338, "learning_rate": 4.087500523571907e-05, "loss": 0.0442, "step": 15438 }, { "epoch": 7.178056717805672, "grad_norm": 2.0356521606445312, "learning_rate": 4.093559974371732e-05, "loss": 0.031, "step": 15440 }, { "epoch": 7.178986517898652, "grad_norm": 0.8499637842178345, "learning_rate": 4.09960369737795e-05, "loss": 0.03, "step": 15442 }, { "epoch": 7.179916317991632, "grad_norm": 0.6667483448982239, "learning_rate": 4.105631632941467e-05, "loss": 0.0391, "step": 15444 }, { "epoch": 7.180846118084612, "grad_norm": 1.1094942092895508, "learning_rate": 4.1116437215689804e-05, "loss": 0.0246, "step": 15446 }, { "epoch": 7.181775918177592, "grad_norm": 1.1809765100479126, "learning_rate": 4.117639903923616e-05, "loss": 0.0637, "step": 15448 }, { "epoch": 7.182705718270572, "grad_norm": 2.6496903896331787, "learning_rate": 4.12362012082546e-05, "loss": 0.0539, "step": 15450 }, { "epoch": 7.183635518363552, "grad_norm": 2.8234081268310547, "learning_rate": 4.1295843132522014e-05, "loss": 0.08, "step": 15452 }, { "epoch": 7.1845653184565315, "grad_norm": 1.3023897409439087, "learning_rate": 4.13553242233966e-05, "loss": 0.0382, "step": 15454 }, { "epoch": 7.185495118549512, "grad_norm": 0.47259023785591125, "learning_rate": 4.141464389382395e-05, "loss": 0.0368, "step": 15456 }, { "epoch": 7.1864249186424916, "grad_norm": 1.1968839168548584, "learning_rate": 4.147380155834299e-05, "loss": 0.0755, "step": 15458 }, { "epoch": 7.187354718735472, "grad_norm": 1.745492935180664, "learning_rate": 4.153279663309132e-05, "loss": 0.0392, "step": 15460 }, { "epoch": 7.188284518828452, "grad_norm": 1.9111884832382202, "learning_rate": 4.159162853581145e-05, "loss": 0.0544, "step": 15462 }, { "epoch": 7.189214318921432, "grad_norm": 0.9146319627761841, "learning_rate": 4.165029668585632e-05, "loss": 0.0421, "step": 15464 }, { "epoch": 7.190144119014412, "grad_norm": 1.8086832761764526, "learning_rate": 4.170880050419489e-05, "loss": 0.0687, "step": 15466 }, { "epoch": 7.191073919107392, "grad_norm": 1.9835879802703857, "learning_rate": 4.1767139413418095e-05, "loss": 0.0718, "step": 15468 }, { "epoch": 7.192003719200372, "grad_norm": 2.320396661758423, "learning_rate": 4.182531283774439e-05, "loss": 0.0955, "step": 15470 }, { "epoch": 7.192933519293352, "grad_norm": 1.7524079084396362, "learning_rate": 4.188332020302564e-05, "loss": 0.0571, "step": 15472 }, { "epoch": 7.193863319386332, "grad_norm": 1.805411458015442, "learning_rate": 4.194116093675261e-05, "loss": 0.0657, "step": 15474 }, { "epoch": 7.1947931194793115, "grad_norm": 1.9144126176834106, "learning_rate": 4.19988344680605e-05, "loss": 0.0692, "step": 15476 }, { "epoch": 7.195722919572292, "grad_norm": 1.8513391017913818, "learning_rate": 4.2056340227735015e-05, "loss": 0.0492, "step": 15478 }, { "epoch": 7.196652719665272, "grad_norm": 1.482221007347107, "learning_rate": 4.2113677648217286e-05, "loss": 0.0565, "step": 15480 }, { "epoch": 7.197582519758252, "grad_norm": 1.270060420036316, "learning_rate": 4.217084616361024e-05, "loss": 0.0611, "step": 15482 }, { "epoch": 7.198512319851232, "grad_norm": 1.9459847211837769, "learning_rate": 4.2227845209683764e-05, "loss": 0.0597, "step": 15484 }, { "epoch": 7.199442119944212, "grad_norm": 1.4955087900161743, "learning_rate": 4.2284674223880174e-05, "loss": 0.0424, "step": 15486 }, { "epoch": 7.200371920037192, "grad_norm": 1.7181693315505981, "learning_rate": 4.234133264532017e-05, "loss": 0.0562, "step": 15488 }, { "epoch": 7.201301720130172, "grad_norm": 1.5105658769607544, "learning_rate": 4.239781991480786e-05, "loss": 0.069, "step": 15490 }, { "epoch": 7.202231520223152, "grad_norm": 2.0011837482452393, "learning_rate": 4.245413547483692e-05, "loss": 0.0595, "step": 15492 }, { "epoch": 7.203161320316132, "grad_norm": 1.7934973239898682, "learning_rate": 4.251027876959523e-05, "loss": 0.0464, "step": 15494 }, { "epoch": 7.204091120409112, "grad_norm": 1.170207142829895, "learning_rate": 4.256624924497127e-05, "loss": 0.0583, "step": 15496 }, { "epoch": 7.205020920502092, "grad_norm": 1.2248884439468384, "learning_rate": 4.262204634855911e-05, "loss": 0.0513, "step": 15498 }, { "epoch": 7.205950720595072, "grad_norm": 1.432193398475647, "learning_rate": 4.267766952966372e-05, "loss": 0.04, "step": 15500 }, { "epoch": 7.206880520688052, "grad_norm": 2.3204619884490967, "learning_rate": 4.27331182393069e-05, "loss": 0.0795, "step": 15502 }, { "epoch": 7.207810320781032, "grad_norm": 2.4304986000061035, "learning_rate": 4.2788391930232156e-05, "loss": 0.0907, "step": 15504 }, { "epoch": 7.208740120874012, "grad_norm": 1.250344157218933, "learning_rate": 4.284349005691059e-05, "loss": 0.0311, "step": 15506 }, { "epoch": 7.209669920966992, "grad_norm": 1.9487318992614746, "learning_rate": 4.289841207554585e-05, "loss": 0.0547, "step": 15508 }, { "epoch": 7.210599721059972, "grad_norm": 1.2818667888641357, "learning_rate": 4.295315744407975e-05, "loss": 0.045, "step": 15510 }, { "epoch": 7.211529521152952, "grad_norm": 1.5603342056274414, "learning_rate": 4.300772562219772e-05, "loss": 0.0588, "step": 15512 }, { "epoch": 7.212459321245932, "grad_norm": 1.402672290802002, "learning_rate": 4.306211607133377e-05, "loss": 0.0451, "step": 15514 }, { "epoch": 7.2133891213389125, "grad_norm": 1.1968791484832764, "learning_rate": 4.311632825467615e-05, "loss": 0.0507, "step": 15516 }, { "epoch": 7.214318921431892, "grad_norm": 1.2535992860794067, "learning_rate": 4.317036163717258e-05, "loss": 0.0462, "step": 15518 }, { "epoch": 7.215248721524872, "grad_norm": 1.0319759845733643, "learning_rate": 4.3224215685535334e-05, "loss": 0.0339, "step": 15520 }, { "epoch": 7.216178521617852, "grad_norm": 1.550363302230835, "learning_rate": 4.327788986824667e-05, "loss": 0.0572, "step": 15522 }, { "epoch": 7.217108321710832, "grad_norm": 1.8164690732955933, "learning_rate": 4.3331383655564046e-05, "loss": 0.054, "step": 15524 }, { "epoch": 7.218038121803812, "grad_norm": 1.1594167947769165, "learning_rate": 4.338469651952541e-05, "loss": 0.0589, "step": 15526 }, { "epoch": 7.218967921896792, "grad_norm": 1.2528913021087646, "learning_rate": 4.343782793395439e-05, "loss": 0.0333, "step": 15528 }, { "epoch": 7.219897721989772, "grad_norm": 1.5467109680175781, "learning_rate": 4.349077737446526e-05, "loss": 0.051, "step": 15530 }, { "epoch": 7.220827522082752, "grad_norm": 1.9024672508239746, "learning_rate": 4.354354431846852e-05, "loss": 0.0635, "step": 15532 }, { "epoch": 7.2217573221757325, "grad_norm": 1.1880786418914795, "learning_rate": 4.359612824517568e-05, "loss": 0.0448, "step": 15534 }, { "epoch": 7.222687122268712, "grad_norm": 1.687477707862854, "learning_rate": 4.364852863560464e-05, "loss": 0.0529, "step": 15536 }, { "epoch": 7.223616922361693, "grad_norm": 1.843959093093872, "learning_rate": 4.370074497258462e-05, "loss": 0.0544, "step": 15538 }, { "epoch": 7.224546722454672, "grad_norm": 1.8440910577774048, "learning_rate": 4.375277674076151e-05, "loss": 0.0781, "step": 15540 }, { "epoch": 7.225476522547652, "grad_norm": 1.752593994140625, "learning_rate": 4.3804623426602825e-05, "loss": 0.0648, "step": 15542 }, { "epoch": 7.226406322640632, "grad_norm": 1.722840428352356, "learning_rate": 4.3856284518402614e-05, "loss": 0.072, "step": 15544 }, { "epoch": 7.227336122733612, "grad_norm": 1.425978183746338, "learning_rate": 4.390775950628684e-05, "loss": 0.0686, "step": 15546 }, { "epoch": 7.228265922826592, "grad_norm": 2.139108657836914, "learning_rate": 4.395904788221812e-05, "loss": 0.0647, "step": 15548 }, { "epoch": 7.229195722919572, "grad_norm": 1.4698009490966797, "learning_rate": 4.401014914000082e-05, "loss": 0.0555, "step": 15550 }, { "epoch": 7.2301255230125525, "grad_norm": 2.4483816623687744, "learning_rate": 4.406106277528627e-05, "loss": 0.1073, "step": 15552 }, { "epoch": 7.231055323105532, "grad_norm": 1.5742830038070679, "learning_rate": 4.411178828557733e-05, "loss": 0.0574, "step": 15554 }, { "epoch": 7.231985123198513, "grad_norm": 2.0027191638946533, "learning_rate": 4.416232517023381e-05, "loss": 0.0525, "step": 15556 }, { "epoch": 7.232914923291492, "grad_norm": 1.5884162187576294, "learning_rate": 4.421267293047695e-05, "loss": 0.0582, "step": 15558 }, { "epoch": 7.233844723384473, "grad_norm": 1.4270153045654297, "learning_rate": 4.426283106939473e-05, "loss": 0.0809, "step": 15560 }, { "epoch": 7.234774523477452, "grad_norm": 1.4295445680618286, "learning_rate": 4.43127990919467e-05, "loss": 0.0597, "step": 15562 }, { "epoch": 7.235704323570433, "grad_norm": 1.5521985292434692, "learning_rate": 4.43625765049684e-05, "loss": 0.0775, "step": 15564 }, { "epoch": 7.236634123663412, "grad_norm": 1.8079779148101807, "learning_rate": 4.441216281717703e-05, "loss": 0.0442, "step": 15566 }, { "epoch": 7.237563923756392, "grad_norm": 1.9198503494262695, "learning_rate": 4.446155753917563e-05, "loss": 0.0791, "step": 15568 }, { "epoch": 7.2384937238493725, "grad_norm": 2.074991464614868, "learning_rate": 4.4510760183458254e-05, "loss": 0.0436, "step": 15570 }, { "epoch": 7.239423523942352, "grad_norm": 0.9392071962356567, "learning_rate": 4.4559770264414736e-05, "loss": 0.0232, "step": 15572 }, { "epoch": 7.240353324035333, "grad_norm": 1.4381376504898071, "learning_rate": 4.4608587298335246e-05, "loss": 0.0568, "step": 15574 }, { "epoch": 7.241283124128312, "grad_norm": 1.5873843431472778, "learning_rate": 4.465721080341555e-05, "loss": 0.0655, "step": 15576 }, { "epoch": 7.242212924221293, "grad_norm": 1.023293137550354, "learning_rate": 4.4705640299761054e-05, "loss": 0.0448, "step": 15578 }, { "epoch": 7.243142724314272, "grad_norm": 1.953959345817566, "learning_rate": 4.475387530939234e-05, "loss": 0.0669, "step": 15580 }, { "epoch": 7.244072524407253, "grad_norm": 1.6211251020431519, "learning_rate": 4.480191535624922e-05, "loss": 0.0669, "step": 15582 }, { "epoch": 7.245002324500232, "grad_norm": 1.6443856954574585, "learning_rate": 4.484975996619589e-05, "loss": 0.0341, "step": 15584 }, { "epoch": 7.245932124593212, "grad_norm": 2.197366237640381, "learning_rate": 4.4897408667025425e-05, "loss": 0.0729, "step": 15586 }, { "epoch": 7.2468619246861925, "grad_norm": 0.9704784154891968, "learning_rate": 4.494486098846432e-05, "loss": 0.0695, "step": 15588 }, { "epoch": 7.247791724779172, "grad_norm": 1.1390235424041748, "learning_rate": 4.4992116462177344e-05, "loss": 0.0526, "step": 15590 }, { "epoch": 7.248721524872153, "grad_norm": 2.4930624961853027, "learning_rate": 4.503917462177198e-05, "loss": 0.0694, "step": 15592 }, { "epoch": 7.249651324965132, "grad_norm": 1.233207106590271, "learning_rate": 4.5086035002803216e-05, "loss": 0.0429, "step": 15594 }, { "epoch": 7.250581125058113, "grad_norm": 1.5853798389434814, "learning_rate": 4.5132697142778104e-05, "loss": 0.0508, "step": 15596 }, { "epoch": 7.251510925151092, "grad_norm": 1.7127418518066406, "learning_rate": 4.517916058116003e-05, "loss": 0.0587, "step": 15598 }, { "epoch": 7.252440725244073, "grad_norm": 1.530614972114563, "learning_rate": 4.5225424859373744e-05, "loss": 0.0528, "step": 15600 }, { "epoch": 7.253370525337052, "grad_norm": 2.6946682929992676, "learning_rate": 4.527148952080942e-05, "loss": 0.0481, "step": 15602 }, { "epoch": 7.254300325430033, "grad_norm": 1.5792434215545654, "learning_rate": 4.53173541108274e-05, "loss": 0.0538, "step": 15604 }, { "epoch": 7.2552301255230125, "grad_norm": 1.417999505996704, "learning_rate": 4.536301817676281e-05, "loss": 0.0629, "step": 15606 }, { "epoch": 7.256159925615993, "grad_norm": 1.5195927619934082, "learning_rate": 4.5408481267929646e-05, "loss": 0.0667, "step": 15608 }, { "epoch": 7.257089725708973, "grad_norm": 1.6381853818893433, "learning_rate": 4.545374293562566e-05, "loss": 0.0848, "step": 15610 }, { "epoch": 7.258019525801952, "grad_norm": 2.963345766067505, "learning_rate": 4.549880273313636e-05, "loss": 0.1283, "step": 15612 }, { "epoch": 7.258949325894933, "grad_norm": 2.2125298976898193, "learning_rate": 4.554366021573978e-05, "loss": 0.0678, "step": 15614 }, { "epoch": 7.259879125987912, "grad_norm": 2.831714153289795, "learning_rate": 4.5588314940710776e-05, "loss": 0.0819, "step": 15616 }, { "epoch": 7.260808926080893, "grad_norm": 1.9731754064559937, "learning_rate": 4.5632766467325046e-05, "loss": 0.0646, "step": 15618 }, { "epoch": 7.261738726173872, "grad_norm": 1.7012419700622559, "learning_rate": 4.567701435686413e-05, "loss": 0.0601, "step": 15620 }, { "epoch": 7.262668526266853, "grad_norm": 1.671277403831482, "learning_rate": 4.5721058172619104e-05, "loss": 0.0712, "step": 15622 }, { "epoch": 7.2635983263598325, "grad_norm": 1.6435049772262573, "learning_rate": 4.57648974798954e-05, "loss": 0.0478, "step": 15624 }, { "epoch": 7.264528126452813, "grad_norm": 3.307614803314209, "learning_rate": 4.580853184601665e-05, "loss": 0.1212, "step": 15626 }, { "epoch": 7.265457926545793, "grad_norm": 1.975287675857544, "learning_rate": 4.5851960840329316e-05, "loss": 0.0856, "step": 15628 }, { "epoch": 7.266387726638772, "grad_norm": 1.5024096965789795, "learning_rate": 4.589518403420687e-05, "loss": 0.0523, "step": 15630 }, { "epoch": 7.267317526731753, "grad_norm": 1.615908145904541, "learning_rate": 4.5938201001053624e-05, "loss": 0.069, "step": 15632 }, { "epoch": 7.268247326824732, "grad_norm": 1.5487250089645386, "learning_rate": 4.598101131630965e-05, "loss": 0.0758, "step": 15634 }, { "epoch": 7.269177126917713, "grad_norm": 2.2062225341796875, "learning_rate": 4.602361455745431e-05, "loss": 0.0894, "step": 15636 }, { "epoch": 7.270106927010692, "grad_norm": 0.8288989067077637, "learning_rate": 4.606601030401087e-05, "loss": 0.0487, "step": 15638 }, { "epoch": 7.271036727103673, "grad_norm": 2.051623821258545, "learning_rate": 4.610819813755047e-05, "loss": 0.0482, "step": 15640 }, { "epoch": 7.2719665271966525, "grad_norm": 1.6469810009002686, "learning_rate": 4.6150177641696116e-05, "loss": 0.0705, "step": 15642 }, { "epoch": 7.272896327289633, "grad_norm": 1.7711143493652344, "learning_rate": 4.61919484021272e-05, "loss": 0.0481, "step": 15644 }, { "epoch": 7.273826127382613, "grad_norm": 1.6403473615646362, "learning_rate": 4.6233510006583016e-05, "loss": 0.0606, "step": 15646 }, { "epoch": 7.274755927475593, "grad_norm": 1.634597897529602, "learning_rate": 4.627486204486738e-05, "loss": 0.0752, "step": 15648 }, { "epoch": 7.275685727568573, "grad_norm": 2.31192946434021, "learning_rate": 4.631600410885241e-05, "loss": 0.0674, "step": 15650 }, { "epoch": 7.276615527661553, "grad_norm": 1.7224054336547852, "learning_rate": 4.635693579248245e-05, "loss": 0.0671, "step": 15652 }, { "epoch": 7.277545327754533, "grad_norm": 2.8439571857452393, "learning_rate": 4.639765669177841e-05, "loss": 0.0751, "step": 15654 }, { "epoch": 7.278475127847512, "grad_norm": 2.4069595336914062, "learning_rate": 4.643816640484137e-05, "loss": 0.0684, "step": 15656 }, { "epoch": 7.279404927940493, "grad_norm": 3.1482434272766113, "learning_rate": 4.64784645318569e-05, "loss": 0.0749, "step": 15658 }, { "epoch": 7.2803347280334725, "grad_norm": 1.5399751663208008, "learning_rate": 4.6518550675098705e-05, "loss": 0.0713, "step": 15660 }, { "epoch": 7.281264528126453, "grad_norm": 1.8759145736694336, "learning_rate": 4.6558424438932694e-05, "loss": 0.084, "step": 15662 }, { "epoch": 7.282194328219433, "grad_norm": 1.1096101999282837, "learning_rate": 4.6598085429821e-05, "loss": 0.0509, "step": 15664 }, { "epoch": 7.283124128312413, "grad_norm": 1.903733491897583, "learning_rate": 4.6637533256325565e-05, "loss": 0.0618, "step": 15666 }, { "epoch": 7.284053928405393, "grad_norm": 1.954447627067566, "learning_rate": 4.667676752911236e-05, "loss": 0.0873, "step": 15668 }, { "epoch": 7.284983728498373, "grad_norm": 2.598311185836792, "learning_rate": 4.6715787860954905e-05, "loss": 0.0666, "step": 15670 }, { "epoch": 7.285913528591353, "grad_norm": 1.927505373954773, "learning_rate": 4.6754593866738236e-05, "loss": 0.0664, "step": 15672 }, { "epoch": 7.286843328684333, "grad_norm": 2.2021148204803467, "learning_rate": 4.679318516346283e-05, "loss": 0.0764, "step": 15674 }, { "epoch": 7.287773128777313, "grad_norm": 1.0046905279159546, "learning_rate": 4.683156137024808e-05, "loss": 0.0397, "step": 15676 }, { "epoch": 7.2887029288702925, "grad_norm": 1.450103759765625, "learning_rate": 4.686972210833642e-05, "loss": 0.0694, "step": 15678 }, { "epoch": 7.289632728963273, "grad_norm": 2.2029855251312256, "learning_rate": 4.6907667001096665e-05, "loss": 0.0647, "step": 15680 }, { "epoch": 7.290562529056253, "grad_norm": 1.9573330879211426, "learning_rate": 4.69453956740281e-05, "loss": 0.0447, "step": 15682 }, { "epoch": 7.291492329149233, "grad_norm": 1.3621543645858765, "learning_rate": 4.6982907754764035e-05, "loss": 0.0543, "step": 15684 }, { "epoch": 7.292422129242213, "grad_norm": 1.9740885496139526, "learning_rate": 4.7020202873075185e-05, "loss": 0.0733, "step": 15686 }, { "epoch": 7.293351929335193, "grad_norm": 1.2352054119110107, "learning_rate": 4.705728066087395e-05, "loss": 0.0451, "step": 15688 }, { "epoch": 7.294281729428173, "grad_norm": 3.100851058959961, "learning_rate": 4.7094140752217424e-05, "loss": 0.0848, "step": 15690 }, { "epoch": 7.295211529521153, "grad_norm": 1.3548519611358643, "learning_rate": 4.713078278331145e-05, "loss": 0.0511, "step": 15692 }, { "epoch": 7.296141329614133, "grad_norm": 2.5410027503967285, "learning_rate": 4.7167206392513985e-05, "loss": 0.0571, "step": 15694 }, { "epoch": 7.297071129707113, "grad_norm": 1.8732779026031494, "learning_rate": 4.7203411220338666e-05, "loss": 0.0807, "step": 15696 }, { "epoch": 7.298000929800093, "grad_norm": 1.779232144355774, "learning_rate": 4.723939690945857e-05, "loss": 0.0534, "step": 15698 }, { "epoch": 7.298930729893073, "grad_norm": 1.8808848857879639, "learning_rate": 4.727516310470929e-05, "loss": 0.0706, "step": 15700 }, { "epoch": 7.299860529986053, "grad_norm": 1.1082401275634766, "learning_rate": 4.731070945309302e-05, "loss": 0.0378, "step": 15702 }, { "epoch": 7.300790330079033, "grad_norm": 1.5949807167053223, "learning_rate": 4.7346035603781686e-05, "loss": 0.0667, "step": 15704 }, { "epoch": 7.301720130172013, "grad_norm": 1.2202932834625244, "learning_rate": 4.7381141208120364e-05, "loss": 0.0522, "step": 15706 }, { "epoch": 7.302649930264993, "grad_norm": 2.3609418869018555, "learning_rate": 4.741602591963099e-05, "loss": 0.0699, "step": 15708 }, { "epoch": 7.303579730357973, "grad_norm": 2.9002034664154053, "learning_rate": 4.745068939401546e-05, "loss": 0.0781, "step": 15710 }, { "epoch": 7.304509530450953, "grad_norm": 1.7679896354675293, "learning_rate": 4.74851312891594e-05, "loss": 0.0546, "step": 15712 }, { "epoch": 7.305439330543933, "grad_norm": 1.6820629835128784, "learning_rate": 4.751935126513505e-05, "loss": 0.0717, "step": 15714 }, { "epoch": 7.306369130636913, "grad_norm": 2.2055537700653076, "learning_rate": 4.755334898420516e-05, "loss": 0.0873, "step": 15716 }, { "epoch": 7.3072989307298934, "grad_norm": 1.3815414905548096, "learning_rate": 4.758712411082597e-05, "loss": 0.0481, "step": 15718 }, { "epoch": 7.308228730822873, "grad_norm": 1.1195765733718872, "learning_rate": 4.762067631165056e-05, "loss": 0.0565, "step": 15720 }, { "epoch": 7.3091585309158535, "grad_norm": 1.4280650615692139, "learning_rate": 4.7654005255532325e-05, "loss": 0.0543, "step": 15722 }, { "epoch": 7.310088331008833, "grad_norm": 2.086658000946045, "learning_rate": 4.768711061352798e-05, "loss": 0.0637, "step": 15724 }, { "epoch": 7.311018131101813, "grad_norm": 1.064520239830017, "learning_rate": 4.771999205890108e-05, "loss": 0.044, "step": 15726 }, { "epoch": 7.311947931194793, "grad_norm": 1.8760677576065063, "learning_rate": 4.7752649267124994e-05, "loss": 0.0572, "step": 15728 }, { "epoch": 7.312877731287773, "grad_norm": 2.557662010192871, "learning_rate": 4.7785081915886215e-05, "loss": 0.0652, "step": 15730 }, { "epoch": 7.313807531380753, "grad_norm": 2.135657787322998, "learning_rate": 4.7817289685087665e-05, "loss": 0.0656, "step": 15732 }, { "epoch": 7.314737331473733, "grad_norm": 1.4795007705688477, "learning_rate": 4.784927225685161e-05, "loss": 0.0544, "step": 15734 }, { "epoch": 7.315667131566713, "grad_norm": 1.326574444770813, "learning_rate": 4.7881029315522995e-05, "loss": 0.058, "step": 15736 }, { "epoch": 7.316596931659693, "grad_norm": 3.0831778049468994, "learning_rate": 4.791256054767252e-05, "loss": 0.1171, "step": 15738 }, { "epoch": 7.3175267317526735, "grad_norm": 1.6846586465835571, "learning_rate": 4.7943865642099625e-05, "loss": 0.044, "step": 15740 }, { "epoch": 7.318456531845653, "grad_norm": 2.1612377166748047, "learning_rate": 4.797494428983564e-05, "loss": 0.0636, "step": 15742 }, { "epoch": 7.319386331938633, "grad_norm": 2.1609485149383545, "learning_rate": 4.8005796184146856e-05, "loss": 0.0523, "step": 15744 }, { "epoch": 7.320316132031613, "grad_norm": 1.5922387838363647, "learning_rate": 4.8036421020537544e-05, "loss": 0.0593, "step": 15746 }, { "epoch": 7.321245932124593, "grad_norm": 2.3744711875915527, "learning_rate": 4.8066818496752976e-05, "loss": 0.0759, "step": 15748 }, { "epoch": 7.322175732217573, "grad_norm": 1.5418275594711304, "learning_rate": 4.8096988312782255e-05, "loss": 0.0637, "step": 15750 }, { "epoch": 7.323105532310553, "grad_norm": 1.9422681331634521, "learning_rate": 4.8126930170861545e-05, "loss": 0.0755, "step": 15752 }, { "epoch": 7.324035332403533, "grad_norm": 2.6949799060821533, "learning_rate": 4.815664377547677e-05, "loss": 0.0977, "step": 15754 }, { "epoch": 7.324965132496513, "grad_norm": 2.1758389472961426, "learning_rate": 4.818612883336666e-05, "loss": 0.0831, "step": 15756 }, { "epoch": 7.3258949325894935, "grad_norm": 2.5719492435455322, "learning_rate": 4.821538505352554e-05, "loss": 0.0934, "step": 15758 }, { "epoch": 7.326824732682473, "grad_norm": 1.3247780799865723, "learning_rate": 4.824441214720637e-05, "loss": 0.0478, "step": 15760 }, { "epoch": 7.327754532775454, "grad_norm": 1.8516221046447754, "learning_rate": 4.8273209827923486e-05, "loss": 0.0434, "step": 15762 }, { "epoch": 7.328684332868433, "grad_norm": 1.9802266359329224, "learning_rate": 4.830177781145536e-05, "loss": 0.0693, "step": 15764 }, { "epoch": 7.329614132961414, "grad_norm": 2.6216766834259033, "learning_rate": 4.8330115815847594e-05, "loss": 0.0974, "step": 15766 }, { "epoch": 7.330543933054393, "grad_norm": 2.62488055229187, "learning_rate": 4.8358223561415406e-05, "loss": 0.0952, "step": 15768 }, { "epoch": 7.331473733147373, "grad_norm": 2.1680195331573486, "learning_rate": 4.8386100770746776e-05, "loss": 0.074, "step": 15770 }, { "epoch": 7.332403533240353, "grad_norm": 1.8132728338241577, "learning_rate": 4.841374716870493e-05, "loss": 0.0668, "step": 15772 }, { "epoch": 7.333333333333333, "grad_norm": 1.863587737083435, "learning_rate": 4.8441162482431006e-05, "loss": 0.0781, "step": 15774 }, { "epoch": 7.3342631334263135, "grad_norm": 1.6469393968582153, "learning_rate": 4.8468346441346975e-05, "loss": 0.0828, "step": 15776 }, { "epoch": 7.335192933519293, "grad_norm": 1.9906346797943115, "learning_rate": 4.84952987771581e-05, "loss": 0.0546, "step": 15778 }, { "epoch": 7.336122733612274, "grad_norm": 2.0275702476501465, "learning_rate": 4.852201922385575e-05, "loss": 0.0364, "step": 15780 }, { "epoch": 7.337052533705253, "grad_norm": 1.8371875286102295, "learning_rate": 4.8548507517719904e-05, "loss": 0.073, "step": 15782 }, { "epoch": 7.337982333798234, "grad_norm": 2.3348114490509033, "learning_rate": 4.8574763397321736e-05, "loss": 0.0661, "step": 15784 }, { "epoch": 7.338912133891213, "grad_norm": 2.6374306678771973, "learning_rate": 4.860078660352638e-05, "loss": 0.0921, "step": 15786 }, { "epoch": 7.339841933984193, "grad_norm": 2.0529613494873047, "learning_rate": 4.862657687949525e-05, "loss": 0.079, "step": 15788 }, { "epoch": 7.340771734077173, "grad_norm": 2.5458292961120605, "learning_rate": 4.865213397068874e-05, "loss": 0.0824, "step": 15790 }, { "epoch": 7.341701534170153, "grad_norm": 2.0719337463378906, "learning_rate": 4.867745762486873e-05, "loss": 0.067, "step": 15792 }, { "epoch": 7.3426313342631335, "grad_norm": 1.881712555885315, "learning_rate": 4.870254759210093e-05, "loss": 0.0518, "step": 15794 }, { "epoch": 7.343561134356113, "grad_norm": 1.646986722946167, "learning_rate": 4.8727403624757494e-05, "loss": 0.0836, "step": 15796 }, { "epoch": 7.344490934449094, "grad_norm": 1.741188883781433, "learning_rate": 4.875202547751942e-05, "loss": 0.0385, "step": 15798 }, { "epoch": 7.345420734542073, "grad_norm": 1.3656954765319824, "learning_rate": 4.877641290737897e-05, "loss": 0.0448, "step": 15800 }, { "epoch": 7.346350534635054, "grad_norm": 1.3840248584747314, "learning_rate": 4.8800565673642036e-05, "loss": 0.0616, "step": 15802 }, { "epoch": 7.347280334728033, "grad_norm": 3.2519748210906982, "learning_rate": 4.882448353793058e-05, "loss": 0.0882, "step": 15804 }, { "epoch": 7.348210134821014, "grad_norm": 2.6968019008636475, "learning_rate": 4.8848166264184966e-05, "loss": 0.0918, "step": 15806 }, { "epoch": 7.349139934913993, "grad_norm": 2.0067358016967773, "learning_rate": 4.887161361866621e-05, "loss": 0.0687, "step": 15808 }, { "epoch": 7.350069735006974, "grad_norm": 1.6120080947875977, "learning_rate": 4.88948253699584e-05, "loss": 0.0749, "step": 15810 }, { "epoch": 7.3509995350999535, "grad_norm": 1.665618658065796, "learning_rate": 4.89178012889709e-05, "loss": 0.0699, "step": 15812 }, { "epoch": 7.351929335192933, "grad_norm": 1.3553518056869507, "learning_rate": 4.8940541148940677e-05, "loss": 0.0399, "step": 15814 }, { "epoch": 7.352859135285914, "grad_norm": 1.4695632457733154, "learning_rate": 4.8963044725434516e-05, "loss": 0.0602, "step": 15816 }, { "epoch": 7.353788935378893, "grad_norm": 1.1850769519805908, "learning_rate": 4.898531179635118e-05, "loss": 0.0449, "step": 15818 }, { "epoch": 7.354718735471874, "grad_norm": 2.7643909454345703, "learning_rate": 4.9007342141923713e-05, "loss": 0.0837, "step": 15820 }, { "epoch": 7.355648535564853, "grad_norm": 1.342455506324768, "learning_rate": 4.9029135544721444e-05, "loss": 0.0542, "step": 15822 }, { "epoch": 7.356578335657834, "grad_norm": 1.0245879888534546, "learning_rate": 4.9050691789652276e-05, "loss": 0.0927, "step": 15824 }, { "epoch": 7.357508135750813, "grad_norm": 1.505143404006958, "learning_rate": 4.9072010663964836e-05, "loss": 0.0708, "step": 15826 }, { "epoch": 7.358437935843794, "grad_norm": 1.8904078006744385, "learning_rate": 4.909309195725037e-05, "loss": 0.077, "step": 15828 }, { "epoch": 7.3593677359367735, "grad_norm": 1.6469900608062744, "learning_rate": 4.9113935461445084e-05, "loss": 0.0422, "step": 15830 }, { "epoch": 7.360297536029754, "grad_norm": 2.1644198894500732, "learning_rate": 4.913454097083196e-05, "loss": 0.0642, "step": 15832 }, { "epoch": 7.361227336122734, "grad_norm": 1.4008532762527466, "learning_rate": 4.9154908282042965e-05, "loss": 0.0574, "step": 15834 }, { "epoch": 7.362157136215713, "grad_norm": 1.306175708770752, "learning_rate": 4.9175037194061005e-05, "loss": 0.0401, "step": 15836 }, { "epoch": 7.363086936308694, "grad_norm": 1.4575947523117065, "learning_rate": 4.9194927508221746e-05, "loss": 0.0825, "step": 15838 }, { "epoch": 7.364016736401673, "grad_norm": 1.9882323741912842, "learning_rate": 4.9214579028215884e-05, "loss": 0.0804, "step": 15840 }, { "epoch": 7.364946536494654, "grad_norm": 2.042290449142456, "learning_rate": 4.9233991560090825e-05, "loss": 0.0582, "step": 15842 }, { "epoch": 7.365876336587633, "grad_norm": 2.1669259071350098, "learning_rate": 4.9253164912252756e-05, "loss": 0.0506, "step": 15844 }, { "epoch": 7.366806136680614, "grad_norm": 1.2499529123306274, "learning_rate": 4.927209889546837e-05, "loss": 0.06, "step": 15846 }, { "epoch": 7.3677359367735935, "grad_norm": 1.949234962463379, "learning_rate": 4.929079332286693e-05, "loss": 0.0702, "step": 15848 }, { "epoch": 7.368665736866574, "grad_norm": 1.4052027463912964, "learning_rate": 4.930924800994203e-05, "loss": 0.0486, "step": 15850 }, { "epoch": 7.369595536959554, "grad_norm": 2.00125789642334, "learning_rate": 4.932746277455327e-05, "loss": 0.0744, "step": 15852 }, { "epoch": 7.370525337052534, "grad_norm": 1.5805461406707764, "learning_rate": 4.9345437436928344e-05, "loss": 0.0426, "step": 15854 }, { "epoch": 7.371455137145514, "grad_norm": 1.7640104293823242, "learning_rate": 4.936317181966453e-05, "loss": 0.0465, "step": 15856 }, { "epoch": 7.372384937238493, "grad_norm": 1.7542908191680908, "learning_rate": 4.938066574773067e-05, "loss": 0.0872, "step": 15858 }, { "epoch": 7.373314737331474, "grad_norm": 2.320891857147217, "learning_rate": 4.939791904846878e-05, "loss": 0.0881, "step": 15860 }, { "epoch": 7.374244537424453, "grad_norm": 1.7708098888397217, "learning_rate": 4.941493155159571e-05, "loss": 0.0643, "step": 15862 }, { "epoch": 7.375174337517434, "grad_norm": 1.0453953742980957, "learning_rate": 4.9431703089204954e-05, "loss": 0.0715, "step": 15864 }, { "epoch": 7.3761041376104135, "grad_norm": 1.3285695314407349, "learning_rate": 4.944823349576815e-05, "loss": 0.1152, "step": 15866 }, { "epoch": 7.377033937703394, "grad_norm": 1.2881535291671753, "learning_rate": 4.946452260813689e-05, "loss": 0.0459, "step": 15868 }, { "epoch": 7.377963737796374, "grad_norm": 1.822354793548584, "learning_rate": 4.948057026554424e-05, "loss": 0.0678, "step": 15870 }, { "epoch": 7.378893537889354, "grad_norm": 2.12807297706604, "learning_rate": 4.949637630960626e-05, "loss": 0.0586, "step": 15872 }, { "epoch": 7.379823337982334, "grad_norm": 2.173868417739868, "learning_rate": 4.9511940584323706e-05, "loss": 0.0759, "step": 15874 }, { "epoch": 7.380753138075314, "grad_norm": 1.3119373321533203, "learning_rate": 4.952726293608345e-05, "loss": 0.0735, "step": 15876 }, { "epoch": 7.381682938168294, "grad_norm": 1.7180931568145752, "learning_rate": 4.954234321366006e-05, "loss": 0.0692, "step": 15878 }, { "epoch": 7.382612738261274, "grad_norm": 1.445483922958374, "learning_rate": 4.955718126821732e-05, "loss": 0.0515, "step": 15880 }, { "epoch": 7.383542538354254, "grad_norm": 1.7014639377593994, "learning_rate": 4.957177695330956e-05, "loss": 0.0454, "step": 15882 }, { "epoch": 7.3844723384472335, "grad_norm": 3.4025044441223145, "learning_rate": 4.958613012488333e-05, "loss": 0.113, "step": 15884 }, { "epoch": 7.385402138540214, "grad_norm": 2.3699028491973877, "learning_rate": 4.960024064127856e-05, "loss": 0.0632, "step": 15886 }, { "epoch": 7.386331938633194, "grad_norm": 2.544626474380493, "learning_rate": 4.96141083632302e-05, "loss": 0.0872, "step": 15888 }, { "epoch": 7.387261738726174, "grad_norm": 1.7485440969467163, "learning_rate": 4.9627733153869426e-05, "loss": 0.0739, "step": 15890 }, { "epoch": 7.388191538819154, "grad_norm": 3.362858295440674, "learning_rate": 4.9641114878725024e-05, "loss": 0.0852, "step": 15892 }, { "epoch": 7.389121338912134, "grad_norm": 2.3588004112243652, "learning_rate": 4.965425340572479e-05, "loss": 0.0554, "step": 15894 }, { "epoch": 7.390051139005114, "grad_norm": 1.1208128929138184, "learning_rate": 4.966714860519677e-05, "loss": 0.0467, "step": 15896 }, { "epoch": 7.390980939098094, "grad_norm": 1.1727733612060547, "learning_rate": 4.9679800349870555e-05, "loss": 0.047, "step": 15898 }, { "epoch": 7.391910739191074, "grad_norm": 1.844855546951294, "learning_rate": 4.9692208514878505e-05, "loss": 0.0615, "step": 15900 }, { "epoch": 7.3928405392840535, "grad_norm": 1.3307856321334839, "learning_rate": 4.970437297775706e-05, "loss": 0.0902, "step": 15902 }, { "epoch": 7.393770339377034, "grad_norm": 2.3103086948394775, "learning_rate": 4.971629361844792e-05, "loss": 0.0958, "step": 15904 }, { "epoch": 7.394700139470014, "grad_norm": 1.8574343919754028, "learning_rate": 4.97279703192991e-05, "loss": 0.0606, "step": 15906 }, { "epoch": 7.395629939562994, "grad_norm": 1.318162202835083, "learning_rate": 4.973940296506635e-05, "loss": 0.0604, "step": 15908 }, { "epoch": 7.396559739655974, "grad_norm": 1.6411820650100708, "learning_rate": 4.9750591442914016e-05, "loss": 0.0573, "step": 15910 }, { "epoch": 7.397489539748954, "grad_norm": 1.2208491563796997, "learning_rate": 4.9761535642416354e-05, "loss": 0.0767, "step": 15912 }, { "epoch": 7.398419339841934, "grad_norm": 0.5109043121337891, "learning_rate": 4.977223545555855e-05, "loss": 0.0712, "step": 15914 }, { "epoch": 7.399349139934914, "grad_norm": 1.9356937408447266, "learning_rate": 4.978269077673774e-05, "loss": 0.1021, "step": 15916 }, { "epoch": 7.400278940027894, "grad_norm": 1.6950613260269165, "learning_rate": 4.979290150276416e-05, "loss": 0.0822, "step": 15918 }, { "epoch": 7.401208740120874, "grad_norm": 2.29669451713562, "learning_rate": 4.980286753286204e-05, "loss": 0.0689, "step": 15920 }, { "epoch": 7.402138540213854, "grad_norm": 2.8264734745025635, "learning_rate": 4.9812588768670675e-05, "loss": 0.0616, "step": 15922 }, { "epoch": 7.4030683403068345, "grad_norm": 2.0195424556732178, "learning_rate": 4.982206511424543e-05, "loss": 0.0981, "step": 15924 }, { "epoch": 7.403998140399814, "grad_norm": 1.5044161081314087, "learning_rate": 4.983129647605857e-05, "loss": 0.0823, "step": 15926 }, { "epoch": 7.404927940492794, "grad_norm": 3.1714987754821777, "learning_rate": 4.98402827630003e-05, "loss": 0.0948, "step": 15928 }, { "epoch": 7.405857740585774, "grad_norm": 1.546980619430542, "learning_rate": 4.984902388637957e-05, "loss": 0.0686, "step": 15930 }, { "epoch": 7.406787540678754, "grad_norm": 2.336944580078125, "learning_rate": 4.9857519759925075e-05, "loss": 0.0579, "step": 15932 }, { "epoch": 7.407717340771734, "grad_norm": 2.7264175415039062, "learning_rate": 4.986577029978591e-05, "loss": 0.0826, "step": 15934 }, { "epoch": 7.408647140864714, "grad_norm": 2.2935450077056885, "learning_rate": 4.9873775424532596e-05, "loss": 0.0716, "step": 15936 }, { "epoch": 7.409576940957694, "grad_norm": 1.2866604328155518, "learning_rate": 4.988153505515782e-05, "loss": 0.0738, "step": 15938 }, { "epoch": 7.410506741050674, "grad_norm": 2.21242618560791, "learning_rate": 4.98890491150771e-05, "loss": 0.0719, "step": 15940 }, { "epoch": 7.4114365411436545, "grad_norm": 1.950758695602417, "learning_rate": 4.9896317530129744e-05, "loss": 0.086, "step": 15942 }, { "epoch": 7.412366341236634, "grad_norm": 1.9105353355407715, "learning_rate": 4.9903340228579406e-05, "loss": 0.0785, "step": 15944 }, { "epoch": 7.413296141329614, "grad_norm": 1.396453857421875, "learning_rate": 4.991011714111491e-05, "loss": 0.0634, "step": 15946 }, { "epoch": 7.414225941422594, "grad_norm": 2.627300262451172, "learning_rate": 4.991664820085085e-05, "loss": 0.0934, "step": 15948 }, { "epoch": 7.415155741515574, "grad_norm": 2.0776965618133545, "learning_rate": 4.99229333433283e-05, "loss": 0.104, "step": 15950 }, { "epoch": 7.416085541608554, "grad_norm": 2.2719528675079346, "learning_rate": 4.992897250651544e-05, "loss": 0.0746, "step": 15952 }, { "epoch": 7.417015341701534, "grad_norm": 2.3308651447296143, "learning_rate": 4.993476563080819e-05, "loss": 0.0907, "step": 15954 }, { "epoch": 7.417945141794514, "grad_norm": 2.747767210006714, "learning_rate": 4.994031265903073e-05, "loss": 0.1003, "step": 15956 }, { "epoch": 7.418874941887494, "grad_norm": 2.4265382289886475, "learning_rate": 4.994561353643615e-05, "loss": 0.0901, "step": 15958 }, { "epoch": 7.4198047419804745, "grad_norm": 1.3364421129226685, "learning_rate": 4.99506682107069e-05, "loss": 0.0628, "step": 15960 }, { "epoch": 7.420734542073454, "grad_norm": 2.6024529933929443, "learning_rate": 4.995547663195541e-05, "loss": 0.0938, "step": 15962 }, { "epoch": 7.421664342166435, "grad_norm": 1.2083845138549805, "learning_rate": 4.996003875272449e-05, "loss": 0.0617, "step": 15964 }, { "epoch": 7.422594142259414, "grad_norm": 2.0905306339263916, "learning_rate": 4.996435452798786e-05, "loss": 0.0715, "step": 15966 }, { "epoch": 7.423523942352395, "grad_norm": 1.4697099924087524, "learning_rate": 4.9968423915150555e-05, "loss": 0.0796, "step": 15968 }, { "epoch": 7.424453742445374, "grad_norm": 2.245431900024414, "learning_rate": 4.9972246874049356e-05, "loss": 0.0823, "step": 15970 }, { "epoch": 7.425383542538354, "grad_norm": 2.816070556640625, "learning_rate": 4.997582336695323e-05, "loss": 0.0976, "step": 15972 }, { "epoch": 7.426313342631334, "grad_norm": 2.477862596511841, "learning_rate": 4.997915335856361e-05, "loss": 0.0585, "step": 15974 }, { "epoch": 7.427243142724314, "grad_norm": 2.2118489742279053, "learning_rate": 4.998223681601484e-05, "loss": 0.0982, "step": 15976 }, { "epoch": 7.4281729428172945, "grad_norm": 2.4499993324279785, "learning_rate": 4.998507370887443e-05, "loss": 0.0723, "step": 15978 }, { "epoch": 7.429102742910274, "grad_norm": 1.754163146018982, "learning_rate": 4.9987664009143386e-05, "loss": 0.0548, "step": 15980 }, { "epoch": 7.4300325430032546, "grad_norm": 2.0508038997650146, "learning_rate": 4.9990007691256514e-05, "loss": 0.0695, "step": 15982 }, { "epoch": 7.430962343096234, "grad_norm": 1.4129756689071655, "learning_rate": 4.9992104732082604e-05, "loss": 0.0746, "step": 15984 }, { "epoch": 7.431892143189215, "grad_norm": 2.570185899734497, "learning_rate": 4.999395511092471e-05, "loss": 0.0466, "step": 15986 }, { "epoch": 7.432821943282194, "grad_norm": 1.5990344285964966, "learning_rate": 4.9995558809520336e-05, "loss": 0.06, "step": 15988 }, { "epoch": 7.433751743375175, "grad_norm": 1.9283192157745361, "learning_rate": 4.999691581204163e-05, "loss": 0.05, "step": 15990 }, { "epoch": 7.434681543468154, "grad_norm": 2.259960651397705, "learning_rate": 4.999802610509552e-05, "loss": 0.0756, "step": 15992 }, { "epoch": 7.435611343561134, "grad_norm": 1.7821929454803467, "learning_rate": 4.999888967772386e-05, "loss": 0.0494, "step": 15994 }, { "epoch": 7.4365411436541144, "grad_norm": 1.7117700576782227, "learning_rate": 4.9999506521403536e-05, "loss": 0.0612, "step": 15996 }, { "epoch": 7.437470943747094, "grad_norm": 1.2766218185424805, "learning_rate": 4.9999876630046576e-05, "loss": 0.0358, "step": 15998 }, { "epoch": 7.4384007438400745, "grad_norm": 1.0031380653381348, "learning_rate": 5.000000000000013e-05, "loss": 0.0476, "step": 16000 }, { "epoch": 7.4384007438400745, "eval_cer": 0.1717434999675809, "eval_loss": 0.28743648529052734, "eval_runtime": 402.0735, "eval_samples_per_second": 31.571, "eval_steps_per_second": 0.987, "step": 16000 }, { "epoch": 7.439330543933054, "grad_norm": 1.4906249046325684, "learning_rate": 4.999987663004659e-05, "loss": 0.0738, "step": 16002 }, { "epoch": 7.440260344026035, "grad_norm": 2.7574350833892822, "learning_rate": 4.9999506521403556e-05, "loss": 0.1035, "step": 16004 }, { "epoch": 7.441190144119014, "grad_norm": 1.363521695137024, "learning_rate": 4.999888967772388e-05, "loss": 0.0639, "step": 16006 }, { "epoch": 7.442119944211995, "grad_norm": 2.0395891666412354, "learning_rate": 4.999802610509554e-05, "loss": 0.0429, "step": 16008 }, { "epoch": 7.443049744304974, "grad_norm": 2.1918764114379883, "learning_rate": 4.9996915812041654e-05, "loss": 0.1028, "step": 16010 }, { "epoch": 7.443979544397955, "grad_norm": 2.490239381790161, "learning_rate": 4.9995558809520356e-05, "loss": 0.0883, "step": 16012 }, { "epoch": 7.4449093444909344, "grad_norm": 2.432473659515381, "learning_rate": 4.999395511092474e-05, "loss": 0.0916, "step": 16014 }, { "epoch": 7.445839144583914, "grad_norm": 1.2817882299423218, "learning_rate": 4.999210473208263e-05, "loss": 0.0363, "step": 16016 }, { "epoch": 7.4467689446768945, "grad_norm": 1.3805829286575317, "learning_rate": 4.9990007691256534e-05, "loss": 0.0505, "step": 16018 }, { "epoch": 7.447698744769874, "grad_norm": 1.5348154306411743, "learning_rate": 4.9987664009143406e-05, "loss": 0.051, "step": 16020 }, { "epoch": 7.448628544862855, "grad_norm": 1.8339473009109497, "learning_rate": 4.998507370887444e-05, "loss": 0.0588, "step": 16022 }, { "epoch": 7.449558344955834, "grad_norm": 2.0943329334259033, "learning_rate": 4.998223681601486e-05, "loss": 0.0715, "step": 16024 }, { "epoch": 7.450488145048815, "grad_norm": 1.3035175800323486, "learning_rate": 4.997915335856363e-05, "loss": 0.0457, "step": 16026 }, { "epoch": 7.451417945141794, "grad_norm": 1.5184366703033447, "learning_rate": 4.9975823366953246e-05, "loss": 0.0488, "step": 16028 }, { "epoch": 7.452347745234775, "grad_norm": 2.052983522415161, "learning_rate": 4.9972246874049376e-05, "loss": 0.0604, "step": 16030 }, { "epoch": 7.453277545327754, "grad_norm": 2.434978723526001, "learning_rate": 4.9968423915150575e-05, "loss": 0.0829, "step": 16032 }, { "epoch": 7.454207345420735, "grad_norm": 2.0433881282806396, "learning_rate": 4.9964354527987884e-05, "loss": 0.0626, "step": 16034 }, { "epoch": 7.4551371455137145, "grad_norm": 1.857287883758545, "learning_rate": 4.9960038752724516e-05, "loss": 0.0642, "step": 16036 }, { "epoch": 7.456066945606695, "grad_norm": 1.489014983177185, "learning_rate": 4.995547663195544e-05, "loss": 0.0729, "step": 16038 }, { "epoch": 7.456996745699675, "grad_norm": 2.7665023803710938, "learning_rate": 4.9950668210706935e-05, "loss": 0.0723, "step": 16040 }, { "epoch": 7.457926545792654, "grad_norm": 1.8021845817565918, "learning_rate": 4.9945613536436185e-05, "loss": 0.0749, "step": 16042 }, { "epoch": 7.458856345885635, "grad_norm": 0.9800166487693787, "learning_rate": 4.994031265903078e-05, "loss": 0.063, "step": 16044 }, { "epoch": 7.459786145978614, "grad_norm": 1.7579408884048462, "learning_rate": 4.993476563080824e-05, "loss": 0.0553, "step": 16046 }, { "epoch": 7.460715946071595, "grad_norm": 1.730936050415039, "learning_rate": 4.9928972506515503e-05, "loss": 0.0607, "step": 16048 }, { "epoch": 7.461645746164574, "grad_norm": 1.5705456733703613, "learning_rate": 4.992293334332836e-05, "loss": 0.0504, "step": 16050 }, { "epoch": 7.462575546257555, "grad_norm": 1.7296472787857056, "learning_rate": 4.991664820085091e-05, "loss": 0.0658, "step": 16052 }, { "epoch": 7.4635053463505345, "grad_norm": 1.3486418724060059, "learning_rate": 4.991011714111497e-05, "loss": 0.04, "step": 16054 }, { "epoch": 7.464435146443515, "grad_norm": 2.4081876277923584, "learning_rate": 4.990334022857946e-05, "loss": 0.0994, "step": 16056 }, { "epoch": 7.465364946536495, "grad_norm": 2.120098829269409, "learning_rate": 4.989631753012979e-05, "loss": 0.0663, "step": 16058 }, { "epoch": 7.466294746629474, "grad_norm": 2.826887607574463, "learning_rate": 4.988904911507715e-05, "loss": 0.0867, "step": 16060 }, { "epoch": 7.467224546722455, "grad_norm": 2.9921488761901855, "learning_rate": 4.988153505515786e-05, "loss": 0.0493, "step": 16062 }, { "epoch": 7.468154346815434, "grad_norm": 1.1323472261428833, "learning_rate": 4.987377542453265e-05, "loss": 0.0688, "step": 16064 }, { "epoch": 7.469084146908415, "grad_norm": 2.128364086151123, "learning_rate": 4.986577029978596e-05, "loss": 0.0835, "step": 16066 }, { "epoch": 7.470013947001394, "grad_norm": 2.354698419570923, "learning_rate": 4.9857519759925116e-05, "loss": 0.085, "step": 16068 }, { "epoch": 7.470943747094375, "grad_norm": 1.824855923652649, "learning_rate": 4.9849023886379626e-05, "loss": 0.0749, "step": 16070 }, { "epoch": 7.4718735471873545, "grad_norm": 1.8312492370605469, "learning_rate": 4.984028276300035e-05, "loss": 0.066, "step": 16072 }, { "epoch": 7.472803347280335, "grad_norm": 1.0040462017059326, "learning_rate": 4.9831296476058626e-05, "loss": 0.0487, "step": 16074 }, { "epoch": 7.473733147373315, "grad_norm": 1.9227725267410278, "learning_rate": 4.982206511424549e-05, "loss": 0.0433, "step": 16076 }, { "epoch": 7.474662947466295, "grad_norm": 2.3561642169952393, "learning_rate": 4.981258876867075e-05, "loss": 0.1053, "step": 16078 }, { "epoch": 7.475592747559275, "grad_norm": 2.155759334564209, "learning_rate": 4.980286753286209e-05, "loss": 0.0989, "step": 16080 }, { "epoch": 7.476522547652255, "grad_norm": 3.135009765625, "learning_rate": 4.979290150276422e-05, "loss": 0.118, "step": 16082 }, { "epoch": 7.477452347745235, "grad_norm": 2.2873594760894775, "learning_rate": 4.97826907767378e-05, "loss": 0.0843, "step": 16084 }, { "epoch": 7.478382147838214, "grad_norm": 1.3752391338348389, "learning_rate": 4.977223545555861e-05, "loss": 0.0578, "step": 16086 }, { "epoch": 7.479311947931195, "grad_norm": 1.9049627780914307, "learning_rate": 4.976153564241643e-05, "loss": 0.0749, "step": 16088 }, { "epoch": 7.4802417480241745, "grad_norm": 2.415687322616577, "learning_rate": 4.975059144291408e-05, "loss": 0.0637, "step": 16090 }, { "epoch": 7.481171548117155, "grad_norm": 2.1130120754241943, "learning_rate": 4.973940296506642e-05, "loss": 0.0948, "step": 16092 }, { "epoch": 7.482101348210135, "grad_norm": 1.0676159858703613, "learning_rate": 4.9727970319299193e-05, "loss": 0.0422, "step": 16094 }, { "epoch": 7.483031148303115, "grad_norm": 2.0330865383148193, "learning_rate": 4.971629361844799e-05, "loss": 0.073, "step": 16096 }, { "epoch": 7.483960948396095, "grad_norm": 2.3310694694519043, "learning_rate": 4.9704372977757156e-05, "loss": 0.0975, "step": 16098 }, { "epoch": 7.484890748489075, "grad_norm": 1.9494973421096802, "learning_rate": 4.9692208514878586e-05, "loss": 0.0641, "step": 16100 }, { "epoch": 7.485820548582055, "grad_norm": 2.370861768722534, "learning_rate": 4.967980034987063e-05, "loss": 0.0889, "step": 16102 }, { "epoch": 7.486750348675034, "grad_norm": 3.400993824005127, "learning_rate": 4.966714860519685e-05, "loss": 0.1003, "step": 16104 }, { "epoch": 7.487680148768015, "grad_norm": 2.742769718170166, "learning_rate": 4.965425340572488e-05, "loss": 0.0911, "step": 16106 }, { "epoch": 7.4886099488609945, "grad_norm": 1.9762648344039917, "learning_rate": 4.96411148787251e-05, "loss": 0.1196, "step": 16108 }, { "epoch": 7.489539748953975, "grad_norm": 1.7091255187988281, "learning_rate": 4.962773315386949e-05, "loss": 0.055, "step": 16110 }, { "epoch": 7.490469549046955, "grad_norm": 2.3404018878936768, "learning_rate": 4.961410836323028e-05, "loss": 0.1219, "step": 16112 }, { "epoch": 7.491399349139935, "grad_norm": 2.237271785736084, "learning_rate": 4.960024064127863e-05, "loss": 0.0949, "step": 16114 }, { "epoch": 7.492329149232915, "grad_norm": 2.7617549896240234, "learning_rate": 4.958613012488339e-05, "loss": 0.0992, "step": 16116 }, { "epoch": 7.493258949325895, "grad_norm": 1.6853536367416382, "learning_rate": 4.957177695330962e-05, "loss": 0.0606, "step": 16118 }, { "epoch": 7.494188749418875, "grad_norm": 1.521432638168335, "learning_rate": 4.9557181268217376e-05, "loss": 0.0578, "step": 16120 }, { "epoch": 7.495118549511855, "grad_norm": 2.446108341217041, "learning_rate": 4.9542343213660144e-05, "loss": 0.0734, "step": 16122 }, { "epoch": 7.496048349604835, "grad_norm": 2.723189115524292, "learning_rate": 4.952726293608351e-05, "loss": 0.0838, "step": 16124 }, { "epoch": 7.496978149697815, "grad_norm": 1.1054598093032837, "learning_rate": 4.951194058432377e-05, "loss": 0.0459, "step": 16126 }, { "epoch": 7.497907949790795, "grad_norm": 1.7318832874298096, "learning_rate": 4.949637630960633e-05, "loss": 0.0881, "step": 16128 }, { "epoch": 7.498837749883775, "grad_norm": 2.2289159297943115, "learning_rate": 4.948057026554431e-05, "loss": 0.0647, "step": 16130 }, { "epoch": 7.499767549976755, "grad_norm": 2.116347551345825, "learning_rate": 4.9464522608136975e-05, "loss": 0.0619, "step": 16132 }, { "epoch": 7.500697350069735, "grad_norm": 2.229219436645508, "learning_rate": 4.944823349576822e-05, "loss": 0.0598, "step": 16134 }, { "epoch": 7.501627150162715, "grad_norm": 2.2202398777008057, "learning_rate": 4.9431703089205015e-05, "loss": 0.098, "step": 16136 }, { "epoch": 7.502556950255695, "grad_norm": 2.555926561355591, "learning_rate": 4.941493155159577e-05, "loss": 0.0942, "step": 16138 }, { "epoch": 7.503486750348675, "grad_norm": 2.2733404636383057, "learning_rate": 4.939791904846885e-05, "loss": 0.0902, "step": 16140 }, { "epoch": 7.504416550441655, "grad_norm": 2.4594829082489014, "learning_rate": 4.9380665747730766e-05, "loss": 0.1053, "step": 16142 }, { "epoch": 7.505346350534635, "grad_norm": 2.523994207382202, "learning_rate": 4.93631718196646e-05, "loss": 0.0744, "step": 16144 }, { "epoch": 7.506276150627615, "grad_norm": 3.5451483726501465, "learning_rate": 4.9345437436928405e-05, "loss": 0.0992, "step": 16146 }, { "epoch": 7.507205950720595, "grad_norm": 1.544278860092163, "learning_rate": 4.932746277455334e-05, "loss": 0.0564, "step": 16148 }, { "epoch": 7.508135750813575, "grad_norm": 1.9618622064590454, "learning_rate": 4.9309248009942104e-05, "loss": 0.0915, "step": 16150 }, { "epoch": 7.509065550906556, "grad_norm": 1.7924060821533203, "learning_rate": 4.9290793322867004e-05, "loss": 0.0852, "step": 16152 }, { "epoch": 7.509995350999535, "grad_norm": 1.8266124725341797, "learning_rate": 4.927209889546844e-05, "loss": 0.0784, "step": 16154 }, { "epoch": 7.510925151092515, "grad_norm": 1.3631643056869507, "learning_rate": 4.925316491225282e-05, "loss": 0.0553, "step": 16156 }, { "epoch": 7.511854951185495, "grad_norm": 2.2412819862365723, "learning_rate": 4.923399156009089e-05, "loss": 0.0687, "step": 16158 }, { "epoch": 7.512784751278475, "grad_norm": 2.266382932662964, "learning_rate": 4.921457902821595e-05, "loss": 0.0723, "step": 16160 }, { "epoch": 7.513714551371455, "grad_norm": 1.629154920578003, "learning_rate": 4.9194927508221794e-05, "loss": 0.0463, "step": 16162 }, { "epoch": 7.514644351464435, "grad_norm": 2.259033679962158, "learning_rate": 4.917503719406102e-05, "loss": 0.0852, "step": 16164 }, { "epoch": 7.5155741515574155, "grad_norm": 2.2209243774414062, "learning_rate": 4.915490828204302e-05, "loss": 0.0859, "step": 16166 }, { "epoch": 7.516503951650395, "grad_norm": 2.400188684463501, "learning_rate": 4.913454097083199e-05, "loss": 0.0848, "step": 16168 }, { "epoch": 7.517433751743376, "grad_norm": 1.7679874897003174, "learning_rate": 4.911393546144511e-05, "loss": 0.0929, "step": 16170 }, { "epoch": 7.518363551836355, "grad_norm": 1.5733165740966797, "learning_rate": 4.9093091957250394e-05, "loss": 0.043, "step": 16172 }, { "epoch": 7.519293351929335, "grad_norm": 1.2762701511383057, "learning_rate": 4.907201066396485e-05, "loss": 0.055, "step": 16174 }, { "epoch": 7.520223152022315, "grad_norm": 2.2822530269622803, "learning_rate": 4.905069178965232e-05, "loss": 0.0755, "step": 16176 }, { "epoch": 7.521152952115295, "grad_norm": 2.099620819091797, "learning_rate": 4.902913554472145e-05, "loss": 0.0696, "step": 16178 }, { "epoch": 7.522082752208275, "grad_norm": 2.243650197982788, "learning_rate": 4.900734214192375e-05, "loss": 0.1046, "step": 16180 }, { "epoch": 7.523012552301255, "grad_norm": 1.928752064704895, "learning_rate": 4.898531179635123e-05, "loss": 0.0584, "step": 16182 }, { "epoch": 7.5239423523942355, "grad_norm": 2.295762777328491, "learning_rate": 4.896304472543458e-05, "loss": 0.1011, "step": 16184 }, { "epoch": 7.524872152487215, "grad_norm": 2.1789419651031494, "learning_rate": 4.894054114894074e-05, "loss": 0.0755, "step": 16186 }, { "epoch": 7.525801952580196, "grad_norm": 1.2507808208465576, "learning_rate": 4.891780128897094e-05, "loss": 0.0558, "step": 16188 }, { "epoch": 7.526731752673175, "grad_norm": 1.5301766395568848, "learning_rate": 4.8894825369958445e-05, "loss": 0.0669, "step": 16190 }, { "epoch": 7.527661552766155, "grad_norm": 1.6688482761383057, "learning_rate": 4.8871613618666234e-05, "loss": 0.0586, "step": 16192 }, { "epoch": 7.528591352859135, "grad_norm": 2.346849203109741, "learning_rate": 4.8848166264185014e-05, "loss": 0.1027, "step": 16194 }, { "epoch": 7.529521152952116, "grad_norm": 2.628941059112549, "learning_rate": 4.882448353793063e-05, "loss": 0.0894, "step": 16196 }, { "epoch": 7.530450953045095, "grad_norm": 1.5968211889266968, "learning_rate": 4.880056567364209e-05, "loss": 0.0757, "step": 16198 }, { "epoch": 7.531380753138075, "grad_norm": 2.141247034072876, "learning_rate": 4.8776412907379026e-05, "loss": 0.0696, "step": 16200 }, { "epoch": 7.5323105532310555, "grad_norm": 1.9958629608154297, "learning_rate": 4.875202547751946e-05, "loss": 0.088, "step": 16202 }, { "epoch": 7.533240353324035, "grad_norm": 1.4806060791015625, "learning_rate": 4.8727403624757555e-05, "loss": 0.0633, "step": 16204 }, { "epoch": 7.534170153417016, "grad_norm": 2.235520124435425, "learning_rate": 4.870254759210095e-05, "loss": 0.0775, "step": 16206 }, { "epoch": 7.535099953509995, "grad_norm": 1.5590993165969849, "learning_rate": 4.867745762486877e-05, "loss": 0.0482, "step": 16208 }, { "epoch": 7.536029753602976, "grad_norm": 1.9286705255508423, "learning_rate": 4.865213397068881e-05, "loss": 0.1008, "step": 16210 }, { "epoch": 7.536959553695955, "grad_norm": 2.2457454204559326, "learning_rate": 4.862657687949529e-05, "loss": 0.0667, "step": 16212 }, { "epoch": 7.537889353788936, "grad_norm": 1.6524841785430908, "learning_rate": 4.860078660352643e-05, "loss": 0.0597, "step": 16214 }, { "epoch": 7.538819153881915, "grad_norm": 2.021156072616577, "learning_rate": 4.857476339732179e-05, "loss": 0.047, "step": 16216 }, { "epoch": 7.539748953974895, "grad_norm": 1.4370654821395874, "learning_rate": 4.854850751771996e-05, "loss": 0.0591, "step": 16218 }, { "epoch": 7.5406787540678755, "grad_norm": 1.252434253692627, "learning_rate": 4.8522019223855825e-05, "loss": 0.0711, "step": 16220 }, { "epoch": 7.541608554160855, "grad_norm": 2.103041648864746, "learning_rate": 4.849529877715816e-05, "loss": 0.0557, "step": 16222 }, { "epoch": 7.542538354253836, "grad_norm": 1.579787015914917, "learning_rate": 4.8468346441347036e-05, "loss": 0.043, "step": 16224 }, { "epoch": 7.543468154346815, "grad_norm": 1.3254371881484985, "learning_rate": 4.844116248243105e-05, "loss": 0.049, "step": 16226 }, { "epoch": 7.544397954439796, "grad_norm": 3.0259525775909424, "learning_rate": 4.841374716870499e-05, "loss": 0.0827, "step": 16228 }, { "epoch": 7.545327754532775, "grad_norm": 1.8920683860778809, "learning_rate": 4.8386100770746884e-05, "loss": 0.0991, "step": 16230 }, { "epoch": 7.546257554625756, "grad_norm": 1.3139240741729736, "learning_rate": 4.835822356141549e-05, "loss": 0.0544, "step": 16232 }, { "epoch": 7.547187354718735, "grad_norm": 1.9607921838760376, "learning_rate": 4.8330115815847635e-05, "loss": 0.0886, "step": 16234 }, { "epoch": 7.548117154811716, "grad_norm": 1.7599294185638428, "learning_rate": 4.8301777811455425e-05, "loss": 0.0774, "step": 16236 }, { "epoch": 7.5490469549046955, "grad_norm": 0.9095650315284729, "learning_rate": 4.8273209827923554e-05, "loss": 0.043, "step": 16238 }, { "epoch": 7.549976754997676, "grad_norm": 2.0337936878204346, "learning_rate": 4.824441214720644e-05, "loss": 0.0674, "step": 16240 }, { "epoch": 7.550906555090656, "grad_norm": 1.457047462463379, "learning_rate": 4.821538505352561e-05, "loss": 0.0671, "step": 16242 }, { "epoch": 7.551836355183635, "grad_norm": 2.3687257766723633, "learning_rate": 4.818612883336672e-05, "loss": 0.0752, "step": 16244 }, { "epoch": 7.552766155276616, "grad_norm": 1.6887630224227905, "learning_rate": 4.815664377547681e-05, "loss": 0.037, "step": 16246 }, { "epoch": 7.553695955369595, "grad_norm": 1.7166166305541992, "learning_rate": 4.812693017086161e-05, "loss": 0.0678, "step": 16248 }, { "epoch": 7.554625755462576, "grad_norm": 2.476872205734253, "learning_rate": 4.8096988312782316e-05, "loss": 0.0932, "step": 16250 }, { "epoch": 7.555555555555555, "grad_norm": 2.590996742248535, "learning_rate": 4.806681849675304e-05, "loss": 0.0764, "step": 16252 }, { "epoch": 7.556485355648536, "grad_norm": 1.646662950515747, "learning_rate": 4.8036421020537646e-05, "loss": 0.0555, "step": 16254 }, { "epoch": 7.5574151557415155, "grad_norm": 2.1746420860290527, "learning_rate": 4.8005796184146924e-05, "loss": 0.0619, "step": 16256 }, { "epoch": 7.558344955834496, "grad_norm": 3.275632381439209, "learning_rate": 4.7974944289835715e-05, "loss": 0.1128, "step": 16258 }, { "epoch": 7.5592747559274756, "grad_norm": 1.736100196838379, "learning_rate": 4.7943865642099666e-05, "loss": 0.0657, "step": 16260 }, { "epoch": 7.560204556020455, "grad_norm": 2.501912832260132, "learning_rate": 4.79125605476726e-05, "loss": 0.0683, "step": 16262 }, { "epoch": 7.561134356113436, "grad_norm": 2.1995153427124023, "learning_rate": 4.788102931552312e-05, "loss": 0.0686, "step": 16264 }, { "epoch": 7.562064156206415, "grad_norm": 1.6884323358535767, "learning_rate": 4.784927225685169e-05, "loss": 0.0746, "step": 16266 }, { "epoch": 7.562993956299396, "grad_norm": 1.2659306526184082, "learning_rate": 4.781728968508775e-05, "loss": 0.0388, "step": 16268 }, { "epoch": 7.563923756392375, "grad_norm": 2.1301591396331787, "learning_rate": 4.7785081915886296e-05, "loss": 0.0594, "step": 16270 }, { "epoch": 7.564853556485356, "grad_norm": 1.8077845573425293, "learning_rate": 4.7752649267125076e-05, "loss": 0.0689, "step": 16272 }, { "epoch": 7.5657833565783355, "grad_norm": 1.509114384651184, "learning_rate": 4.771999205890117e-05, "loss": 0.0544, "step": 16274 }, { "epoch": 7.566713156671316, "grad_norm": 1.5668935775756836, "learning_rate": 4.7687110613528075e-05, "loss": 0.061, "step": 16276 }, { "epoch": 7.5676429567642955, "grad_norm": 2.0586655139923096, "learning_rate": 4.7654005255532413e-05, "loss": 0.0696, "step": 16278 }, { "epoch": 7.568572756857276, "grad_norm": 2.1231415271759033, "learning_rate": 4.762067631165064e-05, "loss": 0.0615, "step": 16280 }, { "epoch": 7.569502556950256, "grad_norm": 0.6332096457481384, "learning_rate": 4.7587124110826044e-05, "loss": 0.0489, "step": 16282 }, { "epoch": 7.570432357043236, "grad_norm": 1.8988428115844727, "learning_rate": 4.7553348984205226e-05, "loss": 0.0445, "step": 16284 }, { "epoch": 7.571362157136216, "grad_norm": 1.4148528575897217, "learning_rate": 4.751935126513513e-05, "loss": 0.0519, "step": 16286 }, { "epoch": 7.572291957229195, "grad_norm": 2.6898903846740723, "learning_rate": 4.748513128915944e-05, "loss": 0.0625, "step": 16288 }, { "epoch": 7.573221757322176, "grad_norm": 3.6674559116363525, "learning_rate": 4.7450689394015535e-05, "loss": 0.0824, "step": 16290 }, { "epoch": 7.5741515574151554, "grad_norm": 1.761204719543457, "learning_rate": 4.741602591963107e-05, "loss": 0.0814, "step": 16292 }, { "epoch": 7.575081357508136, "grad_norm": 2.0871195793151855, "learning_rate": 4.738114120812044e-05, "loss": 0.0504, "step": 16294 }, { "epoch": 7.5760111576011155, "grad_norm": 1.9457606077194214, "learning_rate": 4.734603560378177e-05, "loss": 0.0551, "step": 16296 }, { "epoch": 7.576940957694096, "grad_norm": 1.5286970138549805, "learning_rate": 4.731070945309314e-05, "loss": 0.043, "step": 16298 }, { "epoch": 7.577870757787076, "grad_norm": 1.9908161163330078, "learning_rate": 4.727516310470937e-05, "loss": 0.0627, "step": 16300 }, { "epoch": 7.578800557880056, "grad_norm": 1.60124671459198, "learning_rate": 4.723939690945861e-05, "loss": 0.0422, "step": 16302 }, { "epoch": 7.579730357973036, "grad_norm": 2.6889007091522217, "learning_rate": 4.7203411220338755e-05, "loss": 0.1027, "step": 16304 }, { "epoch": 7.580660158066015, "grad_norm": 2.315621852874756, "learning_rate": 4.716720639251408e-05, "loss": 0.0684, "step": 16306 }, { "epoch": 7.581589958158996, "grad_norm": 1.4583121538162231, "learning_rate": 4.713078278331158e-05, "loss": 0.0622, "step": 16308 }, { "epoch": 7.582519758251975, "grad_norm": 1.9982163906097412, "learning_rate": 4.709414075221752e-05, "loss": 0.0867, "step": 16310 }, { "epoch": 7.583449558344956, "grad_norm": 2.984041690826416, "learning_rate": 4.705728066087404e-05, "loss": 0.0923, "step": 16312 }, { "epoch": 7.5843793584379355, "grad_norm": 2.04728364944458, "learning_rate": 4.702020287307527e-05, "loss": 0.0682, "step": 16314 }, { "epoch": 7.585309158530916, "grad_norm": 1.3289321660995483, "learning_rate": 4.698290775476407e-05, "loss": 0.0502, "step": 16316 }, { "epoch": 7.586238958623896, "grad_norm": 2.906374454498291, "learning_rate": 4.6945395674028236e-05, "loss": 0.0915, "step": 16318 }, { "epoch": 7.587168758716876, "grad_norm": 2.9996883869171143, "learning_rate": 4.690766700109676e-05, "loss": 0.1055, "step": 16320 }, { "epoch": 7.588098558809856, "grad_norm": 2.6697564125061035, "learning_rate": 4.686972210833651e-05, "loss": 0.0719, "step": 16322 }, { "epoch": 7.589028358902836, "grad_norm": 1.5712190866470337, "learning_rate": 4.683156137024817e-05, "loss": 0.0625, "step": 16324 }, { "epoch": 7.589958158995816, "grad_norm": 1.0769314765930176, "learning_rate": 4.679318516346292e-05, "loss": 0.0484, "step": 16326 }, { "epoch": 7.590887959088796, "grad_norm": 1.7161332368850708, "learning_rate": 4.6754593866738324e-05, "loss": 0.0547, "step": 16328 }, { "epoch": 7.591817759181776, "grad_norm": 1.436228632926941, "learning_rate": 4.671578786095494e-05, "loss": 0.0791, "step": 16330 }, { "epoch": 7.5927475592747555, "grad_norm": 1.7822920083999634, "learning_rate": 4.6676767529112435e-05, "loss": 0.083, "step": 16332 }, { "epoch": 7.593677359367736, "grad_norm": 2.0116958618164062, "learning_rate": 4.663753325632563e-05, "loss": 0.0961, "step": 16334 }, { "epoch": 7.594607159460716, "grad_norm": 1.657147765159607, "learning_rate": 4.659808542982107e-05, "loss": 0.0515, "step": 16336 }, { "epoch": 7.595536959553696, "grad_norm": 1.9590282440185547, "learning_rate": 4.655842443893277e-05, "loss": 0.0702, "step": 16338 }, { "epoch": 7.596466759646676, "grad_norm": 1.718369960784912, "learning_rate": 4.6518550675098786e-05, "loss": 0.0774, "step": 16340 }, { "epoch": 7.597396559739656, "grad_norm": 3.0564866065979004, "learning_rate": 4.6478464531857036e-05, "loss": 0.0945, "step": 16342 }, { "epoch": 7.598326359832636, "grad_norm": 2.135237216949463, "learning_rate": 4.643816640484146e-05, "loss": 0.0761, "step": 16344 }, { "epoch": 7.599256159925616, "grad_norm": 2.9799582958221436, "learning_rate": 4.63976566917785e-05, "loss": 0.0874, "step": 16346 }, { "epoch": 7.600185960018596, "grad_norm": 3.1290314197540283, "learning_rate": 4.635693579248254e-05, "loss": 0.1362, "step": 16348 }, { "epoch": 7.6011157601115755, "grad_norm": 0.8477113842964172, "learning_rate": 4.63160041088525e-05, "loss": 0.0476, "step": 16350 }, { "epoch": 7.602045560204556, "grad_norm": 1.5493569374084473, "learning_rate": 4.627486204486751e-05, "loss": 0.0723, "step": 16352 }, { "epoch": 7.6029753602975365, "grad_norm": 1.612432837486267, "learning_rate": 4.6233510006583104e-05, "loss": 0.0523, "step": 16354 }, { "epoch": 7.603905160390516, "grad_norm": 1.2938413619995117, "learning_rate": 4.6191948402127244e-05, "loss": 0.0576, "step": 16356 }, { "epoch": 7.604834960483496, "grad_norm": 1.7060500383377075, "learning_rate": 4.61501776416962e-05, "loss": 0.0869, "step": 16358 }, { "epoch": 7.605764760576476, "grad_norm": 1.6919125318527222, "learning_rate": 4.610819813755055e-05, "loss": 0.0554, "step": 16360 }, { "epoch": 7.606694560669456, "grad_norm": 1.93319833278656, "learning_rate": 4.606601030401101e-05, "loss": 0.0743, "step": 16362 }, { "epoch": 7.607624360762436, "grad_norm": 0.9499289989471436, "learning_rate": 4.602361455745441e-05, "loss": 0.0449, "step": 16364 }, { "epoch": 7.608554160855416, "grad_norm": 1.6478419303894043, "learning_rate": 4.598101131630975e-05, "loss": 0.0563, "step": 16366 }, { "epoch": 7.609483960948396, "grad_norm": 1.0567574501037598, "learning_rate": 4.593820100105373e-05, "loss": 0.0463, "step": 16368 }, { "epoch": 7.610413761041376, "grad_norm": 1.6983312368392944, "learning_rate": 4.589518403420692e-05, "loss": 0.1183, "step": 16370 }, { "epoch": 7.6113435611343565, "grad_norm": 1.2510842084884644, "learning_rate": 4.585196084032941e-05, "loss": 0.059, "step": 16372 }, { "epoch": 7.612273361227336, "grad_norm": 1.8960065841674805, "learning_rate": 4.580853184601675e-05, "loss": 0.0557, "step": 16374 }, { "epoch": 7.613203161320316, "grad_norm": 1.2054035663604736, "learning_rate": 4.576489747989551e-05, "loss": 0.0463, "step": 16376 }, { "epoch": 7.614132961413296, "grad_norm": 1.867017149925232, "learning_rate": 4.572105817261922e-05, "loss": 0.0596, "step": 16378 }, { "epoch": 7.615062761506276, "grad_norm": 1.6178290843963623, "learning_rate": 4.567701435686424e-05, "loss": 0.0579, "step": 16380 }, { "epoch": 7.615992561599256, "grad_norm": 1.7551339864730835, "learning_rate": 4.563276646732516e-05, "loss": 0.0615, "step": 16382 }, { "epoch": 7.616922361692236, "grad_norm": 2.6217684745788574, "learning_rate": 4.558831494071084e-05, "loss": 0.0533, "step": 16384 }, { "epoch": 7.617852161785216, "grad_norm": 2.501049280166626, "learning_rate": 4.554366021573994e-05, "loss": 0.0715, "step": 16386 }, { "epoch": 7.618781961878196, "grad_norm": 2.297776460647583, "learning_rate": 4.549880273313647e-05, "loss": 0.0783, "step": 16388 }, { "epoch": 7.6197117619711765, "grad_norm": 1.8148043155670166, "learning_rate": 4.545374293562577e-05, "loss": 0.0638, "step": 16390 }, { "epoch": 7.620641562064156, "grad_norm": 2.0764284133911133, "learning_rate": 4.540848126792976e-05, "loss": 0.0574, "step": 16392 }, { "epoch": 7.621571362157137, "grad_norm": 1.272364616394043, "learning_rate": 4.536301817676293e-05, "loss": 0.1012, "step": 16394 }, { "epoch": 7.622501162250116, "grad_norm": 1.796527624130249, "learning_rate": 4.5317354110827575e-05, "loss": 0.0631, "step": 16396 }, { "epoch": 7.623430962343097, "grad_norm": 1.2678488492965698, "learning_rate": 4.527148952080948e-05, "loss": 0.05, "step": 16398 }, { "epoch": 7.624360762436076, "grad_norm": 1.5575586557388306, "learning_rate": 4.522542485937385e-05, "loss": 0.0484, "step": 16400 }, { "epoch": 7.625290562529056, "grad_norm": 1.6651686429977417, "learning_rate": 4.5179160581160154e-05, "loss": 0.0531, "step": 16402 }, { "epoch": 7.626220362622036, "grad_norm": 1.0605406761169434, "learning_rate": 4.513269714277822e-05, "loss": 0.0428, "step": 16404 }, { "epoch": 7.627150162715016, "grad_norm": 1.5226154327392578, "learning_rate": 4.5086035002803344e-05, "loss": 0.0408, "step": 16406 }, { "epoch": 7.6280799628079965, "grad_norm": 2.440094232559204, "learning_rate": 4.50391746217721e-05, "loss": 0.1006, "step": 16408 }, { "epoch": 7.629009762900976, "grad_norm": 2.345613479614258, "learning_rate": 4.499211646217748e-05, "loss": 0.0803, "step": 16410 }, { "epoch": 7.629939562993957, "grad_norm": 2.3480277061462402, "learning_rate": 4.494486098846441e-05, "loss": 0.0672, "step": 16412 }, { "epoch": 7.630869363086936, "grad_norm": 2.3687920570373535, "learning_rate": 4.489740866702556e-05, "loss": 0.0975, "step": 16414 }, { "epoch": 7.631799163179917, "grad_norm": 0.9876396059989929, "learning_rate": 4.484975996619602e-05, "loss": 0.0506, "step": 16416 }, { "epoch": 7.632728963272896, "grad_norm": 1.8207764625549316, "learning_rate": 4.480191535624935e-05, "loss": 0.0907, "step": 16418 }, { "epoch": 7.633658763365876, "grad_norm": 1.9624446630477905, "learning_rate": 4.475387530939246e-05, "loss": 0.1223, "step": 16420 }, { "epoch": 7.634588563458856, "grad_norm": 1.4513062238693237, "learning_rate": 4.4705640299761176e-05, "loss": 0.073, "step": 16422 }, { "epoch": 7.635518363551836, "grad_norm": 2.258592367172241, "learning_rate": 4.465721080341568e-05, "loss": 0.0392, "step": 16424 }, { "epoch": 7.6364481636448165, "grad_norm": 0.8997532725334167, "learning_rate": 4.4608587298335375e-05, "loss": 0.0435, "step": 16426 }, { "epoch": 7.637377963737796, "grad_norm": 1.8166064023971558, "learning_rate": 4.455977026441485e-05, "loss": 0.0532, "step": 16428 }, { "epoch": 7.638307763830777, "grad_norm": 2.0735697746276855, "learning_rate": 4.4510760183458424e-05, "loss": 0.0666, "step": 16430 }, { "epoch": 7.639237563923756, "grad_norm": 2.022789716720581, "learning_rate": 4.446155753917574e-05, "loss": 0.0816, "step": 16432 }, { "epoch": 7.640167364016737, "grad_norm": 1.724409818649292, "learning_rate": 4.4412162817177154e-05, "loss": 0.0562, "step": 16434 }, { "epoch": 7.641097164109716, "grad_norm": 1.7442737817764282, "learning_rate": 4.4362576504968506e-05, "loss": 0.0622, "step": 16436 }, { "epoch": 7.642026964202697, "grad_norm": 1.2578014135360718, "learning_rate": 4.4312799091946806e-05, "loss": 0.0588, "step": 16438 }, { "epoch": 7.642956764295676, "grad_norm": 1.1861724853515625, "learning_rate": 4.42628310693949e-05, "loss": 0.0302, "step": 16440 }, { "epoch": 7.643886564388657, "grad_norm": 2.020603895187378, "learning_rate": 4.421267293047706e-05, "loss": 0.083, "step": 16442 }, { "epoch": 7.6448163644816365, "grad_norm": 1.5917326211929321, "learning_rate": 4.4162325170233914e-05, "loss": 0.0705, "step": 16444 }, { "epoch": 7.645746164574616, "grad_norm": 2.5309627056121826, "learning_rate": 4.4111788285577437e-05, "loss": 0.0929, "step": 16446 }, { "epoch": 7.646675964667597, "grad_norm": 2.463437795639038, "learning_rate": 4.406106277528638e-05, "loss": 0.0454, "step": 16448 }, { "epoch": 7.647605764760576, "grad_norm": 1.001634955406189, "learning_rate": 4.401014914000093e-05, "loss": 0.0441, "step": 16450 }, { "epoch": 7.648535564853557, "grad_norm": 1.5948001146316528, "learning_rate": 4.395904788221817e-05, "loss": 0.0878, "step": 16452 }, { "epoch": 7.649465364946536, "grad_norm": 1.4601383209228516, "learning_rate": 4.390775950628696e-05, "loss": 0.066, "step": 16454 }, { "epoch": 7.650395165039517, "grad_norm": 1.639439582824707, "learning_rate": 4.385628451840274e-05, "loss": 0.0587, "step": 16456 }, { "epoch": 7.651324965132496, "grad_norm": 1.8584269285202026, "learning_rate": 4.380462342660296e-05, "loss": 0.0703, "step": 16458 }, { "epoch": 7.652254765225477, "grad_norm": 1.9326549768447876, "learning_rate": 4.375277674076163e-05, "loss": 0.0434, "step": 16460 }, { "epoch": 7.6531845653184565, "grad_norm": 1.9983763694763184, "learning_rate": 4.370074497258474e-05, "loss": 0.0763, "step": 16462 }, { "epoch": 7.654114365411436, "grad_norm": 1.844057321548462, "learning_rate": 4.3648528635604766e-05, "loss": 0.0736, "step": 16464 }, { "epoch": 7.655044165504417, "grad_norm": 1.2980393171310425, "learning_rate": 4.359612824517575e-05, "loss": 0.0937, "step": 16466 }, { "epoch": 7.655973965597396, "grad_norm": 1.7219237089157104, "learning_rate": 4.3543544318468634e-05, "loss": 0.0437, "step": 16468 }, { "epoch": 7.656903765690377, "grad_norm": 0.7118760943412781, "learning_rate": 4.349077737446538e-05, "loss": 0.0499, "step": 16470 }, { "epoch": 7.657833565783356, "grad_norm": 0.8784856796264648, "learning_rate": 4.3437827933954526e-05, "loss": 0.0674, "step": 16472 }, { "epoch": 7.658763365876337, "grad_norm": 2.3799169063568115, "learning_rate": 4.3384696519525606e-05, "loss": 0.0406, "step": 16474 }, { "epoch": 7.659693165969316, "grad_norm": 1.8958579301834106, "learning_rate": 4.333138365556418e-05, "loss": 0.0786, "step": 16476 }, { "epoch": 7.660622966062297, "grad_norm": 1.964813232421875, "learning_rate": 4.327788986824681e-05, "loss": 0.0692, "step": 16478 }, { "epoch": 7.6615527661552765, "grad_norm": 1.951161503791809, "learning_rate": 4.3224215685535416e-05, "loss": 0.0708, "step": 16480 }, { "epoch": 7.662482566248257, "grad_norm": 1.4997496604919434, "learning_rate": 4.317036163717274e-05, "loss": 0.0886, "step": 16482 }, { "epoch": 7.663412366341237, "grad_norm": 2.4844448566436768, "learning_rate": 4.3116328254676376e-05, "loss": 0.1015, "step": 16484 }, { "epoch": 7.664342166434217, "grad_norm": 2.425931930541992, "learning_rate": 4.306211607133393e-05, "loss": 0.0895, "step": 16486 }, { "epoch": 7.665271966527197, "grad_norm": 1.4681051969528198, "learning_rate": 4.3007725622197884e-05, "loss": 0.0378, "step": 16488 }, { "epoch": 7.666201766620176, "grad_norm": 1.5801461935043335, "learning_rate": 4.295315744407991e-05, "loss": 0.0516, "step": 16490 }, { "epoch": 7.667131566713157, "grad_norm": 1.7994458675384521, "learning_rate": 4.2898412075546e-05, "loss": 0.0614, "step": 16492 }, { "epoch": 7.668061366806136, "grad_norm": 2.3633341789245605, "learning_rate": 4.284349005691066e-05, "loss": 0.0941, "step": 16494 }, { "epoch": 7.668991166899117, "grad_norm": 2.082887887954712, "learning_rate": 4.27883919302323e-05, "loss": 0.0785, "step": 16496 }, { "epoch": 7.6699209669920965, "grad_norm": 1.7722452878952026, "learning_rate": 4.273311823930705e-05, "loss": 0.0686, "step": 16498 }, { "epoch": 7.670850767085077, "grad_norm": 1.7778290510177612, "learning_rate": 4.267766952966386e-05, "loss": 0.0449, "step": 16500 }, { "epoch": 7.671780567178057, "grad_norm": 1.6052570343017578, "learning_rate": 4.262204634855925e-05, "loss": 0.0465, "step": 16502 }, { "epoch": 7.672710367271037, "grad_norm": 2.691450834274292, "learning_rate": 4.2566249244971415e-05, "loss": 0.0873, "step": 16504 }, { "epoch": 7.673640167364017, "grad_norm": 2.7165744304656982, "learning_rate": 4.251027876959538e-05, "loss": 0.1092, "step": 16506 }, { "epoch": 7.674569967456996, "grad_norm": 2.3172802925109863, "learning_rate": 4.2454135474837e-05, "loss": 0.092, "step": 16508 }, { "epoch": 7.675499767549977, "grad_norm": 1.6713138818740845, "learning_rate": 4.239781991480802e-05, "loss": 0.0725, "step": 16510 }, { "epoch": 7.676429567642957, "grad_norm": 3.2537429332733154, "learning_rate": 4.234133264532032e-05, "loss": 0.0871, "step": 16512 }, { "epoch": 7.677359367735937, "grad_norm": 2.2490062713623047, "learning_rate": 4.228467422388032e-05, "loss": 0.0937, "step": 16514 }, { "epoch": 7.6782891678289165, "grad_norm": 2.2114241123199463, "learning_rate": 4.2227845209683906e-05, "loss": 0.0711, "step": 16516 }, { "epoch": 7.679218967921897, "grad_norm": 1.323087453842163, "learning_rate": 4.2170846163610437e-05, "loss": 0.0367, "step": 16518 }, { "epoch": 7.680148768014877, "grad_norm": 0.7153851389884949, "learning_rate": 4.2113677648217415e-05, "loss": 0.0356, "step": 16520 }, { "epoch": 7.681078568107857, "grad_norm": 2.4091720581054688, "learning_rate": 4.205634022773509e-05, "loss": 0.0592, "step": 16522 }, { "epoch": 7.682008368200837, "grad_norm": 0.86461341381073, "learning_rate": 4.199883446806062e-05, "loss": 0.0489, "step": 16524 }, { "epoch": 7.682938168293817, "grad_norm": 1.5402188301086426, "learning_rate": 4.194116093675273e-05, "loss": 0.0703, "step": 16526 }, { "epoch": 7.683867968386797, "grad_norm": 1.7969849109649658, "learning_rate": 4.188332020302582e-05, "loss": 0.0533, "step": 16528 }, { "epoch": 7.684797768479777, "grad_norm": 1.025529146194458, "learning_rate": 4.182531283774452e-05, "loss": 0.0609, "step": 16530 }, { "epoch": 7.685727568572757, "grad_norm": 1.5107156038284302, "learning_rate": 4.176713941341822e-05, "loss": 0.0387, "step": 16532 }, { "epoch": 7.6866573686657365, "grad_norm": 1.9991124868392944, "learning_rate": 4.170880050419495e-05, "loss": 0.0723, "step": 16534 }, { "epoch": 7.687587168758717, "grad_norm": 0.7958803772926331, "learning_rate": 4.1650296685856454e-05, "loss": 0.0527, "step": 16536 }, { "epoch": 7.6885169688516966, "grad_norm": 2.1678032875061035, "learning_rate": 4.1591628535811596e-05, "loss": 0.0733, "step": 16538 }, { "epoch": 7.689446768944677, "grad_norm": 2.6482949256896973, "learning_rate": 4.153279663309148e-05, "loss": 0.0543, "step": 16540 }, { "epoch": 7.690376569037657, "grad_norm": 1.7240976095199585, "learning_rate": 4.1473801558343145e-05, "loss": 0.0582, "step": 16542 }, { "epoch": 7.691306369130637, "grad_norm": 1.4971176385879517, "learning_rate": 4.14146438938241e-05, "loss": 0.0421, "step": 16544 }, { "epoch": 7.692236169223617, "grad_norm": 1.4484384059906006, "learning_rate": 4.135532422339675e-05, "loss": 0.0385, "step": 16546 }, { "epoch": 7.693165969316597, "grad_norm": 1.6176060438156128, "learning_rate": 4.1295843132522095e-05, "loss": 0.0418, "step": 16548 }, { "epoch": 7.694095769409577, "grad_norm": 1.86875319480896, "learning_rate": 4.123620120825476e-05, "loss": 0.0737, "step": 16550 }, { "epoch": 7.695025569502557, "grad_norm": 2.0641510486602783, "learning_rate": 4.117639903923631e-05, "loss": 0.0445, "step": 16552 }, { "epoch": 7.695955369595537, "grad_norm": 2.014247179031372, "learning_rate": 4.111643721568995e-05, "loss": 0.0593, "step": 16554 }, { "epoch": 7.696885169688517, "grad_norm": 0.9549909830093384, "learning_rate": 4.105631632941482e-05, "loss": 0.0677, "step": 16556 }, { "epoch": 7.697814969781497, "grad_norm": 0.9546562433242798, "learning_rate": 4.099603697377964e-05, "loss": 0.0553, "step": 16558 }, { "epoch": 7.698744769874477, "grad_norm": 2.2062957286834717, "learning_rate": 4.0935599743717464e-05, "loss": 0.0867, "step": 16560 }, { "epoch": 7.699674569967457, "grad_norm": 1.3636808395385742, "learning_rate": 4.087500523571921e-05, "loss": 0.0498, "step": 16562 }, { "epoch": 7.700604370060437, "grad_norm": 1.935593843460083, "learning_rate": 4.081425404782828e-05, "loss": 0.0669, "step": 16564 }, { "epoch": 7.701534170153417, "grad_norm": 2.3592817783355713, "learning_rate": 4.0753346779634435e-05, "loss": 0.0782, "step": 16566 }, { "epoch": 7.702463970246397, "grad_norm": 1.428572177886963, "learning_rate": 4.0692284032267686e-05, "loss": 0.0654, "step": 16568 }, { "epoch": 7.703393770339377, "grad_norm": 1.1302045583724976, "learning_rate": 4.0631066408392834e-05, "loss": 0.0216, "step": 16570 }, { "epoch": 7.704323570432357, "grad_norm": 1.4944626092910767, "learning_rate": 4.0569694512203055e-05, "loss": 0.0531, "step": 16572 }, { "epoch": 7.705253370525337, "grad_norm": 1.7435684204101562, "learning_rate": 4.050816894941411e-05, "loss": 0.0607, "step": 16574 }, { "epoch": 7.706183170618317, "grad_norm": 2.9673802852630615, "learning_rate": 4.044649032725854e-05, "loss": 0.0823, "step": 16576 }, { "epoch": 7.707112970711297, "grad_norm": 0.9323350191116333, "learning_rate": 4.0384659254479435e-05, "loss": 0.0725, "step": 16578 }, { "epoch": 7.708042770804277, "grad_norm": 1.6475379467010498, "learning_rate": 4.03226763413246e-05, "loss": 0.0466, "step": 16580 }, { "epoch": 7.708972570897257, "grad_norm": 0.9185786247253418, "learning_rate": 4.026054219954022e-05, "loss": 0.0351, "step": 16582 }, { "epoch": 7.709902370990237, "grad_norm": 1.4570480585098267, "learning_rate": 4.019825744236533e-05, "loss": 0.0521, "step": 16584 }, { "epoch": 7.710832171083217, "grad_norm": 2.8402373790740967, "learning_rate": 4.013582268452527e-05, "loss": 0.069, "step": 16586 }, { "epoch": 7.711761971176197, "grad_norm": 1.8914541006088257, "learning_rate": 4.007323854222582e-05, "loss": 0.0336, "step": 16588 }, { "epoch": 7.712691771269177, "grad_norm": 1.53150475025177, "learning_rate": 4.001050563314727e-05, "loss": 0.061, "step": 16590 }, { "epoch": 7.713621571362157, "grad_norm": 1.8939992189407349, "learning_rate": 3.9947624576438104e-05, "loss": 0.033, "step": 16592 }, { "epoch": 7.714551371455137, "grad_norm": 3.4138498306274414, "learning_rate": 3.988459599270905e-05, "loss": 0.0768, "step": 16594 }, { "epoch": 7.7154811715481175, "grad_norm": 2.11031436920166, "learning_rate": 3.9821420504026704e-05, "loss": 0.1326, "step": 16596 }, { "epoch": 7.716410971641097, "grad_norm": 1.6940261125564575, "learning_rate": 3.975809873390756e-05, "loss": 0.0618, "step": 16598 }, { "epoch": 7.717340771734078, "grad_norm": 1.1371643543243408, "learning_rate": 3.969463130731207e-05, "loss": 0.0524, "step": 16600 }, { "epoch": 7.718270571827057, "grad_norm": 2.4144175052642822, "learning_rate": 3.963101885063796e-05, "loss": 0.1465, "step": 16602 }, { "epoch": 7.719200371920037, "grad_norm": 2.4241418838500977, "learning_rate": 3.956726199171458e-05, "loss": 0.0683, "step": 16604 }, { "epoch": 7.720130172013017, "grad_norm": 2.0282697677612305, "learning_rate": 3.950336135979644e-05, "loss": 0.0577, "step": 16606 }, { "epoch": 7.721059972105997, "grad_norm": 2.1100547313690186, "learning_rate": 3.9439317585556865e-05, "loss": 0.0689, "step": 16608 }, { "epoch": 7.721989772198977, "grad_norm": 1.181229829788208, "learning_rate": 3.9375131301082184e-05, "loss": 0.0326, "step": 16610 }, { "epoch": 7.722919572291957, "grad_norm": 1.3818840980529785, "learning_rate": 3.931080313986496e-05, "loss": 0.0629, "step": 16612 }, { "epoch": 7.7238493723849375, "grad_norm": 1.7079371213912964, "learning_rate": 3.924633373679832e-05, "loss": 0.0404, "step": 16614 }, { "epoch": 7.724779172477917, "grad_norm": 1.1948773860931396, "learning_rate": 3.918172372816918e-05, "loss": 0.048, "step": 16616 }, { "epoch": 7.725708972570898, "grad_norm": 1.136445164680481, "learning_rate": 3.911697375165209e-05, "loss": 0.0575, "step": 16618 }, { "epoch": 7.726638772663877, "grad_norm": 2.459636926651001, "learning_rate": 3.905208444630347e-05, "loss": 0.1002, "step": 16620 }, { "epoch": 7.727568572756857, "grad_norm": 1.8773080110549927, "learning_rate": 3.8987056452554346e-05, "loss": 0.044, "step": 16622 }, { "epoch": 7.728498372849837, "grad_norm": 2.229318618774414, "learning_rate": 3.892189041220491e-05, "loss": 0.0473, "step": 16624 }, { "epoch": 7.729428172942817, "grad_norm": 1.238055944442749, "learning_rate": 3.885658696841753e-05, "loss": 0.0675, "step": 16626 }, { "epoch": 7.730357973035797, "grad_norm": 1.4915502071380615, "learning_rate": 3.8791146765710975e-05, "loss": 0.0439, "step": 16628 }, { "epoch": 7.731287773128777, "grad_norm": 1.531948447227478, "learning_rate": 3.8725570449953556e-05, "loss": 0.052, "step": 16630 }, { "epoch": 7.7322175732217575, "grad_norm": 2.687082529067993, "learning_rate": 3.8659858668356894e-05, "loss": 0.0662, "step": 16632 }, { "epoch": 7.733147373314737, "grad_norm": 1.3376476764678955, "learning_rate": 3.859401206947002e-05, "loss": 0.0438, "step": 16634 }, { "epoch": 7.734077173407718, "grad_norm": 1.391611099243164, "learning_rate": 3.852803130317207e-05, "loss": 0.0627, "step": 16636 }, { "epoch": 7.735006973500697, "grad_norm": 1.6566659212112427, "learning_rate": 3.846191702066673e-05, "loss": 0.056, "step": 16638 }, { "epoch": 7.735936773593678, "grad_norm": 1.7004902362823486, "learning_rate": 3.839566987447518e-05, "loss": 0.0435, "step": 16640 }, { "epoch": 7.736866573686657, "grad_norm": 1.5452935695648193, "learning_rate": 3.832929051842994e-05, "loss": 0.065, "step": 16642 }, { "epoch": 7.737796373779638, "grad_norm": 1.5651588439941406, "learning_rate": 3.826277960766855e-05, "loss": 0.0549, "step": 16644 }, { "epoch": 7.738726173872617, "grad_norm": 0.96825110912323, "learning_rate": 3.819613779862682e-05, "loss": 0.0426, "step": 16646 }, { "epoch": 7.739655973965597, "grad_norm": 1.8648048639297485, "learning_rate": 3.8129365749032596e-05, "loss": 0.0987, "step": 16648 }, { "epoch": 7.7405857740585775, "grad_norm": 2.457866668701172, "learning_rate": 3.806246411789897e-05, "loss": 0.0712, "step": 16650 }, { "epoch": 7.741515574151557, "grad_norm": 1.3700443506240845, "learning_rate": 3.7995433565517945e-05, "loss": 0.034, "step": 16652 }, { "epoch": 7.742445374244538, "grad_norm": 1.1164630651474, "learning_rate": 3.792827475345418e-05, "loss": 0.0431, "step": 16654 }, { "epoch": 7.743375174337517, "grad_norm": 2.3903965950012207, "learning_rate": 3.786098834453787e-05, "loss": 0.0546, "step": 16656 }, { "epoch": 7.744304974430498, "grad_norm": 1.540971279144287, "learning_rate": 3.77935750028588e-05, "loss": 0.0348, "step": 16658 }, { "epoch": 7.745234774523477, "grad_norm": 1.3250186443328857, "learning_rate": 3.7726035393759495e-05, "loss": 0.0439, "step": 16660 }, { "epoch": 7.746164574616458, "grad_norm": 2.5310006141662598, "learning_rate": 3.765837018382849e-05, "loss": 0.0599, "step": 16662 }, { "epoch": 7.747094374709437, "grad_norm": 2.076176166534424, "learning_rate": 3.759058004089423e-05, "loss": 0.0735, "step": 16664 }, { "epoch": 7.748024174802417, "grad_norm": 1.4688868522644043, "learning_rate": 3.752266563401793e-05, "loss": 0.0393, "step": 16666 }, { "epoch": 7.7489539748953975, "grad_norm": 1.1742470264434814, "learning_rate": 3.745462763348749e-05, "loss": 0.0349, "step": 16668 }, { "epoch": 7.749883774988378, "grad_norm": 1.303466558456421, "learning_rate": 3.738646671081038e-05, "loss": 0.0396, "step": 16670 }, { "epoch": 7.750813575081358, "grad_norm": 1.230114459991455, "learning_rate": 3.7318183538707443e-05, "loss": 0.0415, "step": 16672 }, { "epoch": 7.751743375174337, "grad_norm": 1.3477482795715332, "learning_rate": 3.72497787911061e-05, "loss": 0.0363, "step": 16674 }, { "epoch": 7.752673175267318, "grad_norm": 1.5844939947128296, "learning_rate": 3.718125314313346e-05, "loss": 0.0512, "step": 16676 }, { "epoch": 7.753602975360297, "grad_norm": 2.2860658168792725, "learning_rate": 3.711260727111014e-05, "loss": 0.0557, "step": 16678 }, { "epoch": 7.754532775453278, "grad_norm": 2.422670841217041, "learning_rate": 3.7043841852543044e-05, "loss": 0.0609, "step": 16680 }, { "epoch": 7.755462575546257, "grad_norm": 2.304481267929077, "learning_rate": 3.6974957566119234e-05, "loss": 0.0637, "step": 16682 }, { "epoch": 7.756392375639238, "grad_norm": 1.8713693618774414, "learning_rate": 3.690595509169872e-05, "loss": 0.0432, "step": 16684 }, { "epoch": 7.7573221757322175, "grad_norm": 1.2793174982070923, "learning_rate": 3.683683511030794e-05, "loss": 0.0499, "step": 16686 }, { "epoch": 7.758251975825198, "grad_norm": 1.9418411254882812, "learning_rate": 3.6767598304133486e-05, "loss": 0.0561, "step": 16688 }, { "epoch": 7.759181775918178, "grad_norm": 2.365631341934204, "learning_rate": 3.669824535651447e-05, "loss": 0.069, "step": 16690 }, { "epoch": 7.760111576011157, "grad_norm": 1.726607084274292, "learning_rate": 3.662877695193664e-05, "loss": 0.0435, "step": 16692 }, { "epoch": 7.761041376104138, "grad_norm": 0.7404807806015015, "learning_rate": 3.6559193776025025e-05, "loss": 0.0444, "step": 16694 }, { "epoch": 7.761971176197117, "grad_norm": 1.5684552192687988, "learning_rate": 3.648949651553739e-05, "loss": 0.0394, "step": 16696 }, { "epoch": 7.762900976290098, "grad_norm": 1.0017389059066772, "learning_rate": 3.641968585835772e-05, "loss": 0.0771, "step": 16698 }, { "epoch": 7.763830776383077, "grad_norm": 1.8056001663208008, "learning_rate": 3.634976249348879e-05, "loss": 0.0645, "step": 16700 }, { "epoch": 7.764760576476058, "grad_norm": 1.1664016246795654, "learning_rate": 3.6279727111046295e-05, "loss": 0.0822, "step": 16702 }, { "epoch": 7.7656903765690375, "grad_norm": 1.6783802509307861, "learning_rate": 3.620958040225102e-05, "loss": 0.0453, "step": 16704 }, { "epoch": 7.766620176662018, "grad_norm": 2.0768213272094727, "learning_rate": 3.6139323059422584e-05, "loss": 0.0976, "step": 16706 }, { "epoch": 7.767549976754998, "grad_norm": 1.6672829389572144, "learning_rate": 3.606895577597276e-05, "loss": 0.0542, "step": 16708 }, { "epoch": 7.768479776847977, "grad_norm": 1.3531455993652344, "learning_rate": 3.599847924639806e-05, "loss": 0.046, "step": 16710 }, { "epoch": 7.769409576940958, "grad_norm": 1.0554219484329224, "learning_rate": 3.592789416627355e-05, "loss": 0.0355, "step": 16712 }, { "epoch": 7.770339377033938, "grad_norm": 1.7216734886169434, "learning_rate": 3.5857201232245235e-05, "loss": 0.0544, "step": 16714 }, { "epoch": 7.771269177126918, "grad_norm": 2.0144011974334717, "learning_rate": 3.578640114202414e-05, "loss": 0.0462, "step": 16716 }, { "epoch": 7.772198977219897, "grad_norm": 1.5671961307525635, "learning_rate": 3.571549459437841e-05, "loss": 0.0533, "step": 16718 }, { "epoch": 7.773128777312878, "grad_norm": 1.9782191514968872, "learning_rate": 3.564448228912698e-05, "loss": 0.0543, "step": 16720 }, { "epoch": 7.7740585774058575, "grad_norm": 1.5657219886779785, "learning_rate": 3.5573364927132796e-05, "loss": 0.0634, "step": 16722 }, { "epoch": 7.774988377498838, "grad_norm": 1.478164553642273, "learning_rate": 3.5502143210295334e-05, "loss": 0.055, "step": 16724 }, { "epoch": 7.775918177591818, "grad_norm": 2.3846020698547363, "learning_rate": 3.5430817841544356e-05, "loss": 0.0685, "step": 16726 }, { "epoch": 7.776847977684798, "grad_norm": 1.263056993484497, "learning_rate": 3.5359389524832296e-05, "loss": 0.0422, "step": 16728 }, { "epoch": 7.777777777777778, "grad_norm": 1.3131389617919922, "learning_rate": 3.528785896512787e-05, "loss": 0.059, "step": 16730 }, { "epoch": 7.778707577870758, "grad_norm": 2.196650266647339, "learning_rate": 3.521622686840892e-05, "loss": 0.0601, "step": 16732 }, { "epoch": 7.779637377963738, "grad_norm": 2.104022264480591, "learning_rate": 3.5144493941655157e-05, "loss": 0.0673, "step": 16734 }, { "epoch": 7.780567178056717, "grad_norm": 2.234419107437134, "learning_rate": 3.507266089284177e-05, "loss": 0.0677, "step": 16736 }, { "epoch": 7.781496978149698, "grad_norm": 1.4702624082565308, "learning_rate": 3.5000728430931856e-05, "loss": 0.0553, "step": 16738 }, { "epoch": 7.7824267782426775, "grad_norm": 1.1998854875564575, "learning_rate": 3.492869726586964e-05, "loss": 0.0427, "step": 16740 }, { "epoch": 7.783356578335658, "grad_norm": 0.9571369886398315, "learning_rate": 3.485656810857395e-05, "loss": 0.0548, "step": 16742 }, { "epoch": 7.784286378428638, "grad_norm": 1.7162528038024902, "learning_rate": 3.47843416709302e-05, "loss": 0.0855, "step": 16744 }, { "epoch": 7.785216178521618, "grad_norm": 1.2826361656188965, "learning_rate": 3.4712018665784334e-05, "loss": 0.0372, "step": 16746 }, { "epoch": 7.786145978614598, "grad_norm": 2.257774829864502, "learning_rate": 3.4639599806935136e-05, "loss": 0.0564, "step": 16748 }, { "epoch": 7.787075778707578, "grad_norm": 1.2946947813034058, "learning_rate": 3.4567085809127425e-05, "loss": 0.0666, "step": 16750 }, { "epoch": 7.788005578800558, "grad_norm": 1.137647271156311, "learning_rate": 3.449447738804526e-05, "loss": 0.0582, "step": 16752 }, { "epoch": 7.788935378893538, "grad_norm": 2.2476775646209717, "learning_rate": 3.442177526030417e-05, "loss": 0.089, "step": 16754 }, { "epoch": 7.789865178986518, "grad_norm": 1.882694959640503, "learning_rate": 3.4348980143445156e-05, "loss": 0.0846, "step": 16756 }, { "epoch": 7.790794979079498, "grad_norm": 1.406518816947937, "learning_rate": 3.427609275592638e-05, "loss": 0.0424, "step": 16758 }, { "epoch": 7.791724779172478, "grad_norm": 1.955222249031067, "learning_rate": 3.42031138171171e-05, "loss": 0.0579, "step": 16760 }, { "epoch": 7.792654579265458, "grad_norm": 1.3459556102752686, "learning_rate": 3.413004404728989e-05, "loss": 0.0366, "step": 16762 }, { "epoch": 7.793584379358438, "grad_norm": 0.7511118650436401, "learning_rate": 3.4056884167613806e-05, "loss": 0.0292, "step": 16764 }, { "epoch": 7.794514179451418, "grad_norm": 1.1171377897262573, "learning_rate": 3.3983634900147474e-05, "loss": 0.0519, "step": 16766 }, { "epoch": 7.795443979544398, "grad_norm": 1.0624902248382568, "learning_rate": 3.391029696783136e-05, "loss": 0.0499, "step": 16768 }, { "epoch": 7.796373779637378, "grad_norm": 1.3172053098678589, "learning_rate": 3.383687109448157e-05, "loss": 0.0415, "step": 16770 }, { "epoch": 7.797303579730358, "grad_norm": 1.0181148052215576, "learning_rate": 3.376335800478166e-05, "loss": 0.0408, "step": 16772 }, { "epoch": 7.798233379823338, "grad_norm": 0.9088833332061768, "learning_rate": 3.368975842427607e-05, "loss": 0.0299, "step": 16774 }, { "epoch": 7.799163179916318, "grad_norm": 0.7739241123199463, "learning_rate": 3.361607307936311e-05, "loss": 0.0327, "step": 16776 }, { "epoch": 7.800092980009298, "grad_norm": 0.320134699344635, "learning_rate": 3.354230269728723e-05, "loss": 0.0245, "step": 16778 }, { "epoch": 7.801022780102278, "grad_norm": 1.3308786153793335, "learning_rate": 3.346844800613248e-05, "loss": 0.0603, "step": 16780 }, { "epoch": 7.801952580195258, "grad_norm": 1.1318663358688354, "learning_rate": 3.3394509734814687e-05, "loss": 0.0501, "step": 16782 }, { "epoch": 7.802882380288238, "grad_norm": 1.488613247871399, "learning_rate": 3.332048861307479e-05, "loss": 0.0373, "step": 16784 }, { "epoch": 7.803812180381218, "grad_norm": 1.9261399507522583, "learning_rate": 3.3246385371471496e-05, "loss": 0.053, "step": 16786 }, { "epoch": 7.804741980474198, "grad_norm": 1.0522964000701904, "learning_rate": 3.31722007413737e-05, "loss": 0.0365, "step": 16788 }, { "epoch": 7.805671780567178, "grad_norm": 2.4280471801757812, "learning_rate": 3.3097935454953915e-05, "loss": 0.0986, "step": 16790 }, { "epoch": 7.806601580660158, "grad_norm": 1.3463249206542969, "learning_rate": 3.302359024518047e-05, "loss": 0.0437, "step": 16792 }, { "epoch": 7.807531380753138, "grad_norm": 0.7615600228309631, "learning_rate": 3.2949165845810466e-05, "loss": 0.0274, "step": 16794 }, { "epoch": 7.808461180846118, "grad_norm": 0.8470311164855957, "learning_rate": 3.287466299138278e-05, "loss": 0.0684, "step": 16796 }, { "epoch": 7.8093909809390984, "grad_norm": 1.4062306880950928, "learning_rate": 3.280008241721049e-05, "loss": 0.0443, "step": 16798 }, { "epoch": 7.810320781032078, "grad_norm": 1.3433717489242554, "learning_rate": 3.272542485937385e-05, "loss": 0.0697, "step": 16800 }, { "epoch": 7.8112505811250585, "grad_norm": 2.9770514965057373, "learning_rate": 3.265069105471265e-05, "loss": 0.0817, "step": 16802 }, { "epoch": 7.812180381218038, "grad_norm": 0.6693090200424194, "learning_rate": 3.2575881740819524e-05, "loss": 0.0184, "step": 16804 }, { "epoch": 7.813110181311018, "grad_norm": 0.9840891361236572, "learning_rate": 3.2500997656032125e-05, "loss": 0.0377, "step": 16806 }, { "epoch": 7.814039981403998, "grad_norm": 1.0029600858688354, "learning_rate": 3.242603953942605e-05, "loss": 0.0444, "step": 16808 }, { "epoch": 7.814969781496978, "grad_norm": 1.5436441898345947, "learning_rate": 3.2351008130807736e-05, "loss": 0.0835, "step": 16810 }, { "epoch": 7.815899581589958, "grad_norm": 1.6808786392211914, "learning_rate": 3.22759041707069e-05, "loss": 0.0444, "step": 16812 }, { "epoch": 7.816829381682938, "grad_norm": 1.31619393825531, "learning_rate": 3.2200728400369375e-05, "loss": 0.0353, "step": 16814 }, { "epoch": 7.817759181775918, "grad_norm": 1.313462734222412, "learning_rate": 3.212548156174959e-05, "loss": 0.0579, "step": 16816 }, { "epoch": 7.818688981868898, "grad_norm": 2.210641860961914, "learning_rate": 3.205016439750339e-05, "loss": 0.0781, "step": 16818 }, { "epoch": 7.8196187819618785, "grad_norm": 0.8725653290748596, "learning_rate": 3.197477765098093e-05, "loss": 0.0285, "step": 16820 }, { "epoch": 7.820548582054858, "grad_norm": 1.7775822877883911, "learning_rate": 3.1899322066218733e-05, "loss": 0.0552, "step": 16822 }, { "epoch": 7.821478382147838, "grad_norm": 1.027217984199524, "learning_rate": 3.182379838793327e-05, "loss": 0.0372, "step": 16824 }, { "epoch": 7.822408182240818, "grad_norm": 1.596207857131958, "learning_rate": 3.1748207361512586e-05, "loss": 0.0779, "step": 16826 }, { "epoch": 7.823337982333798, "grad_norm": 2.127506971359253, "learning_rate": 3.167254973300953e-05, "loss": 0.0495, "step": 16828 }, { "epoch": 7.824267782426778, "grad_norm": 2.168199300765991, "learning_rate": 3.159682624913452e-05, "loss": 0.0603, "step": 16830 }, { "epoch": 7.825197582519758, "grad_norm": 1.690405249595642, "learning_rate": 3.1521037657247585e-05, "loss": 0.0473, "step": 16832 }, { "epoch": 7.826127382612738, "grad_norm": 1.3589327335357666, "learning_rate": 3.144518470535171e-05, "loss": 0.0583, "step": 16834 }, { "epoch": 7.827057182705718, "grad_norm": 1.0813300609588623, "learning_rate": 3.1369268142084726e-05, "loss": 0.0402, "step": 16836 }, { "epoch": 7.8279869827986985, "grad_norm": 1.1382421255111694, "learning_rate": 3.129328871671256e-05, "loss": 0.0382, "step": 16838 }, { "epoch": 7.828916782891678, "grad_norm": 0.520941972732544, "learning_rate": 3.121724717912155e-05, "loss": 0.0275, "step": 16840 }, { "epoch": 7.829846582984659, "grad_norm": 0.9902595281600952, "learning_rate": 3.1141144279810794e-05, "loss": 0.0712, "step": 16842 }, { "epoch": 7.830776383077638, "grad_norm": 1.271768569946289, "learning_rate": 3.1064980769885376e-05, "loss": 0.038, "step": 16844 }, { "epoch": 7.831706183170619, "grad_norm": 1.1388310194015503, "learning_rate": 3.09887574010482e-05, "loss": 0.0385, "step": 16846 }, { "epoch": 7.832635983263598, "grad_norm": 1.5462336540222168, "learning_rate": 3.091247492559332e-05, "loss": 0.0378, "step": 16848 }, { "epoch": 7.833565783356578, "grad_norm": 1.9319510459899902, "learning_rate": 3.0836134096397805e-05, "loss": 0.0611, "step": 16850 }, { "epoch": 7.834495583449558, "grad_norm": 0.5678504109382629, "learning_rate": 3.07597356669149e-05, "loss": 0.0183, "step": 16852 }, { "epoch": 7.835425383542538, "grad_norm": 1.8095173835754395, "learning_rate": 3.0683280391166344e-05, "loss": 0.0564, "step": 16854 }, { "epoch": 7.8363551836355185, "grad_norm": 1.7496118545532227, "learning_rate": 3.060676902373467e-05, "loss": 0.0582, "step": 16856 }, { "epoch": 7.837284983728498, "grad_norm": 1.915982961654663, "learning_rate": 3.053020231975637e-05, "loss": 0.054, "step": 16858 }, { "epoch": 7.838214783821479, "grad_norm": 0.8851522207260132, "learning_rate": 3.04535810349138e-05, "loss": 0.047, "step": 16860 }, { "epoch": 7.839144583914458, "grad_norm": 1.8189263343811035, "learning_rate": 3.037690592542803e-05, "loss": 0.036, "step": 16862 }, { "epoch": 7.840074384007439, "grad_norm": 1.7083461284637451, "learning_rate": 3.0300177748051525e-05, "loss": 0.0343, "step": 16864 }, { "epoch": 7.841004184100418, "grad_norm": 1.4613621234893799, "learning_rate": 3.0223397260060414e-05, "loss": 0.0463, "step": 16866 }, { "epoch": 7.841933984193398, "grad_norm": 0.999251663684845, "learning_rate": 3.0146565219247202e-05, "loss": 0.0477, "step": 16868 }, { "epoch": 7.842863784286378, "grad_norm": 0.7600599527359009, "learning_rate": 3.0069682383913033e-05, "loss": 0.0263, "step": 16870 }, { "epoch": 7.843793584379359, "grad_norm": 1.8509740829467773, "learning_rate": 2.9992749512860356e-05, "loss": 0.0526, "step": 16872 }, { "epoch": 7.8447233844723385, "grad_norm": 1.090415358543396, "learning_rate": 2.9915767365385726e-05, "loss": 0.0455, "step": 16874 }, { "epoch": 7.845653184565318, "grad_norm": 1.4777929782867432, "learning_rate": 2.9838736701271694e-05, "loss": 0.0382, "step": 16876 }, { "epoch": 7.846582984658299, "grad_norm": 2.370012044906616, "learning_rate": 2.9761658280779897e-05, "loss": 0.0688, "step": 16878 }, { "epoch": 7.847512784751278, "grad_norm": 1.8055044412612915, "learning_rate": 2.968453286464332e-05, "loss": 0.0545, "step": 16880 }, { "epoch": 7.848442584844259, "grad_norm": 0.986985445022583, "learning_rate": 2.9607361214058494e-05, "loss": 0.0241, "step": 16882 }, { "epoch": 7.849372384937238, "grad_norm": 0.4060528576374054, "learning_rate": 2.9530144090678642e-05, "loss": 0.0172, "step": 16884 }, { "epoch": 7.850302185030219, "grad_norm": 1.971962809562683, "learning_rate": 2.9452882256605424e-05, "loss": 0.0587, "step": 16886 }, { "epoch": 7.851231985123198, "grad_norm": 1.0496793985366821, "learning_rate": 2.937557647438212e-05, "loss": 0.0292, "step": 16888 }, { "epoch": 7.852161785216179, "grad_norm": 0.6125305891036987, "learning_rate": 2.929822750698542e-05, "loss": 0.0184, "step": 16890 }, { "epoch": 7.8530915853091585, "grad_norm": 0.9373854398727417, "learning_rate": 2.9220836117818487e-05, "loss": 0.0464, "step": 16892 }, { "epoch": 7.854021385402138, "grad_norm": 1.58176589012146, "learning_rate": 2.914340307070318e-05, "loss": 0.0478, "step": 16894 }, { "epoch": 7.854951185495119, "grad_norm": 1.5521425008773804, "learning_rate": 2.9065929129872233e-05, "loss": 0.0415, "step": 16896 }, { "epoch": 7.855880985588098, "grad_norm": 1.2798234224319458, "learning_rate": 2.8988415059962353e-05, "loss": 0.044, "step": 16898 }, { "epoch": 7.856810785681079, "grad_norm": 1.0469868183135986, "learning_rate": 2.8910861626005925e-05, "loss": 0.038, "step": 16900 }, { "epoch": 7.857740585774058, "grad_norm": 2.3321781158447266, "learning_rate": 2.883326959342422e-05, "loss": 0.0471, "step": 16902 }, { "epoch": 7.858670385867039, "grad_norm": 1.020408034324646, "learning_rate": 2.8755639728019177e-05, "loss": 0.0372, "step": 16904 }, { "epoch": 7.859600185960018, "grad_norm": 0.8558564186096191, "learning_rate": 2.8677972795966058e-05, "loss": 0.044, "step": 16906 }, { "epoch": 7.860529986052999, "grad_norm": 1.5546613931655884, "learning_rate": 2.8600269563806468e-05, "loss": 0.0615, "step": 16908 }, { "epoch": 7.8614597861459785, "grad_norm": 2.1554057598114014, "learning_rate": 2.8522530798439692e-05, "loss": 0.0547, "step": 16910 }, { "epoch": 7.862389586238959, "grad_norm": 1.2615684270858765, "learning_rate": 2.8444757267116124e-05, "loss": 0.0179, "step": 16912 }, { "epoch": 7.863319386331939, "grad_norm": 0.8303524851799011, "learning_rate": 2.836694973742904e-05, "loss": 0.0281, "step": 16914 }, { "epoch": 7.864249186424919, "grad_norm": 1.8049952983856201, "learning_rate": 2.8289108977307243e-05, "loss": 0.0685, "step": 16916 }, { "epoch": 7.865178986517899, "grad_norm": 2.3211915493011475, "learning_rate": 2.8211235755007802e-05, "loss": 0.0802, "step": 16918 }, { "epoch": 7.866108786610878, "grad_norm": 1.2424092292785645, "learning_rate": 2.8133330839107713e-05, "loss": 0.0546, "step": 16920 }, { "epoch": 7.867038586703859, "grad_norm": 2.545078754425049, "learning_rate": 2.8055394998497393e-05, "loss": 0.0452, "step": 16922 }, { "epoch": 7.867968386796838, "grad_norm": 1.0147775411605835, "learning_rate": 2.797742900237195e-05, "loss": 0.0482, "step": 16924 }, { "epoch": 7.868898186889819, "grad_norm": 1.1976957321166992, "learning_rate": 2.7899433620224198e-05, "loss": 0.0276, "step": 16926 }, { "epoch": 7.8698279869827985, "grad_norm": 0.7205071449279785, "learning_rate": 2.7821409621837253e-05, "loss": 0.031, "step": 16928 }, { "epoch": 7.870757787075779, "grad_norm": 1.0689069032669067, "learning_rate": 2.7743357777276303e-05, "loss": 0.0219, "step": 16930 }, { "epoch": 7.871687587168759, "grad_norm": 1.2882702350616455, "learning_rate": 2.766527885688163e-05, "loss": 0.0314, "step": 16932 }, { "epoch": 7.872617387261739, "grad_norm": 1.4014194011688232, "learning_rate": 2.7587173631260657e-05, "loss": 0.0633, "step": 16934 }, { "epoch": 7.873547187354719, "grad_norm": 1.0366567373275757, "learning_rate": 2.750904287128051e-05, "loss": 0.0373, "step": 16936 }, { "epoch": 7.874476987447698, "grad_norm": 1.2654105424880981, "learning_rate": 2.7430887348060166e-05, "loss": 0.0418, "step": 16938 }, { "epoch": 7.875406787540679, "grad_norm": 0.7775326371192932, "learning_rate": 2.7352707832963004e-05, "loss": 0.0224, "step": 16940 }, { "epoch": 7.876336587633658, "grad_norm": 1.5793368816375732, "learning_rate": 2.727450509758944e-05, "loss": 0.052, "step": 16942 }, { "epoch": 7.877266387726639, "grad_norm": 1.7244926691055298, "learning_rate": 2.7196279913768733e-05, "loss": 0.0272, "step": 16944 }, { "epoch": 7.8781961878196185, "grad_norm": 1.9323381185531616, "learning_rate": 2.7118033053551954e-05, "loss": 0.0416, "step": 16946 }, { "epoch": 7.879125987912599, "grad_norm": 1.2189791202545166, "learning_rate": 2.7039765289204105e-05, "loss": 0.0364, "step": 16948 }, { "epoch": 7.880055788005579, "grad_norm": 2.4582650661468506, "learning_rate": 2.6961477393196248e-05, "loss": 0.05, "step": 16950 }, { "epoch": 7.880985588098559, "grad_norm": 0.6699551343917847, "learning_rate": 2.688317013819849e-05, "loss": 0.0246, "step": 16952 }, { "epoch": 7.881915388191539, "grad_norm": 1.103470802307129, "learning_rate": 2.6804844297071644e-05, "loss": 0.0425, "step": 16954 }, { "epoch": 7.882845188284519, "grad_norm": 1.6474987268447876, "learning_rate": 2.672650064286032e-05, "loss": 0.0549, "step": 16956 }, { "epoch": 7.883774988377499, "grad_norm": 1.4729564189910889, "learning_rate": 2.664813994878467e-05, "loss": 0.0529, "step": 16958 }, { "epoch": 7.884704788470479, "grad_norm": 0.929309606552124, "learning_rate": 2.6569762988232933e-05, "loss": 0.0391, "step": 16960 }, { "epoch": 7.885634588563459, "grad_norm": 1.4501826763153076, "learning_rate": 2.6491370534754413e-05, "loss": 0.0509, "step": 16962 }, { "epoch": 7.8865643886564385, "grad_norm": 1.4687976837158203, "learning_rate": 2.641296336205072e-05, "loss": 0.0336, "step": 16964 }, { "epoch": 7.887494188749419, "grad_norm": 2.054729700088501, "learning_rate": 2.6334542243969167e-05, "loss": 0.062, "step": 16966 }, { "epoch": 7.888423988842399, "grad_norm": 1.5302907228469849, "learning_rate": 2.6256107954494442e-05, "loss": 0.0409, "step": 16968 }, { "epoch": 7.889353788935379, "grad_norm": 1.574060320854187, "learning_rate": 2.617766126774123e-05, "loss": 0.0334, "step": 16970 }, { "epoch": 7.890283589028359, "grad_norm": 1.7400606870651245, "learning_rate": 2.6099202957946838e-05, "loss": 0.0304, "step": 16972 }, { "epoch": 7.891213389121339, "grad_norm": 1.8483622074127197, "learning_rate": 2.6020733799462835e-05, "loss": 0.0563, "step": 16974 }, { "epoch": 7.892143189214319, "grad_norm": 1.1352672576904297, "learning_rate": 2.5942254566748496e-05, "loss": 0.0444, "step": 16976 }, { "epoch": 7.893072989307299, "grad_norm": 0.894137442111969, "learning_rate": 2.58637660343619e-05, "loss": 0.0501, "step": 16978 }, { "epoch": 7.894002789400279, "grad_norm": 1.1908776760101318, "learning_rate": 2.578526897695335e-05, "loss": 0.0311, "step": 16980 }, { "epoch": 7.8949325894932585, "grad_norm": 1.596888542175293, "learning_rate": 2.570676416925702e-05, "loss": 0.0602, "step": 16982 }, { "epoch": 7.895862389586239, "grad_norm": 1.2478244304656982, "learning_rate": 2.5628252386083586e-05, "loss": 0.0411, "step": 16984 }, { "epoch": 7.896792189679219, "grad_norm": 0.8717209100723267, "learning_rate": 2.5549734402312823e-05, "loss": 0.0297, "step": 16986 }, { "epoch": 7.897721989772199, "grad_norm": 2.8928956985473633, "learning_rate": 2.5471210992885273e-05, "loss": 0.0567, "step": 16988 }, { "epoch": 7.898651789865179, "grad_norm": 1.469499111175537, "learning_rate": 2.5392682932795635e-05, "loss": 0.0454, "step": 16990 }, { "epoch": 7.899581589958159, "grad_norm": 2.0605990886688232, "learning_rate": 2.5314150997083986e-05, "loss": 0.064, "step": 16992 }, { "epoch": 7.900511390051139, "grad_norm": 1.8408159017562866, "learning_rate": 2.5235615960828727e-05, "loss": 0.0397, "step": 16994 }, { "epoch": 7.901441190144119, "grad_norm": 1.499558687210083, "learning_rate": 2.5157078599139146e-05, "loss": 0.0406, "step": 16996 }, { "epoch": 7.902370990237099, "grad_norm": 1.661634922027588, "learning_rate": 2.5078539687147124e-05, "loss": 0.0484, "step": 16998 }, { "epoch": 7.903300790330079, "grad_norm": 1.7572276592254639, "learning_rate": 2.5000000000000184e-05, "loss": 0.0464, "step": 17000 }, { "epoch": 7.903300790330079, "eval_cer": 0.14354584424552763, "eval_loss": 0.22967231273651123, "eval_runtime": 401.9709, "eval_samples_per_second": 31.579, "eval_steps_per_second": 0.988, "step": 17000 }, { "epoch": 7.904230590423059, "grad_norm": 1.0240886211395264, "learning_rate": 2.4921460312853153e-05, "loss": 0.0219, "step": 17002 }, { "epoch": 7.9051603905160395, "grad_norm": 2.001498222351074, "learning_rate": 2.4842921400861127e-05, "loss": 0.0478, "step": 17004 }, { "epoch": 7.906090190609019, "grad_norm": 1.5566520690917969, "learning_rate": 2.476438403917155e-05, "loss": 0.0347, "step": 17006 }, { "epoch": 7.907019990701999, "grad_norm": 0.9786196947097778, "learning_rate": 2.468584900291629e-05, "loss": 0.0291, "step": 17008 }, { "epoch": 7.907949790794979, "grad_norm": 1.2961280345916748, "learning_rate": 2.4607317067204638e-05, "loss": 0.0327, "step": 17010 }, { "epoch": 7.908879590887959, "grad_norm": 1.1175256967544556, "learning_rate": 2.4528789007115e-05, "loss": 0.0553, "step": 17012 }, { "epoch": 7.909809390980939, "grad_norm": 1.1178295612335205, "learning_rate": 2.4450265597687535e-05, "loss": 0.0282, "step": 17014 }, { "epoch": 7.910739191073919, "grad_norm": 1.1712572574615479, "learning_rate": 2.4371747613916684e-05, "loss": 0.0251, "step": 17016 }, { "epoch": 7.911668991166899, "grad_norm": 1.63914954662323, "learning_rate": 2.4293235830743252e-05, "loss": 0.0632, "step": 17018 }, { "epoch": 7.912598791259879, "grad_norm": 1.0681523084640503, "learning_rate": 2.4214731023046925e-05, "loss": 0.0464, "step": 17020 }, { "epoch": 7.9135285913528595, "grad_norm": 1.169343113899231, "learning_rate": 2.4136233965638286e-05, "loss": 0.0625, "step": 17022 }, { "epoch": 7.914458391445839, "grad_norm": 1.1800718307495117, "learning_rate": 2.4057745433251777e-05, "loss": 0.0456, "step": 17024 }, { "epoch": 7.915388191538819, "grad_norm": 1.4794801473617554, "learning_rate": 2.397926620053744e-05, "loss": 0.0311, "step": 17026 }, { "epoch": 7.916317991631799, "grad_norm": 1.520194172859192, "learning_rate": 2.3900797042053436e-05, "loss": 0.0357, "step": 17028 }, { "epoch": 7.91724779172478, "grad_norm": 1.0426750183105469, "learning_rate": 2.382233873225904e-05, "loss": 0.0452, "step": 17030 }, { "epoch": 7.918177591817759, "grad_norm": 0.6891811490058899, "learning_rate": 2.3743892045505828e-05, "loss": 0.0273, "step": 17032 }, { "epoch": 7.919107391910739, "grad_norm": 0.8722942471504211, "learning_rate": 2.3665457756031107e-05, "loss": 0.0352, "step": 17034 }, { "epoch": 7.920037192003719, "grad_norm": 1.1189128160476685, "learning_rate": 2.3587036637949557e-05, "loss": 0.0421, "step": 17036 }, { "epoch": 7.920966992096699, "grad_norm": 1.7127363681793213, "learning_rate": 2.3508629465245863e-05, "loss": 0.0443, "step": 17038 }, { "epoch": 7.9218967921896795, "grad_norm": 0.9019880294799805, "learning_rate": 2.3430237011767347e-05, "loss": 0.0221, "step": 17040 }, { "epoch": 7.922826592282659, "grad_norm": 1.362216591835022, "learning_rate": 2.3351860051215606e-05, "loss": 0.0609, "step": 17042 }, { "epoch": 7.92375639237564, "grad_norm": 0.778521716594696, "learning_rate": 2.327349935713995e-05, "loss": 0.0407, "step": 17044 }, { "epoch": 7.924686192468619, "grad_norm": 1.730947732925415, "learning_rate": 2.3195155702928633e-05, "loss": 0.0483, "step": 17046 }, { "epoch": 7.9256159925616, "grad_norm": 1.8926277160644531, "learning_rate": 2.3116829861801785e-05, "loss": 0.0424, "step": 17048 }, { "epoch": 7.926545792654579, "grad_norm": 0.9496641159057617, "learning_rate": 2.303852260680403e-05, "loss": 0.018, "step": 17050 }, { "epoch": 7.927475592747559, "grad_norm": 0.5569877028465271, "learning_rate": 2.2960234710796172e-05, "loss": 0.0221, "step": 17052 }, { "epoch": 7.928405392840539, "grad_norm": 1.285869836807251, "learning_rate": 2.2881966946448322e-05, "loss": 0.0363, "step": 17054 }, { "epoch": 7.929335192933519, "grad_norm": 0.6855406761169434, "learning_rate": 2.280372008623154e-05, "loss": 0.0133, "step": 17056 }, { "epoch": 7.9302649930264995, "grad_norm": 1.6617883443832397, "learning_rate": 2.272549490241083e-05, "loss": 0.0359, "step": 17058 }, { "epoch": 7.931194793119479, "grad_norm": 1.7353312969207764, "learning_rate": 2.264729216703727e-05, "loss": 0.0591, "step": 17060 }, { "epoch": 7.9321245932124596, "grad_norm": 2.569652795791626, "learning_rate": 2.2569112651940104e-05, "loss": 0.0285, "step": 17062 }, { "epoch": 7.933054393305439, "grad_norm": 1.058506727218628, "learning_rate": 2.2490957128719766e-05, "loss": 0.0265, "step": 17064 }, { "epoch": 7.93398419339842, "grad_norm": 0.6668624877929688, "learning_rate": 2.2412826368739535e-05, "loss": 0.0646, "step": 17066 }, { "epoch": 7.934913993491399, "grad_norm": 0.8597091436386108, "learning_rate": 2.2334721143118646e-05, "loss": 0.0345, "step": 17068 }, { "epoch": 7.93584379358438, "grad_norm": 1.2284756898880005, "learning_rate": 2.2256642222723974e-05, "loss": 0.0494, "step": 17070 }, { "epoch": 7.936773593677359, "grad_norm": 1.7570805549621582, "learning_rate": 2.2178590378163024e-05, "loss": 0.0527, "step": 17072 }, { "epoch": 7.93770339377034, "grad_norm": 0.7102115750312805, "learning_rate": 2.210056637977608e-05, "loss": 0.0438, "step": 17074 }, { "epoch": 7.9386331938633194, "grad_norm": 0.8482915759086609, "learning_rate": 2.202257099762833e-05, "loss": 0.0264, "step": 17076 }, { "epoch": 7.939562993956299, "grad_norm": 0.44240614771842957, "learning_rate": 2.1944605001502887e-05, "loss": 0.0135, "step": 17078 }, { "epoch": 7.9404927940492795, "grad_norm": 0.9551742076873779, "learning_rate": 2.1866669160892564e-05, "loss": 0.0252, "step": 17080 }, { "epoch": 7.941422594142259, "grad_norm": 1.002084493637085, "learning_rate": 2.1788764244992556e-05, "loss": 0.0334, "step": 17082 }, { "epoch": 7.94235239423524, "grad_norm": 1.1256871223449707, "learning_rate": 2.1710891022693023e-05, "loss": 0.0424, "step": 17084 }, { "epoch": 7.943282194328219, "grad_norm": 0.43199700117111206, "learning_rate": 2.1633050262571233e-05, "loss": 0.0279, "step": 17086 }, { "epoch": 7.9442119944212, "grad_norm": 1.3706029653549194, "learning_rate": 2.1555242732884143e-05, "loss": 0.0388, "step": 17088 }, { "epoch": 7.945141794514179, "grad_norm": 0.9761912226676941, "learning_rate": 2.1477469201560578e-05, "loss": 0.0254, "step": 17090 }, { "epoch": 7.94607159460716, "grad_norm": 1.3571336269378662, "learning_rate": 2.1399730436193802e-05, "loss": 0.0293, "step": 17092 }, { "epoch": 7.947001394700139, "grad_norm": 0.8218622803688049, "learning_rate": 2.132202720403422e-05, "loss": 0.031, "step": 17094 }, { "epoch": 7.947931194793119, "grad_norm": 0.3939952850341797, "learning_rate": 2.124436027198119e-05, "loss": 0.0194, "step": 17096 }, { "epoch": 7.9488609948860995, "grad_norm": 0.46943050622940063, "learning_rate": 2.1166730406576063e-05, "loss": 0.0306, "step": 17098 }, { "epoch": 7.949790794979079, "grad_norm": 1.2978891134262085, "learning_rate": 2.1089138373994355e-05, "loss": 0.0581, "step": 17100 }, { "epoch": 7.95072059507206, "grad_norm": 1.437003493309021, "learning_rate": 2.1011584940037927e-05, "loss": 0.0391, "step": 17102 }, { "epoch": 7.951650395165039, "grad_norm": 2.1572680473327637, "learning_rate": 2.0934070870128047e-05, "loss": 0.0619, "step": 17104 }, { "epoch": 7.95258019525802, "grad_norm": 0.8763570189476013, "learning_rate": 2.08565969292971e-05, "loss": 0.0208, "step": 17106 }, { "epoch": 7.953509995350999, "grad_norm": 1.3702055215835571, "learning_rate": 2.0779163882181794e-05, "loss": 0.0366, "step": 17108 }, { "epoch": 7.95443979544398, "grad_norm": 0.7137638330459595, "learning_rate": 2.0701772493014777e-05, "loss": 0.0497, "step": 17110 }, { "epoch": 7.955369595536959, "grad_norm": 0.9447154402732849, "learning_rate": 2.0624423525618162e-05, "loss": 0.0494, "step": 17112 }, { "epoch": 7.95629939562994, "grad_norm": 1.087967872619629, "learning_rate": 2.0547117743394856e-05, "loss": 0.0434, "step": 17114 }, { "epoch": 7.9572291957229195, "grad_norm": 1.0905892848968506, "learning_rate": 2.0469855909321638e-05, "loss": 0.0342, "step": 17116 }, { "epoch": 7.9581589958159, "grad_norm": 0.7249439358711243, "learning_rate": 2.0392638785941786e-05, "loss": 0.0248, "step": 17118 }, { "epoch": 7.95908879590888, "grad_norm": 1.1671842336654663, "learning_rate": 2.0315467135356965e-05, "loss": 0.0369, "step": 17120 }, { "epoch": 7.960018596001859, "grad_norm": 1.929055094718933, "learning_rate": 2.0238341719220383e-05, "loss": 0.0399, "step": 17122 }, { "epoch": 7.96094839609484, "grad_norm": 1.3251698017120361, "learning_rate": 2.0161263298728583e-05, "loss": 0.0312, "step": 17124 }, { "epoch": 7.961878196187819, "grad_norm": 1.9260154962539673, "learning_rate": 2.008423263461455e-05, "loss": 0.0424, "step": 17126 }, { "epoch": 7.9628079962808, "grad_norm": 0.8540048003196716, "learning_rate": 2.0007250487139924e-05, "loss": 0.0181, "step": 17128 }, { "epoch": 7.963737796373779, "grad_norm": 0.8400813937187195, "learning_rate": 1.9930317616087247e-05, "loss": 0.0362, "step": 17130 }, { "epoch": 7.96466759646676, "grad_norm": 1.1808228492736816, "learning_rate": 1.9853434780753075e-05, "loss": 0.024, "step": 17132 }, { "epoch": 7.9655973965597395, "grad_norm": 0.9030763506889343, "learning_rate": 1.9776602739939863e-05, "loss": 0.023, "step": 17134 }, { "epoch": 7.96652719665272, "grad_norm": 1.3473292589187622, "learning_rate": 1.969982225194875e-05, "loss": 0.0569, "step": 17136 }, { "epoch": 7.9674569967457, "grad_norm": 1.6183627843856812, "learning_rate": 1.9623094074572244e-05, "loss": 0.0485, "step": 17138 }, { "epoch": 7.968386796838679, "grad_norm": 1.2642794847488403, "learning_rate": 1.9546418965086476e-05, "loss": 0.0265, "step": 17140 }, { "epoch": 7.96931659693166, "grad_norm": 0.9130685925483704, "learning_rate": 1.9469797680243913e-05, "loss": 0.0468, "step": 17142 }, { "epoch": 7.970246397024639, "grad_norm": 1.135852336883545, "learning_rate": 1.939323097626561e-05, "loss": 0.0187, "step": 17144 }, { "epoch": 7.97117619711762, "grad_norm": 0.9032883048057556, "learning_rate": 1.931671960883394e-05, "loss": 0.014, "step": 17146 }, { "epoch": 7.972105997210599, "grad_norm": 1.1002949476242065, "learning_rate": 1.9240264333085384e-05, "loss": 0.0317, "step": 17148 }, { "epoch": 7.97303579730358, "grad_norm": 0.6478521823883057, "learning_rate": 1.9163865903602475e-05, "loss": 0.0167, "step": 17150 }, { "epoch": 7.9739655973965595, "grad_norm": 0.9623079895973206, "learning_rate": 1.9087525074406953e-05, "loss": 0.0245, "step": 17152 }, { "epoch": 7.97489539748954, "grad_norm": 0.539216160774231, "learning_rate": 1.9011242598951986e-05, "loss": 0.0264, "step": 17154 }, { "epoch": 7.97582519758252, "grad_norm": 0.8583088517189026, "learning_rate": 1.8935019230114897e-05, "loss": 0.0229, "step": 17156 }, { "epoch": 7.9767549976755, "grad_norm": 0.47310149669647217, "learning_rate": 1.8858855720189472e-05, "loss": 0.0293, "step": 17158 }, { "epoch": 7.97768479776848, "grad_norm": 0.886222243309021, "learning_rate": 1.8782752820878722e-05, "loss": 0.0297, "step": 17160 }, { "epoch": 7.97861459786146, "grad_norm": 0.9984604716300964, "learning_rate": 1.8706711283287708e-05, "loss": 0.022, "step": 17162 }, { "epoch": 7.97954439795444, "grad_norm": 1.0886180400848389, "learning_rate": 1.8630731857915544e-05, "loss": 0.0246, "step": 17164 }, { "epoch": 7.980474198047419, "grad_norm": 1.3979657888412476, "learning_rate": 1.855481529464856e-05, "loss": 0.0282, "step": 17166 }, { "epoch": 7.9814039981404, "grad_norm": 0.8585319519042969, "learning_rate": 1.8478962342752685e-05, "loss": 0.0428, "step": 17168 }, { "epoch": 7.9823337982333795, "grad_norm": 1.138187050819397, "learning_rate": 1.8403173750865746e-05, "loss": 0.0439, "step": 17170 }, { "epoch": 7.98326359832636, "grad_norm": 1.4926069974899292, "learning_rate": 1.8327450266990725e-05, "loss": 0.0409, "step": 17172 }, { "epoch": 7.98419339841934, "grad_norm": 1.3336563110351562, "learning_rate": 1.8251792638487664e-05, "loss": 0.0217, "step": 17174 }, { "epoch": 7.98512319851232, "grad_norm": 1.5885719060897827, "learning_rate": 1.8176201612066987e-05, "loss": 0.0269, "step": 17176 }, { "epoch": 7.9860529986053, "grad_norm": 0.5965472459793091, "learning_rate": 1.8100677933781523e-05, "loss": 0.0311, "step": 17178 }, { "epoch": 7.98698279869828, "grad_norm": 0.8130292892456055, "learning_rate": 1.802522234901931e-05, "loss": 0.0259, "step": 17180 }, { "epoch": 7.98791259879126, "grad_norm": 1.359251856803894, "learning_rate": 1.794983560249685e-05, "loss": 0.0203, "step": 17182 }, { "epoch": 7.988842398884239, "grad_norm": 1.838219165802002, "learning_rate": 1.7874518438250645e-05, "loss": 0.0495, "step": 17184 }, { "epoch": 7.98977219897722, "grad_norm": 1.2322708368301392, "learning_rate": 1.7799271599630864e-05, "loss": 0.0463, "step": 17186 }, { "epoch": 7.9907019990702, "grad_norm": 0.7383685111999512, "learning_rate": 1.772409582929334e-05, "loss": 0.0224, "step": 17188 }, { "epoch": 7.99163179916318, "grad_norm": 0.8839756846427917, "learning_rate": 1.7648991869192503e-05, "loss": 0.0297, "step": 17190 }, { "epoch": 7.99256159925616, "grad_norm": 0.3749838173389435, "learning_rate": 1.7573960460574272e-05, "loss": 0.0209, "step": 17192 }, { "epoch": 7.99349139934914, "grad_norm": 1.4395751953125, "learning_rate": 1.7499002343968115e-05, "loss": 0.0435, "step": 17194 }, { "epoch": 7.99442119944212, "grad_norm": 0.8017077445983887, "learning_rate": 1.742411825918072e-05, "loss": 0.0265, "step": 17196 }, { "epoch": 7.9953509995351, "grad_norm": 1.3616739511489868, "learning_rate": 1.7349308945287514e-05, "loss": 0.0668, "step": 17198 }, { "epoch": 7.99628079962808, "grad_norm": 0.9301283359527588, "learning_rate": 1.7274575140626396e-05, "loss": 0.0193, "step": 17200 }, { "epoch": 7.99721059972106, "grad_norm": 0.5836116075515747, "learning_rate": 1.7199917582789755e-05, "loss": 0.0165, "step": 17202 }, { "epoch": 7.99814039981404, "grad_norm": 1.43370521068573, "learning_rate": 1.7125337008617467e-05, "loss": 0.0245, "step": 17204 }, { "epoch": 7.99907019990702, "grad_norm": 0.6531291604042053, "learning_rate": 1.7050834154189862e-05, "loss": 0.0444, "step": 17206 }, { "epoch": 8.0, "grad_norm": 2.1169381141662598, "learning_rate": 1.6976409754819774e-05, "loss": 0.0509, "step": 17208 }, { "epoch": 8.00092980009298, "grad_norm": 1.1292816400527954, "learning_rate": 1.690206454504632e-05, "loss": 0.0225, "step": 17210 }, { "epoch": 8.00185960018596, "grad_norm": 0.8145692348480225, "learning_rate": 1.6827799258626544e-05, "loss": 0.0185, "step": 17212 }, { "epoch": 8.00278940027894, "grad_norm": 0.35627099871635437, "learning_rate": 1.6753614628528743e-05, "loss": 0.0095, "step": 17214 }, { "epoch": 8.00371920037192, "grad_norm": 0.669105589389801, "learning_rate": 1.6679511386925445e-05, "loss": 0.0104, "step": 17216 }, { "epoch": 8.0046490004649, "grad_norm": 0.3783988058567047, "learning_rate": 1.660549026518556e-05, "loss": 0.0109, "step": 17218 }, { "epoch": 8.00557880055788, "grad_norm": 0.7230713367462158, "learning_rate": 1.6531551993867757e-05, "loss": 0.0136, "step": 17220 }, { "epoch": 8.00650860065086, "grad_norm": 1.7853714227676392, "learning_rate": 1.6457697302713006e-05, "loss": 0.0265, "step": 17222 }, { "epoch": 8.00743840074384, "grad_norm": 0.35101157426834106, "learning_rate": 1.6383926920637124e-05, "loss": 0.0149, "step": 17224 }, { "epoch": 8.00836820083682, "grad_norm": 0.9217919707298279, "learning_rate": 1.6310241575724168e-05, "loss": 0.0185, "step": 17226 }, { "epoch": 8.0092980009298, "grad_norm": 0.7999720573425293, "learning_rate": 1.6236641995218585e-05, "loss": 0.014, "step": 17228 }, { "epoch": 8.010227801022781, "grad_norm": 0.4854505956172943, "learning_rate": 1.616312890551868e-05, "loss": 0.0128, "step": 17230 }, { "epoch": 8.01115760111576, "grad_norm": 0.9401900768280029, "learning_rate": 1.6089703032168883e-05, "loss": 0.0203, "step": 17232 }, { "epoch": 8.01208740120874, "grad_norm": 0.3969458043575287, "learning_rate": 1.6016365099852762e-05, "loss": 0.026, "step": 17234 }, { "epoch": 8.01301720130172, "grad_norm": 0.7704681754112244, "learning_rate": 1.594311583238643e-05, "loss": 0.014, "step": 17236 }, { "epoch": 8.0139470013947, "grad_norm": 1.13335382938385, "learning_rate": 1.5869955952710342e-05, "loss": 0.0252, "step": 17238 }, { "epoch": 8.01487680148768, "grad_norm": 0.7220318913459778, "learning_rate": 1.5796886182883137e-05, "loss": 0.0134, "step": 17240 }, { "epoch": 8.01580660158066, "grad_norm": 0.6039432883262634, "learning_rate": 1.572390724407377e-05, "loss": 0.0182, "step": 17242 }, { "epoch": 8.01673640167364, "grad_norm": 0.40525299310684204, "learning_rate": 1.565101985655508e-05, "loss": 0.0222, "step": 17244 }, { "epoch": 8.01766620176662, "grad_norm": 1.0951064825057983, "learning_rate": 1.557822473969607e-05, "loss": 0.019, "step": 17246 }, { "epoch": 8.018596001859601, "grad_norm": 0.8766157627105713, "learning_rate": 1.5505522611954988e-05, "loss": 0.0139, "step": 17248 }, { "epoch": 8.01952580195258, "grad_norm": 0.8260278701782227, "learning_rate": 1.5432914190872818e-05, "loss": 0.0179, "step": 17250 }, { "epoch": 8.02045560204556, "grad_norm": 1.3827780485153198, "learning_rate": 1.536040019306511e-05, "loss": 0.0193, "step": 17252 }, { "epoch": 8.02138540213854, "grad_norm": 1.7284228801727295, "learning_rate": 1.528798133421592e-05, "loss": 0.037, "step": 17254 }, { "epoch": 8.02231520223152, "grad_norm": 0.31003743410110474, "learning_rate": 1.5215658329070054e-05, "loss": 0.0095, "step": 17256 }, { "epoch": 8.0232450023245, "grad_norm": 0.5172628164291382, "learning_rate": 1.5143431891426301e-05, "loss": 0.0447, "step": 17258 }, { "epoch": 8.02417480241748, "grad_norm": 1.3834909200668335, "learning_rate": 1.5071302734130606e-05, "loss": 0.0197, "step": 17260 }, { "epoch": 8.02510460251046, "grad_norm": 0.3123247027397156, "learning_rate": 1.4999271569068384e-05, "loss": 0.0103, "step": 17262 }, { "epoch": 8.02603440260344, "grad_norm": 0.8858749270439148, "learning_rate": 1.4927339107158482e-05, "loss": 0.019, "step": 17264 }, { "epoch": 8.026964202696421, "grad_norm": 1.035248875617981, "learning_rate": 1.4855506058345095e-05, "loss": 0.0158, "step": 17266 }, { "epoch": 8.0278940027894, "grad_norm": 0.6961906552314758, "learning_rate": 1.4783773131591332e-05, "loss": 0.0158, "step": 17268 }, { "epoch": 8.02882380288238, "grad_norm": 0.4202362895011902, "learning_rate": 1.4712141034872385e-05, "loss": 0.0097, "step": 17270 }, { "epoch": 8.02975360297536, "grad_norm": 0.5196175575256348, "learning_rate": 1.4640610475167964e-05, "loss": 0.0187, "step": 17272 }, { "epoch": 8.030683403068341, "grad_norm": 0.9038172364234924, "learning_rate": 1.4569182158455978e-05, "loss": 0.0262, "step": 17274 }, { "epoch": 8.03161320316132, "grad_norm": 0.29837775230407715, "learning_rate": 1.4497856789704917e-05, "loss": 0.0151, "step": 17276 }, { "epoch": 8.0325430032543, "grad_norm": 0.4513050615787506, "learning_rate": 1.4426635072867455e-05, "loss": 0.0238, "step": 17278 }, { "epoch": 8.03347280334728, "grad_norm": 1.3143779039382935, "learning_rate": 1.4355517710873265e-05, "loss": 0.0259, "step": 17280 }, { "epoch": 8.03440260344026, "grad_norm": 0.29148927330970764, "learning_rate": 1.4284505405621838e-05, "loss": 0.0101, "step": 17282 }, { "epoch": 8.035332403533241, "grad_norm": 0.5844970345497131, "learning_rate": 1.4213598857976117e-05, "loss": 0.0131, "step": 17284 }, { "epoch": 8.03626220362622, "grad_norm": 0.3854750990867615, "learning_rate": 1.4142798767754943e-05, "loss": 0.0094, "step": 17286 }, { "epoch": 8.0371920037192, "grad_norm": 1.2031219005584717, "learning_rate": 1.4072105833726779e-05, "loss": 0.0314, "step": 17288 }, { "epoch": 8.03812180381218, "grad_norm": 0.672511637210846, "learning_rate": 1.4001520753602187e-05, "loss": 0.0207, "step": 17290 }, { "epoch": 8.039051603905161, "grad_norm": 0.2786092460155487, "learning_rate": 1.3931044224027497e-05, "loss": 0.0104, "step": 17292 }, { "epoch": 8.03998140399814, "grad_norm": 0.23906134068965912, "learning_rate": 1.386067694057767e-05, "loss": 0.008, "step": 17294 }, { "epoch": 8.04091120409112, "grad_norm": 1.2297064065933228, "learning_rate": 1.3790419597749233e-05, "loss": 0.0176, "step": 17296 }, { "epoch": 8.0418410041841, "grad_norm": 0.5838314890861511, "learning_rate": 1.372027288895396e-05, "loss": 0.0116, "step": 17298 }, { "epoch": 8.04277080427708, "grad_norm": 0.9650667905807495, "learning_rate": 1.365023750651146e-05, "loss": 0.0196, "step": 17300 }, { "epoch": 8.043700604370061, "grad_norm": 0.8134907484054565, "learning_rate": 1.3580314141642606e-05, "loss": 0.0179, "step": 17302 }, { "epoch": 8.04463040446304, "grad_norm": 1.7696505784988403, "learning_rate": 1.351050348446286e-05, "loss": 0.0173, "step": 17304 }, { "epoch": 8.04556020455602, "grad_norm": 1.2148170471191406, "learning_rate": 1.3440806223975225e-05, "loss": 0.0113, "step": 17306 }, { "epoch": 8.046490004649, "grad_norm": 1.0775004625320435, "learning_rate": 1.3371223048063605e-05, "loss": 0.023, "step": 17308 }, { "epoch": 8.047419804741981, "grad_norm": 0.8164393305778503, "learning_rate": 1.3301754643485773e-05, "loss": 0.0219, "step": 17310 }, { "epoch": 8.04834960483496, "grad_norm": 0.8094502687454224, "learning_rate": 1.3232401695866758e-05, "loss": 0.0155, "step": 17312 }, { "epoch": 8.04927940492794, "grad_norm": 1.8713628053665161, "learning_rate": 1.3163164889692313e-05, "loss": 0.0385, "step": 17314 }, { "epoch": 8.05020920502092, "grad_norm": 0.5116755962371826, "learning_rate": 1.3094044908301526e-05, "loss": 0.0093, "step": 17316 }, { "epoch": 8.051139005113901, "grad_norm": 0.6148163676261902, "learning_rate": 1.3025042433881021e-05, "loss": 0.0162, "step": 17318 }, { "epoch": 8.052068805206881, "grad_norm": 0.46956488490104675, "learning_rate": 1.2956158147457206e-05, "loss": 0.0131, "step": 17320 }, { "epoch": 8.05299860529986, "grad_norm": 0.3472520411014557, "learning_rate": 1.2887392728890102e-05, "loss": 0.0125, "step": 17322 }, { "epoch": 8.05392840539284, "grad_norm": 0.3019677698612213, "learning_rate": 1.2818746856866789e-05, "loss": 0.0111, "step": 17324 }, { "epoch": 8.05485820548582, "grad_norm": 1.5772067308425903, "learning_rate": 1.2750221208894147e-05, "loss": 0.0274, "step": 17326 }, { "epoch": 8.055788005578801, "grad_norm": 0.6581627130508423, "learning_rate": 1.2681816461292816e-05, "loss": 0.0106, "step": 17328 }, { "epoch": 8.05671780567178, "grad_norm": 0.5217828154563904, "learning_rate": 1.2613533289189804e-05, "loss": 0.0106, "step": 17330 }, { "epoch": 8.05764760576476, "grad_norm": 0.6574950218200684, "learning_rate": 1.2545372366512766e-05, "loss": 0.0234, "step": 17332 }, { "epoch": 8.05857740585774, "grad_norm": 0.2643159329891205, "learning_rate": 1.247733436598233e-05, "loss": 0.0101, "step": 17334 }, { "epoch": 8.059507205950721, "grad_norm": 0.7043961882591248, "learning_rate": 1.2409419959106022e-05, "loss": 0.0102, "step": 17336 }, { "epoch": 8.060437006043701, "grad_norm": 0.7438539266586304, "learning_rate": 1.2341629816171771e-05, "loss": 0.0101, "step": 17338 }, { "epoch": 8.06136680613668, "grad_norm": 1.2943100929260254, "learning_rate": 1.2273964606240767e-05, "loss": 0.0183, "step": 17340 }, { "epoch": 8.06229660622966, "grad_norm": 0.4569284915924072, "learning_rate": 1.2206424997141468e-05, "loss": 0.0107, "step": 17342 }, { "epoch": 8.06322640632264, "grad_norm": 1.0860191583633423, "learning_rate": 1.2139011655462396e-05, "loss": 0.0115, "step": 17344 }, { "epoch": 8.064156206415621, "grad_norm": 1.2265652418136597, "learning_rate": 1.2071725246546092e-05, "loss": 0.018, "step": 17346 }, { "epoch": 8.0650860065086, "grad_norm": 0.5949340462684631, "learning_rate": 1.2004566434482327e-05, "loss": 0.0129, "step": 17348 }, { "epoch": 8.06601580660158, "grad_norm": 1.3673235177993774, "learning_rate": 1.1937535882101307e-05, "loss": 0.0244, "step": 17350 }, { "epoch": 8.06694560669456, "grad_norm": 0.4188942611217499, "learning_rate": 1.1870634250967677e-05, "loss": 0.0169, "step": 17352 }, { "epoch": 8.067875406787541, "grad_norm": 0.4273422956466675, "learning_rate": 1.1803862201373452e-05, "loss": 0.0137, "step": 17354 }, { "epoch": 8.068805206880521, "grad_norm": 0.31050029397010803, "learning_rate": 1.1737220392331726e-05, "loss": 0.0143, "step": 17356 }, { "epoch": 8.0697350069735, "grad_norm": 0.7280254364013672, "learning_rate": 1.1670709481570328e-05, "loss": 0.0176, "step": 17358 }, { "epoch": 8.07066480706648, "grad_norm": 1.2220388650894165, "learning_rate": 1.1604330125525093e-05, "loss": 0.0204, "step": 17360 }, { "epoch": 8.071594607159462, "grad_norm": 0.8993173837661743, "learning_rate": 1.1538082979333544e-05, "loss": 0.0251, "step": 17362 }, { "epoch": 8.072524407252441, "grad_norm": 0.5351976752281189, "learning_rate": 1.1471968696828194e-05, "loss": 0.0302, "step": 17364 }, { "epoch": 8.07345420734542, "grad_norm": 0.6942078471183777, "learning_rate": 1.1405987930530245e-05, "loss": 0.0178, "step": 17366 }, { "epoch": 8.0743840074384, "grad_norm": 1.3543479442596436, "learning_rate": 1.1340141331643373e-05, "loss": 0.0243, "step": 17368 }, { "epoch": 8.07531380753138, "grad_norm": 1.4989315271377563, "learning_rate": 1.1274429550046773e-05, "loss": 0.026, "step": 17370 }, { "epoch": 8.076243607624361, "grad_norm": 0.2220623940229416, "learning_rate": 1.1208853234289278e-05, "loss": 0.0133, "step": 17372 }, { "epoch": 8.077173407717341, "grad_norm": 0.6024366021156311, "learning_rate": 1.1143413031582648e-05, "loss": 0.0132, "step": 17374 }, { "epoch": 8.07810320781032, "grad_norm": 0.9483928084373474, "learning_rate": 1.1078109587795342e-05, "loss": 0.0107, "step": 17376 }, { "epoch": 8.0790330079033, "grad_norm": 0.4624194800853729, "learning_rate": 1.101294354744591e-05, "loss": 0.0123, "step": 17378 }, { "epoch": 8.079962807996282, "grad_norm": 0.423808217048645, "learning_rate": 1.0947915553696784e-05, "loss": 0.0111, "step": 17380 }, { "epoch": 8.080892608089261, "grad_norm": 0.3131750822067261, "learning_rate": 1.0883026248348165e-05, "loss": 0.0128, "step": 17382 }, { "epoch": 8.08182240818224, "grad_norm": 0.1930091828107834, "learning_rate": 1.0818276271831145e-05, "loss": 0.0075, "step": 17384 }, { "epoch": 8.08275220827522, "grad_norm": 0.2787240147590637, "learning_rate": 1.0753666263201931e-05, "loss": 0.015, "step": 17386 }, { "epoch": 8.0836820083682, "grad_norm": 0.9827878475189209, "learning_rate": 1.0689196860135292e-05, "loss": 0.018, "step": 17388 }, { "epoch": 8.084611808461181, "grad_norm": 0.9424211978912354, "learning_rate": 1.0624868698918067e-05, "loss": 0.0299, "step": 17390 }, { "epoch": 8.085541608554161, "grad_norm": 0.4747207462787628, "learning_rate": 1.056068241444338e-05, "loss": 0.0152, "step": 17392 }, { "epoch": 8.08647140864714, "grad_norm": 0.39886075258255005, "learning_rate": 1.0496638640203805e-05, "loss": 0.0302, "step": 17394 }, { "epoch": 8.08740120874012, "grad_norm": 1.1616275310516357, "learning_rate": 1.0432738008285674e-05, "loss": 0.0188, "step": 17396 }, { "epoch": 8.088331008833102, "grad_norm": 0.5175045728683472, "learning_rate": 1.0368981149362366e-05, "loss": 0.0139, "step": 17398 }, { "epoch": 8.089260808926081, "grad_norm": 1.114249587059021, "learning_rate": 1.0305368692688185e-05, "loss": 0.0175, "step": 17400 }, { "epoch": 8.09019060901906, "grad_norm": 0.37740692496299744, "learning_rate": 1.0241901266092689e-05, "loss": 0.0105, "step": 17402 }, { "epoch": 8.09112040911204, "grad_norm": 1.1847525835037231, "learning_rate": 1.017857949597354e-05, "loss": 0.0166, "step": 17404 }, { "epoch": 8.092050209205022, "grad_norm": 0.4020256996154785, "learning_rate": 1.0115404007291187e-05, "loss": 0.0101, "step": 17406 }, { "epoch": 8.092980009298001, "grad_norm": 0.2997359037399292, "learning_rate": 1.0052375423562056e-05, "loss": 0.0163, "step": 17408 }, { "epoch": 8.093909809390981, "grad_norm": 0.550014078617096, "learning_rate": 9.989494366852965e-06, "loss": 0.0117, "step": 17410 }, { "epoch": 8.09483960948396, "grad_norm": 1.4213244915008545, "learning_rate": 9.926761457774417e-06, "loss": 0.0222, "step": 17412 }, { "epoch": 8.09576940957694, "grad_norm": 0.8321213722229004, "learning_rate": 9.864177315474968e-06, "loss": 0.0124, "step": 17414 }, { "epoch": 8.096699209669922, "grad_norm": 0.7673649191856384, "learning_rate": 9.801742557634908e-06, "loss": 0.0154, "step": 17416 }, { "epoch": 8.097629009762901, "grad_norm": 0.9099611639976501, "learning_rate": 9.739457800459948e-06, "loss": 0.0199, "step": 17418 }, { "epoch": 8.09855880985588, "grad_norm": 2.5416977405548096, "learning_rate": 9.677323658675638e-06, "loss": 0.0229, "step": 17420 }, { "epoch": 8.09948860994886, "grad_norm": 0.22554749250411987, "learning_rate": 9.6153407455208e-06, "loss": 0.0125, "step": 17422 }, { "epoch": 8.100418410041842, "grad_norm": 1.2731984853744507, "learning_rate": 9.553509672741697e-06, "loss": 0.0166, "step": 17424 }, { "epoch": 8.101348210134821, "grad_norm": 2.379606008529663, "learning_rate": 9.491831050586125e-06, "loss": 0.0402, "step": 17426 }, { "epoch": 8.102278010227801, "grad_norm": 0.9077581167221069, "learning_rate": 9.430305487797183e-06, "loss": 0.0428, "step": 17428 }, { "epoch": 8.10320781032078, "grad_norm": 1.1082916259765625, "learning_rate": 9.368933591607402e-06, "loss": 0.0205, "step": 17430 }, { "epoch": 8.104137610413762, "grad_norm": 0.9430263638496399, "learning_rate": 9.30771596773256e-06, "loss": 0.0303, "step": 17432 }, { "epoch": 8.105067410506742, "grad_norm": 0.9860255718231201, "learning_rate": 9.246653220365813e-06, "loss": 0.0103, "step": 17434 }, { "epoch": 8.105997210599721, "grad_norm": 0.2661532461643219, "learning_rate": 9.185745952171968e-06, "loss": 0.0068, "step": 17436 }, { "epoch": 8.1069270106927, "grad_norm": 1.191781759262085, "learning_rate": 9.124994764281034e-06, "loss": 0.0213, "step": 17438 }, { "epoch": 8.10785681078568, "grad_norm": 0.35087910294532776, "learning_rate": 9.064400256282775e-06, "loss": 0.0152, "step": 17440 }, { "epoch": 8.108786610878662, "grad_norm": 0.5969927310943604, "learning_rate": 9.003963026220597e-06, "loss": 0.0176, "step": 17442 }, { "epoch": 8.109716410971641, "grad_norm": 0.6165249347686768, "learning_rate": 8.943683670585417e-06, "loss": 0.0106, "step": 17444 }, { "epoch": 8.110646211064621, "grad_norm": 0.7748493552207947, "learning_rate": 8.883562784310281e-06, "loss": 0.0167, "step": 17446 }, { "epoch": 8.1115760111576, "grad_norm": 0.2931350767612457, "learning_rate": 8.823600960763925e-06, "loss": 0.0201, "step": 17448 }, { "epoch": 8.112505811250582, "grad_norm": 0.4611387550830841, "learning_rate": 8.763798791745479e-06, "loss": 0.0192, "step": 17450 }, { "epoch": 8.113435611343562, "grad_norm": 0.6139688491821289, "learning_rate": 8.704156867478071e-06, "loss": 0.0124, "step": 17452 }, { "epoch": 8.114365411436541, "grad_norm": 0.33424466848373413, "learning_rate": 8.644675776603483e-06, "loss": 0.0217, "step": 17454 }, { "epoch": 8.11529521152952, "grad_norm": 0.4767255485057831, "learning_rate": 8.585356106176136e-06, "loss": 0.0119, "step": 17456 }, { "epoch": 8.1162250116225, "grad_norm": 0.4439815282821655, "learning_rate": 8.526198441657095e-06, "loss": 0.0099, "step": 17458 }, { "epoch": 8.117154811715482, "grad_norm": 1.1206741333007812, "learning_rate": 8.467203366908756e-06, "loss": 0.0458, "step": 17460 }, { "epoch": 8.118084611808461, "grad_norm": 0.5768001079559326, "learning_rate": 8.408371464188563e-06, "loss": 0.0162, "step": 17462 }, { "epoch": 8.119014411901441, "grad_norm": 0.363972932100296, "learning_rate": 8.349703314143772e-06, "loss": 0.0166, "step": 17464 }, { "epoch": 8.11994421199442, "grad_norm": 0.42018958926200867, "learning_rate": 8.291199495805274e-06, "loss": 0.0178, "step": 17466 }, { "epoch": 8.120874012087402, "grad_norm": 0.3156043291091919, "learning_rate": 8.232860586582003e-06, "loss": 0.0122, "step": 17468 }, { "epoch": 8.121803812180381, "grad_norm": 0.7731499671936035, "learning_rate": 8.174687162255706e-06, "loss": 0.0101, "step": 17470 }, { "epoch": 8.122733612273361, "grad_norm": 0.2357454001903534, "learning_rate": 8.1166797969744e-06, "loss": 0.0133, "step": 17472 }, { "epoch": 8.12366341236634, "grad_norm": 1.5389155149459839, "learning_rate": 8.058839063247491e-06, "loss": 0.0233, "step": 17474 }, { "epoch": 8.124593212459322, "grad_norm": 0.7002127766609192, "learning_rate": 8.001165531939601e-06, "loss": 0.0087, "step": 17476 }, { "epoch": 8.125523012552302, "grad_norm": 0.25536811351776123, "learning_rate": 7.943659772265142e-06, "loss": 0.0095, "step": 17478 }, { "epoch": 8.126452812645281, "grad_norm": 0.5269440412521362, "learning_rate": 7.88632235178287e-06, "loss": 0.0193, "step": 17480 }, { "epoch": 8.127382612738261, "grad_norm": 1.0121365785598755, "learning_rate": 7.829153836389788e-06, "loss": 0.0156, "step": 17482 }, { "epoch": 8.12831241283124, "grad_norm": 0.20652812719345093, "learning_rate": 7.772154790316318e-06, "loss": 0.0122, "step": 17484 }, { "epoch": 8.129242212924222, "grad_norm": 0.16951608657836914, "learning_rate": 7.715325776119903e-06, "loss": 0.006, "step": 17486 }, { "epoch": 8.130172013017201, "grad_norm": 0.394057959318161, "learning_rate": 7.658667354679912e-06, "loss": 0.0083, "step": 17488 }, { "epoch": 8.131101813110181, "grad_norm": 0.20260852575302124, "learning_rate": 7.602180085192213e-06, "loss": 0.0094, "step": 17490 }, { "epoch": 8.13203161320316, "grad_norm": 0.4007527530193329, "learning_rate": 7.545864525163225e-06, "loss": 0.0146, "step": 17492 }, { "epoch": 8.132961413296142, "grad_norm": 0.2875538468360901, "learning_rate": 7.489721230404918e-06, "loss": 0.0114, "step": 17494 }, { "epoch": 8.133891213389122, "grad_norm": 0.7943793535232544, "learning_rate": 7.433750755028754e-06, "loss": 0.0133, "step": 17496 }, { "epoch": 8.134821013482101, "grad_norm": 0.9113589525222778, "learning_rate": 7.377953651440983e-06, "loss": 0.0183, "step": 17498 }, { "epoch": 8.135750813575081, "grad_norm": 0.49858641624450684, "learning_rate": 7.322330470336366e-06, "loss": 0.0099, "step": 17500 }, { "epoch": 8.13668061366806, "grad_norm": 0.5963518023490906, "learning_rate": 7.266881760693178e-06, "loss": 0.0148, "step": 17502 }, { "epoch": 8.137610413761042, "grad_norm": 0.8966203927993774, "learning_rate": 7.211608069767926e-06, "loss": 0.011, "step": 17504 }, { "epoch": 8.138540213854021, "grad_norm": 0.23595985770225525, "learning_rate": 7.1565099430894985e-06, "loss": 0.0142, "step": 17506 }, { "epoch": 8.139470013947001, "grad_norm": 0.27719786763191223, "learning_rate": 7.101587924454223e-06, "loss": 0.0128, "step": 17508 }, { "epoch": 8.14039981403998, "grad_norm": 1.2596172094345093, "learning_rate": 7.046842555920317e-06, "loss": 0.0119, "step": 17510 }, { "epoch": 8.141329614132962, "grad_norm": 0.2518630027770996, "learning_rate": 6.992274377802338e-06, "loss": 0.0087, "step": 17512 }, { "epoch": 8.142259414225942, "grad_norm": 0.7770331501960754, "learning_rate": 6.937883928666297e-06, "loss": 0.0162, "step": 17514 }, { "epoch": 8.143189214318921, "grad_norm": 0.2571823000907898, "learning_rate": 6.883671745323848e-06, "loss": 0.0053, "step": 17516 }, { "epoch": 8.144119014411901, "grad_norm": 0.7959539890289307, "learning_rate": 6.8296383628274765e-06, "loss": 0.0261, "step": 17518 }, { "epoch": 8.145048814504882, "grad_norm": 0.43683722615242004, "learning_rate": 6.775784314464793e-06, "loss": 0.0099, "step": 17520 }, { "epoch": 8.145978614597862, "grad_norm": 0.4102689027786255, "learning_rate": 6.722110131753391e-06, "loss": 0.0109, "step": 17522 }, { "epoch": 8.146908414690841, "grad_norm": 0.7303023934364319, "learning_rate": 6.668616344436028e-06, "loss": 0.0322, "step": 17524 }, { "epoch": 8.147838214783821, "grad_norm": 0.3262164294719696, "learning_rate": 6.615303480474603e-06, "loss": 0.0108, "step": 17526 }, { "epoch": 8.1487680148768, "grad_norm": 0.1235070675611496, "learning_rate": 6.562172066045686e-06, "loss": 0.0111, "step": 17528 }, { "epoch": 8.149697814969782, "grad_norm": 0.7367669939994812, "learning_rate": 6.509222625534826e-06, "loss": 0.0156, "step": 17530 }, { "epoch": 8.150627615062762, "grad_norm": 1.1495015621185303, "learning_rate": 6.45645568153156e-06, "loss": 0.0182, "step": 17532 }, { "epoch": 8.151557415155741, "grad_norm": 0.3351016640663147, "learning_rate": 6.403871754824448e-06, "loss": 0.0082, "step": 17534 }, { "epoch": 8.15248721524872, "grad_norm": 1.2436610460281372, "learning_rate": 6.351471364395433e-06, "loss": 0.025, "step": 17536 }, { "epoch": 8.153417015341702, "grad_norm": 1.2877182960510254, "learning_rate": 6.2992550274154565e-06, "loss": 0.0294, "step": 17538 }, { "epoch": 8.154346815434682, "grad_norm": 1.5594291687011719, "learning_rate": 6.247223259238504e-06, "loss": 0.0245, "step": 17540 }, { "epoch": 8.155276615527661, "grad_norm": 1.2488491535186768, "learning_rate": 6.1953765733972394e-06, "loss": 0.0194, "step": 17542 }, { "epoch": 8.156206415620641, "grad_norm": 0.2612262964248657, "learning_rate": 6.143715481597464e-06, "loss": 0.0203, "step": 17544 }, { "epoch": 8.15713621571362, "grad_norm": 0.7412928938865662, "learning_rate": 6.092240493713232e-06, "loss": 0.0142, "step": 17546 }, { "epoch": 8.158066015806602, "grad_norm": 0.2861688733100891, "learning_rate": 6.0409521177820165e-06, "loss": 0.0077, "step": 17548 }, { "epoch": 8.158995815899582, "grad_norm": 0.7508985996246338, "learning_rate": 5.989850859999207e-06, "loss": 0.0143, "step": 17550 }, { "epoch": 8.159925615992561, "grad_norm": 0.2211771458387375, "learning_rate": 5.938937224713816e-06, "loss": 0.0069, "step": 17552 }, { "epoch": 8.16085541608554, "grad_norm": 0.3297227919101715, "learning_rate": 5.888211714422755e-06, "loss": 0.0089, "step": 17554 }, { "epoch": 8.161785216178522, "grad_norm": 0.650084376335144, "learning_rate": 5.837674829766273e-06, "loss": 0.0158, "step": 17556 }, { "epoch": 8.162715016271502, "grad_norm": 0.22287099063396454, "learning_rate": 5.787327069523135e-06, "loss": 0.0083, "step": 17558 }, { "epoch": 8.163644816364481, "grad_norm": 0.9382660388946533, "learning_rate": 5.737168930605294e-06, "loss": 0.0145, "step": 17560 }, { "epoch": 8.164574616457461, "grad_norm": 0.6349606513977051, "learning_rate": 5.687200908053448e-06, "loss": 0.0129, "step": 17562 }, { "epoch": 8.165504416550442, "grad_norm": 1.8261853456497192, "learning_rate": 5.637423495031686e-06, "loss": 0.0191, "step": 17564 }, { "epoch": 8.166434216643422, "grad_norm": 0.16950082778930664, "learning_rate": 5.587837182823042e-06, "loss": 0.0109, "step": 17566 }, { "epoch": 8.167364016736402, "grad_norm": 1.1420984268188477, "learning_rate": 5.538442460824453e-06, "loss": 0.0235, "step": 17568 }, { "epoch": 8.168293816829381, "grad_norm": 0.256989061832428, "learning_rate": 5.489239816541769e-06, "loss": 0.0105, "step": 17570 }, { "epoch": 8.16922361692236, "grad_norm": 0.5044122338294983, "learning_rate": 5.4402297355853396e-06, "loss": 0.009, "step": 17572 }, { "epoch": 8.170153417015342, "grad_norm": 0.49874553084373474, "learning_rate": 5.391412701664822e-06, "loss": 0.0125, "step": 17574 }, { "epoch": 8.171083217108322, "grad_norm": 0.33904072642326355, "learning_rate": 5.3427891965845765e-06, "loss": 0.0089, "step": 17576 }, { "epoch": 8.172013017201301, "grad_norm": 0.7649303674697876, "learning_rate": 5.294359700239023e-06, "loss": 0.0203, "step": 17578 }, { "epoch": 8.172942817294281, "grad_norm": 0.39284560084342957, "learning_rate": 5.246124690607742e-06, "loss": 0.007, "step": 17580 }, { "epoch": 8.173872617387262, "grad_norm": 0.31402695178985596, "learning_rate": 5.198084643750855e-06, "loss": 0.0165, "step": 17582 }, { "epoch": 8.174802417480242, "grad_norm": 0.4196714460849762, "learning_rate": 5.150240033804123e-06, "loss": 0.0065, "step": 17584 }, { "epoch": 8.175732217573222, "grad_norm": 1.2827214002609253, "learning_rate": 5.10259133297464e-06, "loss": 0.0332, "step": 17586 }, { "epoch": 8.176662017666201, "grad_norm": 0.3696572780609131, "learning_rate": 5.055139011535792e-06, "loss": 0.0076, "step": 17588 }, { "epoch": 8.177591817759183, "grad_norm": 0.2205604463815689, "learning_rate": 5.007883537822779e-06, "loss": 0.0135, "step": 17590 }, { "epoch": 8.178521617852162, "grad_norm": 0.608763575553894, "learning_rate": 4.960825378228102e-06, "loss": 0.0142, "step": 17592 }, { "epoch": 8.179451417945142, "grad_norm": 0.8227359652519226, "learning_rate": 4.913964997196803e-06, "loss": 0.0174, "step": 17594 }, { "epoch": 8.180381218038121, "grad_norm": 1.1322906017303467, "learning_rate": 4.867302857221973e-06, "loss": 0.0174, "step": 17596 }, { "epoch": 8.181311018131101, "grad_norm": 0.161976158618927, "learning_rate": 4.8208394188400425e-06, "loss": 0.0129, "step": 17598 }, { "epoch": 8.182240818224082, "grad_norm": 0.9909967184066772, "learning_rate": 4.774575140626342e-06, "loss": 0.0168, "step": 17600 }, { "epoch": 8.183170618317062, "grad_norm": 0.41228434443473816, "learning_rate": 4.72851047919072e-06, "loss": 0.0089, "step": 17602 }, { "epoch": 8.184100418410042, "grad_norm": 0.8688303232192993, "learning_rate": 4.682645889172632e-06, "loss": 0.0253, "step": 17604 }, { "epoch": 8.185030218503021, "grad_norm": 0.8959554433822632, "learning_rate": 4.636981823237273e-06, "loss": 0.0135, "step": 17606 }, { "epoch": 8.185960018596003, "grad_norm": 0.36872413754463196, "learning_rate": 4.591518732070439e-06, "loss": 0.0082, "step": 17608 }, { "epoch": 8.186889818688982, "grad_norm": 0.3128483295440674, "learning_rate": 4.546257064374428e-06, "loss": 0.0108, "step": 17610 }, { "epoch": 8.187819618781962, "grad_norm": 0.2053775191307068, "learning_rate": 4.501197266863733e-06, "loss": 0.0123, "step": 17612 }, { "epoch": 8.188749418874941, "grad_norm": 1.3818728923797607, "learning_rate": 4.456339784260265e-06, "loss": 0.0208, "step": 17614 }, { "epoch": 8.189679218967921, "grad_norm": 0.6078409552574158, "learning_rate": 4.411685059289362e-06, "loss": 0.0143, "step": 17616 }, { "epoch": 8.190609019060902, "grad_norm": 0.4001050293445587, "learning_rate": 4.367233532675035e-06, "loss": 0.0128, "step": 17618 }, { "epoch": 8.191538819153882, "grad_norm": 0.28544992208480835, "learning_rate": 4.322985643135957e-06, "loss": 0.016, "step": 17620 }, { "epoch": 8.192468619246862, "grad_norm": 0.21574990451335907, "learning_rate": 4.278941827380984e-06, "loss": 0.0158, "step": 17622 }, { "epoch": 8.193398419339841, "grad_norm": 0.27796927094459534, "learning_rate": 4.235102520104694e-06, "loss": 0.0085, "step": 17624 }, { "epoch": 8.194328219432823, "grad_norm": 0.487204909324646, "learning_rate": 4.191468153983451e-06, "loss": 0.0215, "step": 17626 }, { "epoch": 8.195258019525802, "grad_norm": 1.129125952720642, "learning_rate": 4.148039159670735e-06, "loss": 0.0197, "step": 17628 }, { "epoch": 8.196187819618782, "grad_norm": 0.6392461061477661, "learning_rate": 4.104815965793284e-06, "loss": 0.01, "step": 17630 }, { "epoch": 8.197117619711761, "grad_norm": 0.37800487875938416, "learning_rate": 4.061798998946477e-06, "loss": 0.0149, "step": 17632 }, { "epoch": 8.198047419804743, "grad_norm": 0.29997122287750244, "learning_rate": 4.018988683690454e-06, "loss": 0.0117, "step": 17634 }, { "epoch": 8.198977219897722, "grad_norm": 0.5677655935287476, "learning_rate": 3.976385442545798e-06, "loss": 0.0112, "step": 17636 }, { "epoch": 8.199907019990702, "grad_norm": 0.2569011449813843, "learning_rate": 3.933989695989197e-06, "loss": 0.0077, "step": 17638 }, { "epoch": 8.200836820083682, "grad_norm": 0.7635115385055542, "learning_rate": 3.891801862449652e-06, "loss": 0.01, "step": 17640 }, { "epoch": 8.201766620176661, "grad_norm": 0.34548333287239075, "learning_rate": 3.849822358304001e-06, "loss": 0.0114, "step": 17642 }, { "epoch": 8.202696420269643, "grad_norm": 0.3384954333305359, "learning_rate": 3.8080515978729535e-06, "loss": 0.0084, "step": 17644 }, { "epoch": 8.203626220362622, "grad_norm": 1.353066325187683, "learning_rate": 3.7664899934171005e-06, "loss": 0.0204, "step": 17646 }, { "epoch": 8.204556020455602, "grad_norm": 0.9532992243766785, "learning_rate": 3.725137955132697e-06, "loss": 0.0143, "step": 17648 }, { "epoch": 8.205485820548581, "grad_norm": 0.20569568872451782, "learning_rate": 3.6839958911477135e-06, "loss": 0.0066, "step": 17650 }, { "epoch": 8.206415620641563, "grad_norm": 0.4012102484703064, "learning_rate": 3.6430642075176635e-06, "loss": 0.0119, "step": 17652 }, { "epoch": 8.207345420734542, "grad_norm": 0.18334725499153137, "learning_rate": 3.6023433082216984e-06, "loss": 0.0089, "step": 17654 }, { "epoch": 8.208275220827522, "grad_norm": 0.8305543065071106, "learning_rate": 3.5618335951587394e-06, "loss": 0.01, "step": 17656 }, { "epoch": 8.209205020920502, "grad_norm": 0.16081476211547852, "learning_rate": 3.5215354681432163e-06, "loss": 0.0245, "step": 17658 }, { "epoch": 8.210134821013481, "grad_norm": 0.5743051171302795, "learning_rate": 3.4814493249014138e-06, "loss": 0.0126, "step": 17660 }, { "epoch": 8.211064621106463, "grad_norm": 0.517603874206543, "learning_rate": 3.44157556106743e-06, "loss": 0.0112, "step": 17662 }, { "epoch": 8.211994421199442, "grad_norm": 1.0091816186904907, "learning_rate": 3.401914570179128e-06, "loss": 0.0194, "step": 17664 }, { "epoch": 8.212924221292422, "grad_norm": 0.40123724937438965, "learning_rate": 3.36246674367456e-06, "loss": 0.0115, "step": 17666 }, { "epoch": 8.213854021385401, "grad_norm": 0.8017920255661011, "learning_rate": 3.3232324708877647e-06, "loss": 0.0363, "step": 17668 }, { "epoch": 8.214783821478383, "grad_norm": 0.5246613621711731, "learning_rate": 3.2842121390452573e-06, "loss": 0.0104, "step": 17670 }, { "epoch": 8.215713621571362, "grad_norm": 0.540646493434906, "learning_rate": 3.2454061332618784e-06, "loss": 0.0109, "step": 17672 }, { "epoch": 8.216643421664342, "grad_norm": 0.6165062785148621, "learning_rate": 3.206814836537279e-06, "loss": 0.0317, "step": 17674 }, { "epoch": 8.217573221757322, "grad_norm": 1.3676393032073975, "learning_rate": 3.1684386297520194e-06, "loss": 0.019, "step": 17676 }, { "epoch": 8.218503021850303, "grad_norm": 0.4194423258304596, "learning_rate": 3.1302778916636836e-06, "loss": 0.0138, "step": 17678 }, { "epoch": 8.219432821943283, "grad_norm": 0.2968924641609192, "learning_rate": 3.0923329989034386e-06, "loss": 0.0058, "step": 17680 }, { "epoch": 8.220362622036262, "grad_norm": 0.27652356028556824, "learning_rate": 3.0546043259719664e-06, "loss": 0.008, "step": 17682 }, { "epoch": 8.221292422129242, "grad_norm": 0.5333066582679749, "learning_rate": 3.0170922452361253e-06, "loss": 0.0141, "step": 17684 }, { "epoch": 8.222222222222221, "grad_norm": 0.573591411113739, "learning_rate": 2.979797126924966e-06, "loss": 0.0091, "step": 17686 }, { "epoch": 8.223152022315203, "grad_norm": 0.5420673489570618, "learning_rate": 2.9427193391261628e-06, "loss": 0.0108, "step": 17688 }, { "epoch": 8.224081822408182, "grad_norm": 0.7742589116096497, "learning_rate": 2.9058592477826827e-06, "loss": 0.0136, "step": 17690 }, { "epoch": 8.225011622501162, "grad_norm": 0.23759600520133972, "learning_rate": 2.8692172166886185e-06, "loss": 0.0052, "step": 17692 }, { "epoch": 8.225941422594142, "grad_norm": 1.3703210353851318, "learning_rate": 2.8327936074861084e-06, "loss": 0.0188, "step": 17694 }, { "epoch": 8.226871222687123, "grad_norm": 0.7229518890380859, "learning_rate": 2.7965887796614295e-06, "loss": 0.0112, "step": 17696 }, { "epoch": 8.227801022780103, "grad_norm": 0.617352306842804, "learning_rate": 2.760603090541577e-06, "loss": 0.0122, "step": 17698 }, { "epoch": 8.228730822873082, "grad_norm": 0.6196868419647217, "learning_rate": 2.7248368952908104e-06, "loss": 0.0088, "step": 17700 }, { "epoch": 8.229660622966062, "grad_norm": 0.10430555790662766, "learning_rate": 2.689290546907041e-06, "loss": 0.0048, "step": 17702 }, { "epoch": 8.230590423059041, "grad_norm": 0.2838115692138672, "learning_rate": 2.653964396218416e-06, "loss": 0.0092, "step": 17704 }, { "epoch": 8.231520223152023, "grad_norm": 0.29632750153541565, "learning_rate": 2.6188587918797404e-06, "loss": 0.0152, "step": 17706 }, { "epoch": 8.232450023245002, "grad_norm": 0.4695179760456085, "learning_rate": 2.583974080369119e-06, "loss": 0.0127, "step": 17708 }, { "epoch": 8.233379823337982, "grad_norm": 0.3395276665687561, "learning_rate": 2.5493106059846467e-06, "loss": 0.008, "step": 17710 }, { "epoch": 8.234309623430962, "grad_norm": 0.8410750031471252, "learning_rate": 2.514868710840744e-06, "loss": 0.0198, "step": 17712 }, { "epoch": 8.235239423523943, "grad_norm": 0.26458272337913513, "learning_rate": 2.4806487348650506e-06, "loss": 0.0118, "step": 17714 }, { "epoch": 8.236169223616923, "grad_norm": 0.4875960648059845, "learning_rate": 2.4466510157949143e-06, "loss": 0.0062, "step": 17716 }, { "epoch": 8.237099023709902, "grad_norm": 0.627413809299469, "learning_rate": 2.4128758891741367e-06, "loss": 0.0099, "step": 17718 }, { "epoch": 8.238028823802882, "grad_norm": 0.1866789311170578, "learning_rate": 2.3793236883495406e-06, "loss": 0.0063, "step": 17720 }, { "epoch": 8.238958623895863, "grad_norm": 0.9212197661399841, "learning_rate": 2.345994744467766e-06, "loss": 0.0239, "step": 17722 }, { "epoch": 8.239888423988843, "grad_norm": 0.4814314544200897, "learning_rate": 2.312889386472108e-06, "loss": 0.0141, "step": 17724 }, { "epoch": 8.240818224081822, "grad_norm": 0.20033684372901917, "learning_rate": 2.2800079410990123e-06, "loss": 0.0073, "step": 17726 }, { "epoch": 8.241748024174802, "grad_norm": 0.7449390292167664, "learning_rate": 2.247350732875105e-06, "loss": 0.0142, "step": 17728 }, { "epoch": 8.242677824267782, "grad_norm": 0.4901965856552124, "learning_rate": 2.2149180841138862e-06, "loss": 0.0121, "step": 17730 }, { "epoch": 8.243607624360763, "grad_norm": 0.2746110260486603, "learning_rate": 2.182710314912433e-06, "loss": 0.0071, "step": 17732 }, { "epoch": 8.244537424453743, "grad_norm": 0.9765421152114868, "learning_rate": 2.1507277431484916e-06, "loss": 0.0157, "step": 17734 }, { "epoch": 8.245467224546722, "grad_norm": 0.44326427578926086, "learning_rate": 2.1189706844770667e-06, "loss": 0.0182, "step": 17736 }, { "epoch": 8.246397024639702, "grad_norm": 0.16650740802288055, "learning_rate": 2.087439452327577e-06, "loss": 0.0099, "step": 17738 }, { "epoch": 8.247326824732683, "grad_norm": 0.17663101851940155, "learning_rate": 2.056134357900518e-06, "loss": 0.0078, "step": 17740 }, { "epoch": 8.248256624825663, "grad_norm": 0.8567728996276855, "learning_rate": 2.0250557101644603e-06, "loss": 0.0092, "step": 17742 }, { "epoch": 8.249186424918642, "grad_norm": 0.6101460456848145, "learning_rate": 1.9942038158532564e-06, "loss": 0.0228, "step": 17744 }, { "epoch": 8.250116225011622, "grad_norm": 0.2254439741373062, "learning_rate": 1.963578979462537e-06, "loss": 0.0051, "step": 17746 }, { "epoch": 8.251046025104603, "grad_norm": 0.4442736804485321, "learning_rate": 1.9331815032471463e-06, "loss": 0.0096, "step": 17748 }, { "epoch": 8.251975825197583, "grad_norm": 0.22493349015712738, "learning_rate": 1.9030116872178664e-06, "loss": 0.0067, "step": 17750 }, { "epoch": 8.252905625290563, "grad_norm": 0.4160037636756897, "learning_rate": 1.8730698291385696e-06, "loss": 0.0065, "step": 17752 }, { "epoch": 8.253835425383542, "grad_norm": 0.1505626142024994, "learning_rate": 1.8433562245233722e-06, "loss": 0.0076, "step": 17754 }, { "epoch": 8.254765225476522, "grad_norm": 0.1415056437253952, "learning_rate": 1.8138711666334586e-06, "loss": 0.01, "step": 17756 }, { "epoch": 8.255695025569503, "grad_norm": 0.8619223833084106, "learning_rate": 1.7846149464745759e-06, "loss": 0.0163, "step": 17758 }, { "epoch": 8.256624825662483, "grad_norm": 0.9887003898620605, "learning_rate": 1.7555878527937088e-06, "loss": 0.019, "step": 17760 }, { "epoch": 8.257554625755462, "grad_norm": 0.7316432595252991, "learning_rate": 1.7267901720766178e-06, "loss": 0.0106, "step": 17762 }, { "epoch": 8.258484425848442, "grad_norm": 0.9459018111228943, "learning_rate": 1.6982221885447548e-06, "loss": 0.016, "step": 17764 }, { "epoch": 8.259414225941423, "grad_norm": 0.4879366457462311, "learning_rate": 1.6698841841525516e-06, "loss": 0.0106, "step": 17766 }, { "epoch": 8.260344026034403, "grad_norm": 0.39046788215637207, "learning_rate": 1.6417764385847305e-06, "loss": 0.0098, "step": 17768 }, { "epoch": 8.261273826127383, "grad_norm": 0.4719681143760681, "learning_rate": 1.6138992292533052e-06, "loss": 0.0115, "step": 17770 }, { "epoch": 8.262203626220362, "grad_norm": 0.9416128396987915, "learning_rate": 1.5862528312951963e-06, "loss": 0.0419, "step": 17772 }, { "epoch": 8.263133426313342, "grad_norm": 0.18288907408714294, "learning_rate": 1.5588375175691316e-06, "loss": 0.0126, "step": 17774 }, { "epoch": 8.264063226406323, "grad_norm": 0.3090718984603882, "learning_rate": 1.5316535586531543e-06, "loss": 0.0074, "step": 17776 }, { "epoch": 8.264993026499303, "grad_norm": 0.32009199261665344, "learning_rate": 1.5047012228420316e-06, "loss": 0.0082, "step": 17778 }, { "epoch": 8.265922826592282, "grad_norm": 0.6593506932258606, "learning_rate": 1.4779807761443753e-06, "loss": 0.0109, "step": 17780 }, { "epoch": 8.266852626685262, "grad_norm": 0.6420276165008545, "learning_rate": 1.451492482280262e-06, "loss": 0.0098, "step": 17782 }, { "epoch": 8.267782426778243, "grad_norm": 1.4766091108322144, "learning_rate": 1.4252366026784013e-06, "loss": 0.0223, "step": 17784 }, { "epoch": 8.268712226871223, "grad_norm": 0.5264080166816711, "learning_rate": 1.399213396473759e-06, "loss": 0.01, "step": 17786 }, { "epoch": 8.269642026964203, "grad_norm": 0.3510921001434326, "learning_rate": 1.3734231205048967e-06, "loss": 0.0102, "step": 17788 }, { "epoch": 8.270571827057182, "grad_norm": 0.4211476147174835, "learning_rate": 1.3478660293113707e-06, "loss": 0.0058, "step": 17790 }, { "epoch": 8.271501627150164, "grad_norm": 0.47313904762268066, "learning_rate": 1.322542375131414e-06, "loss": 0.01, "step": 17792 }, { "epoch": 8.272431427243143, "grad_norm": 0.29362860321998596, "learning_rate": 1.297452407899236e-06, "loss": 0.0116, "step": 17794 }, { "epoch": 8.273361227336123, "grad_norm": 0.5022408962249756, "learning_rate": 1.2725963752426298e-06, "loss": 0.0135, "step": 17796 }, { "epoch": 8.274291027429102, "grad_norm": 0.15825581550598145, "learning_rate": 1.2479745224807163e-06, "loss": 0.0084, "step": 17798 }, { "epoch": 8.275220827522082, "grad_norm": 1.0121177434921265, "learning_rate": 1.2235870926211591e-06, "loss": 0.0116, "step": 17800 }, { "epoch": 8.276150627615063, "grad_norm": 0.19118617475032806, "learning_rate": 1.1994343263580983e-06, "loss": 0.0056, "step": 17802 }, { "epoch": 8.277080427708043, "grad_norm": 1.0157015323638916, "learning_rate": 1.1755164620695315e-06, "loss": 0.019, "step": 17804 }, { "epoch": 8.278010227801023, "grad_norm": 0.43593353033065796, "learning_rate": 1.1518337358151775e-06, "loss": 0.0255, "step": 17806 }, { "epoch": 8.278940027894002, "grad_norm": 0.46756434440612793, "learning_rate": 1.1283863813339566e-06, "loss": 0.0154, "step": 17808 }, { "epoch": 8.279869827986984, "grad_norm": 0.5900865197181702, "learning_rate": 1.1051746300417385e-06, "loss": 0.0172, "step": 17810 }, { "epoch": 8.280799628079963, "grad_norm": 0.9309993982315063, "learning_rate": 1.0821987110292443e-06, "loss": 0.0111, "step": 17812 }, { "epoch": 8.281729428172943, "grad_norm": 0.447238028049469, "learning_rate": 1.0594588510594383e-06, "loss": 0.0086, "step": 17814 }, { "epoch": 8.282659228265922, "grad_norm": 0.7265011668205261, "learning_rate": 1.0369552745656117e-06, "loss": 0.0154, "step": 17816 }, { "epoch": 8.283589028358902, "grad_norm": 0.9653324484825134, "learning_rate": 1.014688203648952e-06, "loss": 0.0204, "step": 17818 }, { "epoch": 8.284518828451883, "grad_norm": 0.21877335011959076, "learning_rate": 9.926578580764336e-07, "loss": 0.0112, "step": 17820 }, { "epoch": 8.285448628544863, "grad_norm": 0.19914524257183075, "learning_rate": 9.708644552787266e-07, "loss": 0.0062, "step": 17822 }, { "epoch": 8.286378428637843, "grad_norm": 0.9527505040168762, "learning_rate": 9.493082103478422e-07, "loss": 0.0147, "step": 17824 }, { "epoch": 8.287308228730822, "grad_norm": 0.8049935698509216, "learning_rate": 9.279893360353136e-07, "loss": 0.014, "step": 17826 }, { "epoch": 8.288238028823804, "grad_norm": 0.4282139539718628, "learning_rate": 9.069080427497727e-07, "loss": 0.0105, "step": 17828 }, { "epoch": 8.289167828916783, "grad_norm": 0.8825799822807312, "learning_rate": 8.860645385550524e-07, "loss": 0.0134, "step": 17830 }, { "epoch": 8.290097629009763, "grad_norm": 0.3402288854122162, "learning_rate": 8.654590291681712e-07, "loss": 0.0163, "step": 17832 }, { "epoch": 8.291027429102742, "grad_norm": 1.4455914497375488, "learning_rate": 8.450917179571403e-07, "loss": 0.0242, "step": 17834 }, { "epoch": 8.291957229195724, "grad_norm": 0.22104200720787048, "learning_rate": 8.249628059391433e-07, "loss": 0.0084, "step": 17836 }, { "epoch": 8.292887029288703, "grad_norm": 0.17428936064243317, "learning_rate": 8.050724917783734e-07, "loss": 0.0053, "step": 17838 }, { "epoch": 8.293816829381683, "grad_norm": 0.36988043785095215, "learning_rate": 7.854209717842217e-07, "loss": 0.012, "step": 17840 }, { "epoch": 8.294746629474663, "grad_norm": 0.8014145493507385, "learning_rate": 7.660084399092784e-07, "loss": 0.0098, "step": 17842 }, { "epoch": 8.295676429567642, "grad_norm": 0.5332208871841431, "learning_rate": 7.468350877473564e-07, "loss": 0.0112, "step": 17844 }, { "epoch": 8.296606229660624, "grad_norm": 1.1282687187194824, "learning_rate": 7.279011045317377e-07, "loss": 0.0145, "step": 17846 }, { "epoch": 8.297536029753603, "grad_norm": 1.0272691249847412, "learning_rate": 7.092066771331547e-07, "loss": 0.0142, "step": 17848 }, { "epoch": 8.298465829846583, "grad_norm": 0.662045419216156, "learning_rate": 6.907519900581013e-07, "loss": 0.0125, "step": 17850 }, { "epoch": 8.299395629939562, "grad_norm": 0.26431652903556824, "learning_rate": 6.725372254468411e-07, "loss": 0.0171, "step": 17852 }, { "epoch": 8.300325430032544, "grad_norm": 0.28785422444343567, "learning_rate": 6.545625630717741e-07, "loss": 0.0112, "step": 17854 }, { "epoch": 8.301255230125523, "grad_norm": 0.7324286103248596, "learning_rate": 6.368281803355787e-07, "loss": 0.0141, "step": 17856 }, { "epoch": 8.302185030218503, "grad_norm": 0.16071251034736633, "learning_rate": 6.193342522694141e-07, "loss": 0.0137, "step": 17858 }, { "epoch": 8.303114830311483, "grad_norm": 0.5405944585800171, "learning_rate": 6.020809515313265e-07, "loss": 0.0129, "step": 17860 }, { "epoch": 8.304044630404462, "grad_norm": 0.4435068368911743, "learning_rate": 5.850684484044062e-07, "loss": 0.0094, "step": 17862 }, { "epoch": 8.304974430497444, "grad_norm": 0.1287386119365692, "learning_rate": 5.682969107951752e-07, "loss": 0.0069, "step": 17864 }, { "epoch": 8.305904230590423, "grad_norm": 0.4044678509235382, "learning_rate": 5.517665042319581e-07, "loss": 0.0169, "step": 17866 }, { "epoch": 8.306834030683403, "grad_norm": 0.9673372507095337, "learning_rate": 5.354773918631938e-07, "loss": 0.0139, "step": 17868 }, { "epoch": 8.307763830776382, "grad_norm": 0.8819894790649414, "learning_rate": 5.1942973445586e-07, "loss": 0.0168, "step": 17870 }, { "epoch": 8.308693630869364, "grad_norm": 0.17678675055503845, "learning_rate": 5.036236903938433e-07, "loss": 0.0074, "step": 17872 }, { "epoch": 8.309623430962343, "grad_norm": 0.381384938955307, "learning_rate": 4.88059415676396e-07, "loss": 0.0179, "step": 17874 }, { "epoch": 8.310553231055323, "grad_norm": 0.3469024896621704, "learning_rate": 4.727370639166654e-07, "loss": 0.0089, "step": 17876 }, { "epoch": 8.311483031148303, "grad_norm": 0.2771832048892975, "learning_rate": 4.576567863400363e-07, "loss": 0.0065, "step": 17878 }, { "epoch": 8.312412831241284, "grad_norm": 0.13767631351947784, "learning_rate": 4.428187317827828e-07, "loss": 0.0075, "step": 17880 }, { "epoch": 8.313342631334264, "grad_norm": 0.5039209127426147, "learning_rate": 4.2822304669052984e-07, "loss": 0.0133, "step": 17882 }, { "epoch": 8.314272431427243, "grad_norm": 0.5100387334823608, "learning_rate": 4.1386987511676043e-07, "loss": 0.0139, "step": 17884 }, { "epoch": 8.315202231520223, "grad_norm": 0.3764939308166504, "learning_rate": 3.9975935872151667e-07, "loss": 0.0096, "step": 17886 }, { "epoch": 8.316132031613202, "grad_norm": 0.1416698396205902, "learning_rate": 3.858916367698702e-07, "loss": 0.0226, "step": 17888 }, { "epoch": 8.317061831706184, "grad_norm": 0.5475652813911438, "learning_rate": 3.722668461306651e-07, "loss": 0.0125, "step": 17890 }, { "epoch": 8.317991631799163, "grad_norm": 0.3989063799381256, "learning_rate": 3.588851212750411e-07, "loss": 0.0109, "step": 17892 }, { "epoch": 8.318921431892143, "grad_norm": 1.0452594757080078, "learning_rate": 3.4574659427527916e-07, "loss": 0.0156, "step": 17894 }, { "epoch": 8.319851231985123, "grad_norm": 1.8829686641693115, "learning_rate": 3.328513948033052e-07, "loss": 0.014, "step": 17896 }, { "epoch": 8.320781032078104, "grad_norm": 0.3849094808101654, "learning_rate": 3.201996501295218e-07, "loss": 0.0077, "step": 17898 }, { "epoch": 8.321710832171084, "grad_norm": 0.565604567527771, "learning_rate": 3.077914851215646e-07, "loss": 0.0128, "step": 17900 }, { "epoch": 8.322640632264063, "grad_norm": 1.756973385810852, "learning_rate": 2.9562702224298956e-07, "loss": 0.0414, "step": 17902 }, { "epoch": 8.323570432357043, "grad_norm": 0.7556629776954651, "learning_rate": 2.8370638155216e-07, "loss": 0.0121, "step": 17904 }, { "epoch": 8.324500232450024, "grad_norm": 0.5789260864257812, "learning_rate": 2.7202968070095855e-07, "loss": 0.0173, "step": 17906 }, { "epoch": 8.325430032543004, "grad_norm": 1.0336389541625977, "learning_rate": 2.6059703493372426e-07, "loss": 0.0141, "step": 17908 }, { "epoch": 8.326359832635983, "grad_norm": 0.40943923592567444, "learning_rate": 2.494085570860647e-07, "loss": 0.0059, "step": 17910 }, { "epoch": 8.327289632728963, "grad_norm": 0.5404521822929382, "learning_rate": 2.384643575837179e-07, "loss": 0.0108, "step": 17912 }, { "epoch": 8.328219432821943, "grad_norm": 0.8363484740257263, "learning_rate": 2.2776454444153913e-07, "loss": 0.0147, "step": 17914 }, { "epoch": 8.329149232914924, "grad_norm": 0.23821304738521576, "learning_rate": 2.1730922326234664e-07, "loss": 0.0083, "step": 17916 }, { "epoch": 8.330079033007904, "grad_norm": 0.37457475066185, "learning_rate": 2.0709849723593602e-07, "loss": 0.0075, "step": 17918 }, { "epoch": 8.331008833100883, "grad_norm": 0.818986713886261, "learning_rate": 1.9713246713805612e-07, "loss": 0.0172, "step": 17920 }, { "epoch": 8.331938633193863, "grad_norm": 0.8365544080734253, "learning_rate": 1.874112313294043e-07, "loss": 0.0127, "step": 17922 }, { "epoch": 8.332868433286844, "grad_norm": 0.8209560513496399, "learning_rate": 1.7793488575466055e-07, "loss": 0.0181, "step": 17924 }, { "epoch": 8.333798233379824, "grad_norm": 0.7893480062484741, "learning_rate": 1.6870352394152155e-07, "loss": 0.02, "step": 17926 }, { "epoch": 8.334728033472803, "grad_norm": 1.424383521080017, "learning_rate": 1.597172369997931e-07, "loss": 0.0196, "step": 17928 }, { "epoch": 8.335657833565783, "grad_norm": 0.291000097990036, "learning_rate": 1.5097611362051588e-07, "loss": 0.0075, "step": 17930 }, { "epoch": 8.336587633658763, "grad_norm": 0.7125275135040283, "learning_rate": 1.4248024007503268e-07, "loss": 0.0074, "step": 17932 }, { "epoch": 8.337517433751744, "grad_norm": 0.22345185279846191, "learning_rate": 1.3422970021419198e-07, "loss": 0.0103, "step": 17934 }, { "epoch": 8.338447233844724, "grad_norm": 0.495532751083374, "learning_rate": 1.2622457546749307e-07, "loss": 0.0107, "step": 17936 }, { "epoch": 8.339377033937703, "grad_norm": 0.6901493668556213, "learning_rate": 1.1846494484229216e-07, "loss": 0.0138, "step": 17938 }, { "epoch": 8.340306834030683, "grad_norm": 1.2404756546020508, "learning_rate": 1.1095088492300582e-07, "loss": 0.0108, "step": 17940 }, { "epoch": 8.341236634123664, "grad_norm": 0.5597582459449768, "learning_rate": 1.0368246987035884e-07, "loss": 0.0094, "step": 17942 }, { "epoch": 8.342166434216644, "grad_norm": 0.9578196406364441, "learning_rate": 9.665977142069586e-08, "loss": 0.0169, "step": 17944 }, { "epoch": 8.343096234309623, "grad_norm": 0.19383269548416138, "learning_rate": 8.988285888519314e-08, "loss": 0.0089, "step": 17946 }, { "epoch": 8.344026034402603, "grad_norm": 1.047935128211975, "learning_rate": 8.335179914925343e-08, "loss": 0.0087, "step": 17948 }, { "epoch": 8.344955834495583, "grad_norm": 0.3953966498374939, "learning_rate": 7.706665667180382e-08, "loss": 0.0102, "step": 17950 }, { "epoch": 8.345885634588564, "grad_norm": 0.3471476435661316, "learning_rate": 7.102749348465177e-08, "loss": 0.0134, "step": 17952 }, { "epoch": 8.346815434681544, "grad_norm": 0.23136281967163086, "learning_rate": 6.523436919191338e-08, "loss": 0.0144, "step": 17954 }, { "epoch": 8.347745234774523, "grad_norm": 0.7360634207725525, "learning_rate": 5.968734096936944e-08, "loss": 0.02, "step": 17956 }, { "epoch": 8.348675034867503, "grad_norm": 0.7555460929870605, "learning_rate": 5.4386463563968565e-08, "loss": 0.0131, "step": 17958 }, { "epoch": 8.349604834960484, "grad_norm": 0.686099648475647, "learning_rate": 4.933178929321666e-08, "loss": 0.0154, "step": 17960 }, { "epoch": 8.350534635053464, "grad_norm": 1.1368541717529297, "learning_rate": 4.452336804470498e-08, "loss": 0.0163, "step": 17962 }, { "epoch": 8.351464435146443, "grad_norm": 0.5454837679862976, "learning_rate": 3.996124727562451e-08, "loss": 0.0088, "step": 17964 }, { "epoch": 8.352394235239423, "grad_norm": 0.2890903055667877, "learning_rate": 3.564547201225515e-08, "loss": 0.008, "step": 17966 }, { "epoch": 8.353324035332404, "grad_norm": 0.12706178426742554, "learning_rate": 3.157608484956336e-08, "loss": 0.0055, "step": 17968 }, { "epoch": 8.354253835425384, "grad_norm": 0.7325522303581238, "learning_rate": 2.7753125950755226e-08, "loss": 0.0186, "step": 17970 }, { "epoch": 8.355183635518364, "grad_norm": 0.987224280834198, "learning_rate": 2.4176633046885144e-08, "loss": 0.0185, "step": 17972 }, { "epoch": 8.356113435611343, "grad_norm": 0.6425879597663879, "learning_rate": 2.0846641436500532e-08, "loss": 0.0076, "step": 17974 }, { "epoch": 8.357043235704323, "grad_norm": 0.3774736821651459, "learning_rate": 1.776318398526713e-08, "loss": 0.0066, "step": 17976 }, { "epoch": 8.357973035797304, "grad_norm": 1.0291838645935059, "learning_rate": 1.4926291125677578e-08, "loss": 0.0147, "step": 17978 }, { "epoch": 8.358902835890284, "grad_norm": 0.11746679991483688, "learning_rate": 1.2335990856710015e-08, "loss": 0.0082, "step": 17980 }, { "epoch": 8.359832635983263, "grad_norm": 0.44614219665527344, "learning_rate": 9.992308743586598e-09, "loss": 0.0095, "step": 17982 }, { "epoch": 8.360762436076243, "grad_norm": 0.3654385507106781, "learning_rate": 7.895267917501512e-09, "loss": 0.0054, "step": 17984 }, { "epoch": 8.361692236169224, "grad_norm": 0.23219959437847137, "learning_rate": 6.044889075398914e-09, "loss": 0.0085, "step": 17986 }, { "epoch": 8.362622036262204, "grad_norm": 0.9709481000900269, "learning_rate": 4.441190479775875e-09, "loss": 0.0234, "step": 17988 }, { "epoch": 8.363551836355184, "grad_norm": 0.9518468976020813, "learning_rate": 3.084187958485311e-09, "loss": 0.0184, "step": 17990 }, { "epoch": 8.364481636448163, "grad_norm": 0.14783638715744019, "learning_rate": 1.973894904597209e-09, "loss": 0.0047, "step": 17992 }, { "epoch": 8.365411436541144, "grad_norm": 0.19868218898773193, "learning_rate": 1.1103222762542954e-09, "loss": 0.0088, "step": 17994 }, { "epoch": 8.366341236634124, "grad_norm": 0.5594804883003235, "learning_rate": 4.934785965721172e-10, "loss": 0.0127, "step": 17996 }, { "epoch": 8.367271036727104, "grad_norm": 0.7918022871017456, "learning_rate": 1.233699535446721e-10, "loss": 0.0094, "step": 17998 }, { "epoch": 8.368200836820083, "grad_norm": 0.3376753330230713, "learning_rate": 0.0, "loss": 0.0255, "step": 18000 }, { "epoch": 8.368200836820083, "eval_cer": 0.11403347675516844, "eval_loss": 0.1754252016544342, "eval_runtime": 399.478, "eval_samples_per_second": 31.776, "eval_steps_per_second": 0.994, "step": 18000 }, { "epoch": 8.369130636913063, "grad_norm": 0.0998370498418808, "learning_rate": 1.2336995354467197e-10, "loss": 0.0066, "step": 18002 }, { "epoch": 8.370060437006044, "grad_norm": 1.2554194927215576, "learning_rate": 4.934785965721167e-10, "loss": 0.0216, "step": 18004 }, { "epoch": 8.370990237099024, "grad_norm": 0.6032677292823792, "learning_rate": 1.1103222762542941e-09, "loss": 0.0102, "step": 18006 }, { "epoch": 8.371920037192004, "grad_norm": 0.9819540977478027, "learning_rate": 1.973894904597207e-09, "loss": 0.0119, "step": 18008 }, { "epoch": 8.372849837284983, "grad_norm": 0.3119265139102936, "learning_rate": 3.0841879584853073e-09, "loss": 0.0194, "step": 18010 }, { "epoch": 8.373779637377964, "grad_norm": 1.6597946882247925, "learning_rate": 4.441190479773094e-09, "loss": 0.0189, "step": 18012 }, { "epoch": 8.374709437470944, "grad_norm": 0.33819693326950073, "learning_rate": 6.044889075396132e-09, "loss": 0.0094, "step": 18014 }, { "epoch": 8.375639237563924, "grad_norm": 0.37901604175567627, "learning_rate": 7.895267917501504e-09, "loss": 0.0145, "step": 18016 }, { "epoch": 8.376569037656903, "grad_norm": 0.6409066915512085, "learning_rate": 9.992308743586587e-09, "loss": 0.0137, "step": 18018 }, { "epoch": 8.377498837749883, "grad_norm": 0.09962347149848938, "learning_rate": 1.2335990856712774e-08, "loss": 0.0165, "step": 18020 }, { "epoch": 8.378428637842864, "grad_norm": 0.3324946165084839, "learning_rate": 1.492629112567756e-08, "loss": 0.0064, "step": 18022 }, { "epoch": 8.379358437935844, "grad_norm": 0.5941842198371887, "learning_rate": 1.7763183985269883e-08, "loss": 0.0227, "step": 18024 }, { "epoch": 8.380288238028823, "grad_norm": 0.3917170763015747, "learning_rate": 2.0846641436494953e-08, "loss": 0.0242, "step": 18026 }, { "epoch": 8.381218038121803, "grad_norm": 0.30187562108039856, "learning_rate": 2.417663304687679e-08, "loss": 0.0066, "step": 18028 }, { "epoch": 8.382147838214784, "grad_norm": 0.7304621934890747, "learning_rate": 2.7753125950752423e-08, "loss": 0.0153, "step": 18030 }, { "epoch": 8.383077638307764, "grad_norm": 0.7499760985374451, "learning_rate": 3.1576084849560544e-08, "loss": 0.0075, "step": 18032 }, { "epoch": 8.384007438400744, "grad_norm": 0.8386861681938171, "learning_rate": 3.564547201225789e-08, "loss": 0.0304, "step": 18034 }, { "epoch": 8.384937238493723, "grad_norm": 0.814236044883728, "learning_rate": 3.996124727562169e-08, "loss": 0.0163, "step": 18036 }, { "epoch": 8.385867038586705, "grad_norm": 0.3039742708206177, "learning_rate": 4.4523368044702156e-08, "loss": 0.0085, "step": 18038 }, { "epoch": 8.386796838679684, "grad_norm": 0.26953965425491333, "learning_rate": 4.933178929320827e-08, "loss": 0.0086, "step": 18040 }, { "epoch": 8.387726638772664, "grad_norm": 0.27879562973976135, "learning_rate": 5.4386463563957395e-08, "loss": 0.0074, "step": 18042 }, { "epoch": 8.388656438865643, "grad_norm": 0.16894353926181793, "learning_rate": 5.968734096937214e-08, "loss": 0.0077, "step": 18044 }, { "epoch": 8.389586238958623, "grad_norm": 0.224119633436203, "learning_rate": 6.523436919190774e-08, "loss": 0.008, "step": 18046 }, { "epoch": 8.390516039051604, "grad_norm": 1.354501724243164, "learning_rate": 7.10274934846461e-08, "loss": 0.0146, "step": 18048 }, { "epoch": 8.391445839144584, "grad_norm": 1.3357254266738892, "learning_rate": 7.706665667179814e-08, "loss": 0.0149, "step": 18050 }, { "epoch": 8.392375639237564, "grad_norm": 0.379743367433548, "learning_rate": 8.335179914924775e-08, "loss": 0.0089, "step": 18052 }, { "epoch": 8.393305439330543, "grad_norm": 0.27547797560691833, "learning_rate": 8.988285888518747e-08, "loss": 0.0079, "step": 18054 }, { "epoch": 8.394235239423525, "grad_norm": 0.3849569857120514, "learning_rate": 9.665977142068186e-08, "loss": 0.0092, "step": 18056 }, { "epoch": 8.395165039516504, "grad_norm": 0.5078549981117249, "learning_rate": 1.0368246987035592e-07, "loss": 0.0096, "step": 18058 }, { "epoch": 8.396094839609484, "grad_norm": 0.6471772193908691, "learning_rate": 1.1095088492300012e-07, "loss": 0.0121, "step": 18060 }, { "epoch": 8.397024639702463, "grad_norm": 0.5423402786254883, "learning_rate": 1.1846494484228643e-07, "loss": 0.0104, "step": 18062 }, { "epoch": 8.397954439795445, "grad_norm": 1.0371744632720947, "learning_rate": 1.2622457546749567e-07, "loss": 0.0128, "step": 18064 }, { "epoch": 8.398884239888424, "grad_norm": 0.7831484079360962, "learning_rate": 1.3422970021418626e-07, "loss": 0.0103, "step": 18066 }, { "epoch": 8.399814039981404, "grad_norm": 0.9143016934394836, "learning_rate": 1.4248024007502696e-07, "loss": 0.0129, "step": 18068 }, { "epoch": 8.400743840074384, "grad_norm": 0.46969595551490784, "learning_rate": 1.5097611362050182e-07, "loss": 0.0079, "step": 18070 }, { "epoch": 8.401673640167363, "grad_norm": 0.537137508392334, "learning_rate": 1.597172369997874e-07, "loss": 0.0145, "step": 18072 }, { "epoch": 8.402603440260345, "grad_norm": 0.710637092590332, "learning_rate": 1.6870352394151584e-07, "loss": 0.0137, "step": 18074 }, { "epoch": 8.403533240353324, "grad_norm": 0.20448346436023712, "learning_rate": 1.7793488575465486e-07, "loss": 0.0049, "step": 18076 }, { "epoch": 8.404463040446304, "grad_norm": 0.4487100839614868, "learning_rate": 1.8741123132940696e-07, "loss": 0.0118, "step": 18078 }, { "epoch": 8.405392840539283, "grad_norm": 0.35569679737091064, "learning_rate": 1.9713246713804765e-07, "loss": 0.0076, "step": 18080 }, { "epoch": 8.406322640632265, "grad_norm": 0.32803648710250854, "learning_rate": 2.0709849723591646e-07, "loss": 0.0086, "step": 18082 }, { "epoch": 8.407252440725244, "grad_norm": 0.4444686770439148, "learning_rate": 2.1730922326234095e-07, "loss": 0.0291, "step": 18084 }, { "epoch": 8.408182240818224, "grad_norm": 0.6144905090332031, "learning_rate": 2.2776454444153058e-07, "loss": 0.0088, "step": 18086 }, { "epoch": 8.409112040911204, "grad_norm": 0.2745049297809601, "learning_rate": 2.384643575837232e-07, "loss": 0.0059, "step": 18088 }, { "epoch": 8.410041841004183, "grad_norm": 0.2762964069843292, "learning_rate": 2.4940855708605613e-07, "loss": 0.0104, "step": 18090 }, { "epoch": 8.410971641097165, "grad_norm": 1.2413891553878784, "learning_rate": 2.6059703493371563e-07, "loss": 0.019, "step": 18092 }, { "epoch": 8.411901441190144, "grad_norm": 0.5745894312858582, "learning_rate": 2.7202968070094987e-07, "loss": 0.0114, "step": 18094 }, { "epoch": 8.412831241283124, "grad_norm": 0.5092560648918152, "learning_rate": 2.8370638155213747e-07, "loss": 0.0146, "step": 18096 }, { "epoch": 8.413761041376103, "grad_norm": 0.6780372262001038, "learning_rate": 2.9562702224299475e-07, "loss": 0.0073, "step": 18098 }, { "epoch": 8.414690841469085, "grad_norm": 0.4267478287220001, "learning_rate": 3.077914851215558e-07, "loss": 0.0062, "step": 18100 }, { "epoch": 8.415620641562064, "grad_norm": 0.5156311988830566, "learning_rate": 3.2019965012951014e-07, "loss": 0.0125, "step": 18102 }, { "epoch": 8.416550441655044, "grad_norm": 0.7927403450012207, "learning_rate": 3.328513948032963e-07, "loss": 0.0102, "step": 18104 }, { "epoch": 8.417480241748024, "grad_norm": 0.5195902585983276, "learning_rate": 3.457465942752675e-07, "loss": 0.0181, "step": 18106 }, { "epoch": 8.418410041841003, "grad_norm": 0.6050564050674438, "learning_rate": 3.588851212750461e-07, "loss": 0.0184, "step": 18108 }, { "epoch": 8.419339841933985, "grad_norm": 0.40464407205581665, "learning_rate": 3.7226684613063946e-07, "loss": 0.0189, "step": 18110 }, { "epoch": 8.420269642026964, "grad_norm": 0.7387388944625854, "learning_rate": 3.8589163676987505e-07, "loss": 0.0101, "step": 18112 }, { "epoch": 8.421199442119944, "grad_norm": 0.404746413230896, "learning_rate": 3.9975935872150756e-07, "loss": 0.0113, "step": 18114 }, { "epoch": 8.422129242212923, "grad_norm": 0.3919163644313812, "learning_rate": 4.138698751167485e-07, "loss": 0.009, "step": 18116 }, { "epoch": 8.423059042305905, "grad_norm": 0.5011469125747681, "learning_rate": 4.282230466905179e-07, "loss": 0.0117, "step": 18118 }, { "epoch": 8.423988842398884, "grad_norm": 0.8530802130699158, "learning_rate": 4.4281873178277363e-07, "loss": 0.01, "step": 18120 }, { "epoch": 8.424918642491864, "grad_norm": 0.792198121547699, "learning_rate": 4.576567863400244e-07, "loss": 0.0097, "step": 18122 }, { "epoch": 8.425848442584844, "grad_norm": 0.6009369492530823, "learning_rate": 4.727370639166368e-07, "loss": 0.0223, "step": 18124 }, { "epoch": 8.426778242677825, "grad_norm": 0.602408230304718, "learning_rate": 4.88059415676384e-07, "loss": 0.0105, "step": 18126 }, { "epoch": 8.427708042770805, "grad_norm": 0.15756192803382874, "learning_rate": 5.036236903938311e-07, "loss": 0.0186, "step": 18128 }, { "epoch": 8.428637842863784, "grad_norm": 0.49584102630615234, "learning_rate": 5.194297344558479e-07, "loss": 0.0192, "step": 18130 }, { "epoch": 8.429567642956764, "grad_norm": 0.7364180088043213, "learning_rate": 5.354773918631982e-07, "loss": 0.0085, "step": 18132 }, { "epoch": 8.430497443049743, "grad_norm": 1.111828327178955, "learning_rate": 5.517665042319457e-07, "loss": 0.0161, "step": 18134 }, { "epoch": 8.431427243142725, "grad_norm": 0.5349346995353699, "learning_rate": 5.682969107951434e-07, "loss": 0.0097, "step": 18136 }, { "epoch": 8.432357043235704, "grad_norm": 0.16760462522506714, "learning_rate": 5.850684484043743e-07, "loss": 0.005, "step": 18138 }, { "epoch": 8.433286843328684, "grad_norm": 0.08549199998378754, "learning_rate": 6.020809515313111e-07, "loss": 0.0039, "step": 18140 }, { "epoch": 8.434216643421664, "grad_norm": 0.4202257990837097, "learning_rate": 6.193342522694208e-07, "loss": 0.0115, "step": 18142 }, { "epoch": 8.435146443514645, "grad_norm": 0.40482088923454285, "learning_rate": 6.368281803355659e-07, "loss": 0.0094, "step": 18144 }, { "epoch": 8.436076243607625, "grad_norm": 0.31006067991256714, "learning_rate": 6.54562563071761e-07, "loss": 0.0059, "step": 18146 }, { "epoch": 8.437006043700604, "grad_norm": 0.41633620858192444, "learning_rate": 6.72537225446828e-07, "loss": 0.0065, "step": 18148 }, { "epoch": 8.437935843793584, "grad_norm": 0.8552190065383911, "learning_rate": 6.907519900580659e-07, "loss": 0.0097, "step": 18150 }, { "epoch": 8.438865643886565, "grad_norm": 0.13154394924640656, "learning_rate": 7.092066771331416e-07, "loss": 0.0097, "step": 18152 }, { "epoch": 8.439795443979545, "grad_norm": 0.38132670521736145, "learning_rate": 7.279011045317245e-07, "loss": 0.011, "step": 18154 }, { "epoch": 8.440725244072524, "grad_norm": 0.45592886209487915, "learning_rate": 7.468350877473626e-07, "loss": 0.0077, "step": 18156 }, { "epoch": 8.441655044165504, "grad_norm": 0.28422224521636963, "learning_rate": 7.660084399092652e-07, "loss": 0.0067, "step": 18158 }, { "epoch": 8.442584844258484, "grad_norm": 0.7527597546577454, "learning_rate": 7.854209717842055e-07, "loss": 0.0191, "step": 18160 }, { "epoch": 8.443514644351465, "grad_norm": 0.3854747712612152, "learning_rate": 8.050724917783571e-07, "loss": 0.0121, "step": 18162 }, { "epoch": 8.444444444444445, "grad_norm": 0.16312682628631592, "learning_rate": 8.249628059391045e-07, "loss": 0.0062, "step": 18164 }, { "epoch": 8.445374244537424, "grad_norm": 0.44979262351989746, "learning_rate": 8.450917179571209e-07, "loss": 0.0094, "step": 18166 }, { "epoch": 8.446304044630404, "grad_norm": 0.5982494950294495, "learning_rate": 8.654590291681545e-07, "loss": 0.0176, "step": 18168 }, { "epoch": 8.447233844723385, "grad_norm": 0.7038086652755737, "learning_rate": 8.860645385550357e-07, "loss": 0.0166, "step": 18170 }, { "epoch": 8.448163644816365, "grad_norm": 0.4201935827732086, "learning_rate": 9.069080427497558e-07, "loss": 0.0067, "step": 18172 }, { "epoch": 8.449093444909344, "grad_norm": 0.6639384627342224, "learning_rate": 9.279893360352939e-07, "loss": 0.0156, "step": 18174 }, { "epoch": 8.450023245002324, "grad_norm": 1.177718997001648, "learning_rate": 9.493082103478476e-07, "loss": 0.0186, "step": 18176 }, { "epoch": 8.450953045095304, "grad_norm": 0.7489449977874756, "learning_rate": 9.708644552786847e-07, "loss": 0.01, "step": 18178 }, { "epoch": 8.451882845188285, "grad_norm": 0.301455557346344, "learning_rate": 9.926578580763916e-07, "loss": 0.0127, "step": 18180 }, { "epoch": 8.452812645281265, "grad_norm": 0.6811731457710266, "learning_rate": 1.0146882036489326e-06, "loss": 0.0121, "step": 18182 }, { "epoch": 8.453742445374244, "grad_norm": 0.2549739480018616, "learning_rate": 1.0369552745655918e-06, "loss": 0.0095, "step": 18184 }, { "epoch": 8.454672245467224, "grad_norm": 0.37914666533470154, "learning_rate": 1.059458851059446e-06, "loss": 0.0076, "step": 18186 }, { "epoch": 8.455602045560205, "grad_norm": 0.9363092184066772, "learning_rate": 1.082198711029224e-06, "loss": 0.0087, "step": 18188 }, { "epoch": 8.456531845653185, "grad_norm": 0.30847758054733276, "learning_rate": 1.105174630041718e-06, "loss": 0.0162, "step": 18190 }, { "epoch": 8.457461645746164, "grad_norm": 1.209774374961853, "learning_rate": 1.1283863813339081e-06, "loss": 0.0172, "step": 18192 }, { "epoch": 8.458391445839144, "grad_norm": 0.35819879174232483, "learning_rate": 1.1518337358151566e-06, "loss": 0.0238, "step": 18194 }, { "epoch": 8.459321245932125, "grad_norm": 0.38186636567115784, "learning_rate": 1.1755164620695353e-06, "loss": 0.0103, "step": 18196 }, { "epoch": 8.460251046025105, "grad_norm": 0.3559301495552063, "learning_rate": 1.1994343263580742e-06, "loss": 0.0083, "step": 18198 }, { "epoch": 8.461180846118085, "grad_norm": 0.45756638050079346, "learning_rate": 1.2235870926211627e-06, "loss": 0.0094, "step": 18200 }, { "epoch": 8.462110646211064, "grad_norm": 0.2081354856491089, "learning_rate": 1.2479745224806947e-06, "loss": 0.0078, "step": 18202 }, { "epoch": 8.463040446304044, "grad_norm": 0.6516788601875305, "learning_rate": 1.2725963752426084e-06, "loss": 0.0111, "step": 18204 }, { "epoch": 8.463970246397025, "grad_norm": 0.4891257882118225, "learning_rate": 1.297452407899184e-06, "loss": 0.0124, "step": 18206 }, { "epoch": 8.464900046490005, "grad_norm": 0.4944410026073456, "learning_rate": 1.3225423751313898e-06, "loss": 0.0106, "step": 18208 }, { "epoch": 8.465829846582984, "grad_norm": 0.20907746255397797, "learning_rate": 1.347866029311377e-06, "loss": 0.0062, "step": 18210 }, { "epoch": 8.466759646675964, "grad_norm": 0.343549519777298, "learning_rate": 1.3734231205048726e-06, "loss": 0.0124, "step": 18212 }, { "epoch": 8.467689446768945, "grad_norm": 0.6120157241821289, "learning_rate": 1.3992133964737345e-06, "loss": 0.0085, "step": 18214 }, { "epoch": 8.468619246861925, "grad_norm": 0.5826553702354431, "learning_rate": 1.4252366026783795e-06, "loss": 0.0147, "step": 18216 }, { "epoch": 8.469549046954905, "grad_norm": 0.3662453889846802, "learning_rate": 1.451492482280207e-06, "loss": 0.0134, "step": 18218 }, { "epoch": 8.470478847047884, "grad_norm": 0.24559229612350464, "learning_rate": 1.4779807761443507e-06, "loss": 0.0176, "step": 18220 }, { "epoch": 8.471408647140866, "grad_norm": 0.19339723885059357, "learning_rate": 1.5047012228420098e-06, "loss": 0.0116, "step": 18222 }, { "epoch": 8.472338447233845, "grad_norm": 0.6463824510574341, "learning_rate": 1.5316535586531327e-06, "loss": 0.0079, "step": 18224 }, { "epoch": 8.473268247326825, "grad_norm": 0.6508440375328064, "learning_rate": 1.558837517569107e-06, "loss": 0.0127, "step": 18226 }, { "epoch": 8.474198047419804, "grad_norm": 0.21000754833221436, "learning_rate": 1.5862528312951718e-06, "loss": 0.0055, "step": 18228 }, { "epoch": 8.475127847512784, "grad_norm": 0.5374513864517212, "learning_rate": 1.613899229253311e-06, "loss": 0.0101, "step": 18230 }, { "epoch": 8.476057647605765, "grad_norm": 0.23908939957618713, "learning_rate": 1.6417764385846753e-06, "loss": 0.0062, "step": 18232 }, { "epoch": 8.476987447698745, "grad_norm": 0.4512489140033722, "learning_rate": 1.6698841841524931e-06, "loss": 0.018, "step": 18234 }, { "epoch": 8.477917247791725, "grad_norm": 0.9203043580055237, "learning_rate": 1.6982221885447298e-06, "loss": 0.0136, "step": 18236 }, { "epoch": 8.478847047884704, "grad_norm": 0.6563382744789124, "learning_rate": 1.7267901720765926e-06, "loss": 0.0108, "step": 18238 }, { "epoch": 8.479776847977686, "grad_norm": 0.7132609486579895, "learning_rate": 1.755587852793714e-06, "loss": 0.0146, "step": 18240 }, { "epoch": 8.480706648070665, "grad_norm": 0.22748571634292603, "learning_rate": 1.78461494647455e-06, "loss": 0.0064, "step": 18242 }, { "epoch": 8.481636448163645, "grad_norm": 0.18994271755218506, "learning_rate": 1.8138711666334632e-06, "loss": 0.0071, "step": 18244 }, { "epoch": 8.482566248256624, "grad_norm": 0.7985705733299255, "learning_rate": 1.8433562245233129e-06, "loss": 0.0147, "step": 18246 }, { "epoch": 8.483496048349604, "grad_norm": 0.7791063189506531, "learning_rate": 1.8730698291385073e-06, "loss": 0.014, "step": 18248 }, { "epoch": 8.484425848442585, "grad_norm": 0.38557112216949463, "learning_rate": 1.9030116872178397e-06, "loss": 0.011, "step": 18250 }, { "epoch": 8.485355648535565, "grad_norm": 0.2463228702545166, "learning_rate": 1.9331815032471167e-06, "loss": 0.0063, "step": 18252 }, { "epoch": 8.486285448628545, "grad_norm": 0.29197266697883606, "learning_rate": 1.9635789794625437e-06, "loss": 0.0075, "step": 18254 }, { "epoch": 8.487215248721524, "grad_norm": 0.18821941316127777, "learning_rate": 1.9942038158532267e-06, "loss": 0.0055, "step": 18256 }, { "epoch": 8.488145048814506, "grad_norm": 0.3671514093875885, "learning_rate": 2.025055710164431e-06, "loss": 0.0082, "step": 18258 }, { "epoch": 8.489074848907485, "grad_norm": 0.3627327084541321, "learning_rate": 2.056134357900453e-06, "loss": 0.0158, "step": 18260 }, { "epoch": 8.490004649000465, "grad_norm": 0.3422948718070984, "learning_rate": 2.0874394523275113e-06, "loss": 0.0106, "step": 18262 }, { "epoch": 8.490934449093444, "grad_norm": 0.3186364769935608, "learning_rate": 2.118970684477073e-06, "loss": 0.0069, "step": 18264 }, { "epoch": 8.491864249186424, "grad_norm": 0.7359154224395752, "learning_rate": 2.150727743148462e-06, "loss": 0.0107, "step": 18266 }, { "epoch": 8.492794049279405, "grad_norm": 0.32429736852645874, "learning_rate": 2.182710314912404e-06, "loss": 0.0085, "step": 18268 }, { "epoch": 8.493723849372385, "grad_norm": 0.6846494674682617, "learning_rate": 2.214918084113857e-06, "loss": 0.0148, "step": 18270 }, { "epoch": 8.494653649465365, "grad_norm": 0.34867429733276367, "learning_rate": 2.247350732875076e-06, "loss": 0.0084, "step": 18272 }, { "epoch": 8.495583449558344, "grad_norm": 1.0757765769958496, "learning_rate": 2.2800079410989835e-06, "loss": 0.0241, "step": 18274 }, { "epoch": 8.496513249651326, "grad_norm": 0.6008700132369995, "learning_rate": 2.3128893864720787e-06, "loss": 0.0124, "step": 18276 }, { "epoch": 8.497443049744305, "grad_norm": 0.2842503488063812, "learning_rate": 2.3459947444677338e-06, "loss": 0.0075, "step": 18278 }, { "epoch": 8.498372849837285, "grad_norm": 0.5121694803237915, "learning_rate": 2.3793236883495084e-06, "loss": 0.0164, "step": 18280 }, { "epoch": 8.499302649930264, "grad_norm": 0.9328449964523315, "learning_rate": 2.412875889174105e-06, "loss": 0.0235, "step": 18282 }, { "epoch": 8.500232450023246, "grad_norm": 0.7219820022583008, "learning_rate": 2.4466510157949215e-06, "loss": 0.0107, "step": 18284 }, { "epoch": 8.501162250116225, "grad_norm": 0.4377410411834717, "learning_rate": 2.4806487348650193e-06, "loss": 0.0114, "step": 18286 }, { "epoch": 8.502092050209205, "grad_norm": 0.2948792576789856, "learning_rate": 2.5148687108407133e-06, "loss": 0.0106, "step": 18288 }, { "epoch": 8.503021850302185, "grad_norm": 0.6174516677856445, "learning_rate": 2.549310605984616e-06, "loss": 0.0108, "step": 18290 }, { "epoch": 8.503951650395164, "grad_norm": 0.615913987159729, "learning_rate": 2.583974080369088e-06, "loss": 0.0082, "step": 18292 }, { "epoch": 8.504881450488146, "grad_norm": 0.46958646178245544, "learning_rate": 2.6188587918797125e-06, "loss": 0.0117, "step": 18294 }, { "epoch": 8.505811250581125, "grad_norm": 0.40030917525291443, "learning_rate": 2.653964396218385e-06, "loss": 0.0099, "step": 18296 }, { "epoch": 8.506741050674105, "grad_norm": 0.28585872054100037, "learning_rate": 2.6892905469070516e-06, "loss": 0.0088, "step": 18298 }, { "epoch": 8.507670850767084, "grad_norm": 0.46420589089393616, "learning_rate": 2.7248368952907786e-06, "loss": 0.0085, "step": 18300 }, { "epoch": 8.508600650860066, "grad_norm": 0.24343866109848022, "learning_rate": 2.7606030905415034e-06, "loss": 0.0079, "step": 18302 }, { "epoch": 8.509530450953045, "grad_norm": 1.1444145441055298, "learning_rate": 2.7965887796613973e-06, "loss": 0.0117, "step": 18304 }, { "epoch": 8.510460251046025, "grad_norm": 0.9106658101081848, "learning_rate": 2.8327936074860762e-06, "loss": 0.0092, "step": 18306 }, { "epoch": 8.511390051139005, "grad_norm": 0.4467015266418457, "learning_rate": 2.869217216688628e-06, "loss": 0.007, "step": 18308 }, { "epoch": 8.512319851231986, "grad_norm": 0.9453433752059937, "learning_rate": 2.9058592477826475e-06, "loss": 0.0166, "step": 18310 }, { "epoch": 8.513249651324966, "grad_norm": 0.5955293774604797, "learning_rate": 2.9427193391261297e-06, "loss": 0.0123, "step": 18312 }, { "epoch": 8.514179451417945, "grad_norm": 0.6230605244636536, "learning_rate": 2.979797126924888e-06, "loss": 0.0127, "step": 18314 }, { "epoch": 8.515109251510925, "grad_norm": 0.5132296085357666, "learning_rate": 3.0170922452360465e-06, "loss": 0.0057, "step": 18316 }, { "epoch": 8.516039051603904, "grad_norm": 0.4921969473361969, "learning_rate": 3.054604325971973e-06, "loss": 0.0102, "step": 18318 }, { "epoch": 8.516968851696886, "grad_norm": 0.957073450088501, "learning_rate": 3.092332998903401e-06, "loss": 0.0116, "step": 18320 }, { "epoch": 8.517898651789865, "grad_norm": 0.20846331119537354, "learning_rate": 3.130277891663648e-06, "loss": 0.0078, "step": 18322 }, { "epoch": 8.518828451882845, "grad_norm": 0.4101690351963043, "learning_rate": 3.1684386297519813e-06, "loss": 0.009, "step": 18324 }, { "epoch": 8.519758251975825, "grad_norm": 0.4630262553691864, "learning_rate": 3.2068148365372408e-06, "loss": 0.0083, "step": 18326 }, { "epoch": 8.520688052068806, "grad_norm": 0.10531336069107056, "learning_rate": 3.2454061332618403e-06, "loss": 0.0038, "step": 18328 }, { "epoch": 8.521617852161786, "grad_norm": 0.22335590422153473, "learning_rate": 3.2842121390451743e-06, "loss": 0.0083, "step": 18330 }, { "epoch": 8.522547652254765, "grad_norm": 0.45501840114593506, "learning_rate": 3.3232324708877698e-06, "loss": 0.0097, "step": 18332 }, { "epoch": 8.523477452347745, "grad_norm": 0.12630854547023773, "learning_rate": 3.3624667436745204e-06, "loss": 0.0067, "step": 18334 }, { "epoch": 8.524407252440724, "grad_norm": 0.23966678977012634, "learning_rate": 3.4019145701790887e-06, "loss": 0.0073, "step": 18336 }, { "epoch": 8.525337052533706, "grad_norm": 0.44451090693473816, "learning_rate": 3.4415755610673903e-06, "loss": 0.011, "step": 18338 }, { "epoch": 8.526266852626685, "grad_norm": 0.29431018233299255, "learning_rate": 3.481449324901374e-06, "loss": 0.0066, "step": 18340 }, { "epoch": 8.527196652719665, "grad_norm": 0.32508009672164917, "learning_rate": 3.5215354681431757e-06, "loss": 0.0069, "step": 18342 }, { "epoch": 8.528126452812645, "grad_norm": 0.3236723244190216, "learning_rate": 3.5618335951586543e-06, "loss": 0.005, "step": 18344 }, { "epoch": 8.529056252905626, "grad_norm": 0.5884679555892944, "learning_rate": 3.6023433082216543e-06, "loss": 0.0098, "step": 18346 }, { "epoch": 8.529986052998606, "grad_norm": 0.8526958227157593, "learning_rate": 3.643064207517622e-06, "loss": 0.0146, "step": 18348 }, { "epoch": 8.530915853091585, "grad_norm": 0.33831194043159485, "learning_rate": 3.683995891147668e-06, "loss": 0.0166, "step": 18350 }, { "epoch": 8.531845653184565, "grad_norm": 0.175294890999794, "learning_rate": 3.725137955132699e-06, "loss": 0.0095, "step": 18352 }, { "epoch": 8.532775453277544, "grad_norm": 0.6291095614433289, "learning_rate": 3.766489993417056e-06, "loss": 0.0324, "step": 18354 }, { "epoch": 8.533705253370526, "grad_norm": 0.39497458934783936, "learning_rate": 3.808051597872861e-06, "loss": 0.0102, "step": 18356 }, { "epoch": 8.534635053463505, "grad_norm": 0.36380767822265625, "learning_rate": 3.8498223583039095e-06, "loss": 0.0079, "step": 18358 }, { "epoch": 8.535564853556485, "grad_norm": 0.7141277194023132, "learning_rate": 3.891801862449607e-06, "loss": 0.011, "step": 18360 }, { "epoch": 8.536494653649465, "grad_norm": 0.2844918668270111, "learning_rate": 3.9339896959891985e-06, "loss": 0.01, "step": 18362 }, { "epoch": 8.537424453742446, "grad_norm": 0.5186052322387695, "learning_rate": 3.976385442545751e-06, "loss": 0.0109, "step": 18364 }, { "epoch": 8.538354253835426, "grad_norm": 0.9245190024375916, "learning_rate": 4.018988683690408e-06, "loss": 0.0162, "step": 18366 }, { "epoch": 8.539284053928405, "grad_norm": 0.5209224224090576, "learning_rate": 4.06179899894643e-06, "loss": 0.007, "step": 18368 }, { "epoch": 8.540213854021385, "grad_norm": 0.4294033646583557, "learning_rate": 4.10481596579319e-06, "loss": 0.0156, "step": 18370 }, { "epoch": 8.541143654114366, "grad_norm": 0.25534042716026306, "learning_rate": 4.148039159670738e-06, "loss": 0.0087, "step": 18372 }, { "epoch": 8.542073454207346, "grad_norm": 0.573744535446167, "learning_rate": 4.191468153983404e-06, "loss": 0.014, "step": 18374 }, { "epoch": 8.543003254300325, "grad_norm": 0.2005297988653183, "learning_rate": 4.235102520104694e-06, "loss": 0.0048, "step": 18376 }, { "epoch": 8.543933054393305, "grad_norm": 0.5627358555793762, "learning_rate": 4.278941827380935e-06, "loss": 0.0108, "step": 18378 }, { "epoch": 8.544862854486286, "grad_norm": 0.9244483113288879, "learning_rate": 4.3229856431359095e-06, "loss": 0.0297, "step": 18380 }, { "epoch": 8.545792654579266, "grad_norm": 0.4180949330329895, "learning_rate": 4.367233532674987e-06, "loss": 0.0117, "step": 18382 }, { "epoch": 8.546722454672246, "grad_norm": 0.6372652649879456, "learning_rate": 4.411685059289262e-06, "loss": 0.006, "step": 18384 }, { "epoch": 8.547652254765225, "grad_norm": 1.0012112855911255, "learning_rate": 4.456339784260265e-06, "loss": 0.0192, "step": 18386 }, { "epoch": 8.548582054858205, "grad_norm": 1.0023584365844727, "learning_rate": 4.501197266863687e-06, "loss": 0.0093, "step": 18388 }, { "epoch": 8.549511854951186, "grad_norm": 0.222940132021904, "learning_rate": 4.54625706437438e-06, "loss": 0.0125, "step": 18390 }, { "epoch": 8.550441655044166, "grad_norm": 0.8069586157798767, "learning_rate": 4.591518732070388e-06, "loss": 0.0213, "step": 18392 }, { "epoch": 8.551371455137145, "grad_norm": 0.3546835780143738, "learning_rate": 4.636981823237225e-06, "loss": 0.0115, "step": 18394 }, { "epoch": 8.552301255230125, "grad_norm": 0.3538694977760315, "learning_rate": 4.682645889172633e-06, "loss": 0.0078, "step": 18396 }, { "epoch": 8.553231055323106, "grad_norm": 0.732047975063324, "learning_rate": 4.728510479190616e-06, "loss": 0.0094, "step": 18398 }, { "epoch": 8.554160855416086, "grad_norm": 0.3285782039165497, "learning_rate": 4.774575140626289e-06, "loss": 0.0074, "step": 18400 }, { "epoch": 8.555090655509066, "grad_norm": 0.2980027496814728, "learning_rate": 4.820839418839992e-06, "loss": 0.0056, "step": 18402 }, { "epoch": 8.556020455602045, "grad_norm": 0.3284580409526825, "learning_rate": 4.867302857221921e-06, "loss": 0.0059, "step": 18404 }, { "epoch": 8.556950255695025, "grad_norm": 1.1409138441085815, "learning_rate": 4.913964997196803e-06, "loss": 0.0206, "step": 18406 }, { "epoch": 8.557880055788006, "grad_norm": 0.19755685329437256, "learning_rate": 4.960825378228049e-06, "loss": 0.0075, "step": 18408 }, { "epoch": 8.558809855880986, "grad_norm": 0.16672684252262115, "learning_rate": 5.0078835378226696e-06, "loss": 0.0049, "step": 18410 }, { "epoch": 8.559739655973965, "grad_norm": 0.6461138129234314, "learning_rate": 5.055139011535681e-06, "loss": 0.0133, "step": 18412 }, { "epoch": 8.560669456066945, "grad_norm": 0.6491758823394775, "learning_rate": 5.102591332974585e-06, "loss": 0.0165, "step": 18414 }, { "epoch": 8.561599256159926, "grad_norm": 0.9313555359840393, "learning_rate": 5.150240033804121e-06, "loss": 0.0166, "step": 18416 }, { "epoch": 8.562529056252906, "grad_norm": 0.6389481425285339, "learning_rate": 5.1980846437507996e-06, "loss": 0.0082, "step": 18418 }, { "epoch": 8.563458856345886, "grad_norm": 0.39314040541648865, "learning_rate": 5.2461246906077386e-06, "loss": 0.0264, "step": 18420 }, { "epoch": 8.564388656438865, "grad_norm": 0.57613205909729, "learning_rate": 5.294359700238966e-06, "loss": 0.0121, "step": 18422 }, { "epoch": 8.565318456531845, "grad_norm": 0.20188476145267487, "learning_rate": 5.342789196584464e-06, "loss": 0.0088, "step": 18424 }, { "epoch": 8.566248256624826, "grad_norm": 0.2145439237356186, "learning_rate": 5.391412701664709e-06, "loss": 0.0074, "step": 18426 }, { "epoch": 8.567178056717806, "grad_norm": 0.8788465857505798, "learning_rate": 5.440229735585282e-06, "loss": 0.0118, "step": 18428 }, { "epoch": 8.568107856810785, "grad_norm": 0.29749664664268494, "learning_rate": 5.489239816541767e-06, "loss": 0.0144, "step": 18430 }, { "epoch": 8.569037656903765, "grad_norm": 0.12523576617240906, "learning_rate": 5.538442460824392e-06, "loss": 0.0066, "step": 18432 }, { "epoch": 8.569967456996746, "grad_norm": 0.33940476179122925, "learning_rate": 5.5878371828229845e-06, "loss": 0.0133, "step": 18434 }, { "epoch": 8.570897257089726, "grad_norm": 0.5732578635215759, "learning_rate": 5.637423495031628e-06, "loss": 0.0383, "step": 18436 }, { "epoch": 8.571827057182706, "grad_norm": 0.6768832802772522, "learning_rate": 5.6872009080533324e-06, "loss": 0.0176, "step": 18438 }, { "epoch": 8.572756857275685, "grad_norm": 0.6598361730575562, "learning_rate": 5.7371689306052385e-06, "loss": 0.012, "step": 18440 }, { "epoch": 8.573686657368667, "grad_norm": 0.32618215680122375, "learning_rate": 5.787327069523076e-06, "loss": 0.0078, "step": 18442 }, { "epoch": 8.574616457461646, "grad_norm": 0.43221673369407654, "learning_rate": 5.837674829766214e-06, "loss": 0.0063, "step": 18444 }, { "epoch": 8.575546257554626, "grad_norm": 0.6031776666641235, "learning_rate": 5.888211714422697e-06, "loss": 0.0201, "step": 18446 }, { "epoch": 8.576476057647605, "grad_norm": 1.239077091217041, "learning_rate": 5.938937224713758e-06, "loss": 0.0231, "step": 18448 }, { "epoch": 8.577405857740585, "grad_norm": 1.0556238889694214, "learning_rate": 5.989850859999206e-06, "loss": 0.0256, "step": 18450 }, { "epoch": 8.578335657833566, "grad_norm": 0.5209099054336548, "learning_rate": 6.040952117781901e-06, "loss": 0.0112, "step": 18452 }, { "epoch": 8.579265457926546, "grad_norm": 0.7713376879692078, "learning_rate": 6.092240493713173e-06, "loss": 0.0145, "step": 18454 }, { "epoch": 8.580195258019526, "grad_norm": 0.5543222427368164, "learning_rate": 6.143715481597405e-06, "loss": 0.0162, "step": 18456 }, { "epoch": 8.581125058112505, "grad_norm": 0.3284603953361511, "learning_rate": 6.195376573397182e-06, "loss": 0.0095, "step": 18458 }, { "epoch": 8.582054858205487, "grad_norm": 0.5856446623802185, "learning_rate": 6.247223259238504e-06, "loss": 0.0125, "step": 18460 }, { "epoch": 8.582984658298466, "grad_norm": 0.14169315993785858, "learning_rate": 6.299255027415398e-06, "loss": 0.0101, "step": 18462 }, { "epoch": 8.583914458391446, "grad_norm": 0.559542715549469, "learning_rate": 6.351471364395432e-06, "loss": 0.0054, "step": 18464 }, { "epoch": 8.584844258484425, "grad_norm": 0.40687331557273865, "learning_rate": 6.403871754824326e-06, "loss": 0.0072, "step": 18466 }, { "epoch": 8.585774058577407, "grad_norm": 0.5757378339767456, "learning_rate": 6.4564556815314365e-06, "loss": 0.0081, "step": 18468 }, { "epoch": 8.586703858670386, "grad_norm": 0.22609588503837585, "learning_rate": 6.50922262553476e-06, "loss": 0.0066, "step": 18470 }, { "epoch": 8.587633658763366, "grad_norm": 1.830762267112732, "learning_rate": 6.562172066045624e-06, "loss": 0.0119, "step": 18472 }, { "epoch": 8.588563458856346, "grad_norm": 0.5785975456237793, "learning_rate": 6.615303480474602e-06, "loss": 0.0071, "step": 18474 }, { "epoch": 8.589493258949325, "grad_norm": 0.456662654876709, "learning_rate": 6.668616344435962e-06, "loss": 0.025, "step": 18476 }, { "epoch": 8.590423059042307, "grad_norm": 0.2486099898815155, "learning_rate": 6.722110131753329e-06, "loss": 0.0064, "step": 18478 }, { "epoch": 8.591352859135286, "grad_norm": 0.6654187440872192, "learning_rate": 6.7757843144646705e-06, "loss": 0.0135, "step": 18480 }, { "epoch": 8.592282659228266, "grad_norm": 0.5772547125816345, "learning_rate": 6.829638362827414e-06, "loss": 0.0101, "step": 18482 }, { "epoch": 8.593212459321245, "grad_norm": 0.31339508295059204, "learning_rate": 6.883671745323847e-06, "loss": 0.0083, "step": 18484 }, { "epoch": 8.594142259414227, "grad_norm": 0.6250996589660645, "learning_rate": 6.937883928666233e-06, "loss": 0.0087, "step": 18486 }, { "epoch": 8.595072059507206, "grad_norm": 0.3842235207557678, "learning_rate": 6.99227437780227e-06, "loss": 0.0081, "step": 18488 }, { "epoch": 8.596001859600186, "grad_norm": 0.24912849068641663, "learning_rate": 7.046842555920248e-06, "loss": 0.0057, "step": 18490 }, { "epoch": 8.596931659693166, "grad_norm": 1.0374751091003418, "learning_rate": 7.101587924454157e-06, "loss": 0.0137, "step": 18492 }, { "epoch": 8.597861459786145, "grad_norm": 0.5346579551696777, "learning_rate": 7.156509943089429e-06, "loss": 0.0072, "step": 18494 }, { "epoch": 8.598791259879127, "grad_norm": 1.2203031778335571, "learning_rate": 7.2116080697678575e-06, "loss": 0.011, "step": 18496 }, { "epoch": 8.599721059972106, "grad_norm": 0.29450109601020813, "learning_rate": 7.266881760693172e-06, "loss": 0.0117, "step": 18498 }, { "epoch": 8.600650860065086, "grad_norm": 0.24959875643253326, "learning_rate": 7.322330470336297e-06, "loss": 0.0091, "step": 18500 }, { "epoch": 8.601580660158065, "grad_norm": 0.4495514929294586, "learning_rate": 7.377953651440914e-06, "loss": 0.0171, "step": 18502 }, { "epoch": 8.602510460251047, "grad_norm": 0.5094116926193237, "learning_rate": 7.433750755028749e-06, "loss": 0.0088, "step": 18504 }, { "epoch": 8.603440260344026, "grad_norm": 0.9812142848968506, "learning_rate": 7.489721230404786e-06, "loss": 0.0148, "step": 18506 }, { "epoch": 8.604370060437006, "grad_norm": 0.14271560311317444, "learning_rate": 7.545864525163156e-06, "loss": 0.0087, "step": 18508 }, { "epoch": 8.605299860529986, "grad_norm": 0.5152167677879333, "learning_rate": 7.602180085192142e-06, "loss": 0.0098, "step": 18510 }, { "epoch": 8.606229660622965, "grad_norm": 0.34025707840919495, "learning_rate": 7.658667354679842e-06, "loss": 0.0147, "step": 18512 }, { "epoch": 8.607159460715947, "grad_norm": 0.34312716126441956, "learning_rate": 7.715325776119837e-06, "loss": 0.0111, "step": 18514 }, { "epoch": 8.608089260808926, "grad_norm": 0.40656614303588867, "learning_rate": 7.772154790316249e-06, "loss": 0.0053, "step": 18516 }, { "epoch": 8.609019060901906, "grad_norm": 0.23949943482875824, "learning_rate": 7.829153836389784e-06, "loss": 0.0083, "step": 18518 }, { "epoch": 8.609948860994885, "grad_norm": 0.240158349275589, "learning_rate": 7.886322351782737e-06, "loss": 0.0063, "step": 18520 }, { "epoch": 8.610878661087867, "grad_norm": 0.582207202911377, "learning_rate": 7.943659772265008e-06, "loss": 0.0102, "step": 18522 }, { "epoch": 8.611808461180846, "grad_norm": 0.5084753632545471, "learning_rate": 8.001165531939532e-06, "loss": 0.0259, "step": 18524 }, { "epoch": 8.612738261273826, "grad_norm": 0.5665385723114014, "learning_rate": 8.058839063247416e-06, "loss": 0.0185, "step": 18526 }, { "epoch": 8.613668061366806, "grad_norm": 0.35372811555862427, "learning_rate": 8.11667979697439e-06, "loss": 0.0076, "step": 18528 }, { "epoch": 8.614597861459787, "grad_norm": 1.1222703456878662, "learning_rate": 8.17468716225563e-06, "loss": 0.011, "step": 18530 }, { "epoch": 8.615527661552767, "grad_norm": 1.3346903324127197, "learning_rate": 8.232860586581925e-06, "loss": 0.02, "step": 18532 }, { "epoch": 8.616457461645746, "grad_norm": 0.5899669528007507, "learning_rate": 8.291199495805128e-06, "loss": 0.0242, "step": 18534 }, { "epoch": 8.617387261738726, "grad_norm": 0.23785847425460815, "learning_rate": 8.349703314143626e-06, "loss": 0.0066, "step": 18536 }, { "epoch": 8.618317061831707, "grad_norm": 0.41007572412490845, "learning_rate": 8.408371464188551e-06, "loss": 0.0116, "step": 18538 }, { "epoch": 8.619246861924687, "grad_norm": 0.3581368923187256, "learning_rate": 8.467203366908678e-06, "loss": 0.0084, "step": 18540 }, { "epoch": 8.620176662017666, "grad_norm": 0.7579193115234375, "learning_rate": 8.526198441657083e-06, "loss": 0.011, "step": 18542 }, { "epoch": 8.621106462110646, "grad_norm": 0.27354905009269714, "learning_rate": 8.585356106176058e-06, "loss": 0.0085, "step": 18544 }, { "epoch": 8.622036262203626, "grad_norm": 1.1781283617019653, "learning_rate": 8.644675776603403e-06, "loss": 0.0111, "step": 18546 }, { "epoch": 8.622966062296607, "grad_norm": 0.5472902059555054, "learning_rate": 8.704156867477992e-06, "loss": 0.0077, "step": 18548 }, { "epoch": 8.623895862389586, "grad_norm": 0.2729807198047638, "learning_rate": 8.76379879174533e-06, "loss": 0.0115, "step": 18550 }, { "epoch": 8.624825662482566, "grad_norm": 1.1645569801330566, "learning_rate": 8.82360096076391e-06, "loss": 0.0201, "step": 18552 }, { "epoch": 8.625755462575546, "grad_norm": 0.628226101398468, "learning_rate": 8.883562784310197e-06, "loss": 0.0099, "step": 18554 }, { "epoch": 8.626685262668527, "grad_norm": 0.5326452851295471, "learning_rate": 8.943683670585334e-06, "loss": 0.0094, "step": 18556 }, { "epoch": 8.627615062761507, "grad_norm": 0.4802183508872986, "learning_rate": 9.003963026220512e-06, "loss": 0.0122, "step": 18558 }, { "epoch": 8.628544862854486, "grad_norm": 0.5728672742843628, "learning_rate": 9.064400256282694e-06, "loss": 0.0097, "step": 18560 }, { "epoch": 8.629474662947466, "grad_norm": 1.2461813688278198, "learning_rate": 9.12499476428095e-06, "loss": 0.0126, "step": 18562 }, { "epoch": 8.630404463040446, "grad_norm": 0.18715818226337433, "learning_rate": 9.185745952171885e-06, "loss": 0.0108, "step": 18564 }, { "epoch": 8.631334263133427, "grad_norm": 0.8943883180618286, "learning_rate": 9.246653220365732e-06, "loss": 0.0224, "step": 18566 }, { "epoch": 8.632264063226406, "grad_norm": 0.7393452525138855, "learning_rate": 9.307715967732476e-06, "loss": 0.0215, "step": 18568 }, { "epoch": 8.633193863319386, "grad_norm": 0.7878137230873108, "learning_rate": 9.36893359160732e-06, "loss": 0.0184, "step": 18570 }, { "epoch": 8.634123663412366, "grad_norm": 0.7077828049659729, "learning_rate": 9.430305487797166e-06, "loss": 0.0188, "step": 18572 }, { "epoch": 8.635053463505347, "grad_norm": 0.24556921422481537, "learning_rate": 9.491831050586038e-06, "loss": 0.0097, "step": 18574 }, { "epoch": 8.635983263598327, "grad_norm": 0.2744632661342621, "learning_rate": 9.553509672741541e-06, "loss": 0.0059, "step": 18576 }, { "epoch": 8.636913063691306, "grad_norm": 0.19045904278755188, "learning_rate": 9.615340745520713e-06, "loss": 0.007, "step": 18578 }, { "epoch": 8.637842863784286, "grad_norm": 0.2839250862598419, "learning_rate": 9.677323658675547e-06, "loss": 0.0057, "step": 18580 }, { "epoch": 8.638772663877266, "grad_norm": 0.3974611759185791, "learning_rate": 9.739457800459926e-06, "loss": 0.0164, "step": 18582 }, { "epoch": 8.639702463970247, "grad_norm": 1.030067801475525, "learning_rate": 9.801742557634815e-06, "loss": 0.0142, "step": 18584 }, { "epoch": 8.640632264063226, "grad_norm": 0.25457051396369934, "learning_rate": 9.864177315474946e-06, "loss": 0.0114, "step": 18586 }, { "epoch": 8.641562064156206, "grad_norm": 1.5280755758285522, "learning_rate": 9.926761457774323e-06, "loss": 0.0143, "step": 18588 }, { "epoch": 8.642491864249186, "grad_norm": 0.8609879612922668, "learning_rate": 9.989494366852802e-06, "loss": 0.0214, "step": 18590 }, { "epoch": 8.643421664342167, "grad_norm": 0.5907185077667236, "learning_rate": 1.0052375423562034e-05, "loss": 0.0113, "step": 18592 }, { "epoch": 8.644351464435147, "grad_norm": 0.40891483426094055, "learning_rate": 1.0115404007291093e-05, "loss": 0.0099, "step": 18594 }, { "epoch": 8.645281264528126, "grad_norm": 0.5462383031845093, "learning_rate": 1.0178579495973517e-05, "loss": 0.0117, "step": 18596 }, { "epoch": 8.646211064621106, "grad_norm": 0.4083329439163208, "learning_rate": 1.0241901266092597e-05, "loss": 0.0097, "step": 18598 }, { "epoch": 8.647140864714087, "grad_norm": 0.22684301435947418, "learning_rate": 1.0305368692688091e-05, "loss": 0.0098, "step": 18600 }, { "epoch": 8.648070664807067, "grad_norm": 0.17847207188606262, "learning_rate": 1.0368981149362202e-05, "loss": 0.0051, "step": 18602 }, { "epoch": 8.649000464900046, "grad_norm": 0.21054081618785858, "learning_rate": 1.0432738008285508e-05, "loss": 0.0078, "step": 18604 }, { "epoch": 8.649930264993026, "grad_norm": 1.5578925609588623, "learning_rate": 1.0496638640203781e-05, "loss": 0.0301, "step": 18606 }, { "epoch": 8.650860065086006, "grad_norm": 0.3579559624195099, "learning_rate": 1.0560682414443283e-05, "loss": 0.0088, "step": 18608 }, { "epoch": 8.651789865178987, "grad_norm": 0.7909919023513794, "learning_rate": 1.0624868698917969e-05, "loss": 0.017, "step": 18610 }, { "epoch": 8.652719665271967, "grad_norm": 0.7138289213180542, "learning_rate": 1.0689196860135195e-05, "loss": 0.0121, "step": 18612 }, { "epoch": 8.653649465364946, "grad_norm": 0.4003133475780487, "learning_rate": 1.0753666263201831e-05, "loss": 0.0142, "step": 18614 }, { "epoch": 8.654579265457926, "grad_norm": 0.45509153604507446, "learning_rate": 1.0818276271831047e-05, "loss": 0.0165, "step": 18616 }, { "epoch": 8.655509065550907, "grad_norm": 0.23786310851573944, "learning_rate": 1.0883026248347993e-05, "loss": 0.0052, "step": 18618 }, { "epoch": 8.656438865643887, "grad_norm": 0.47991615533828735, "learning_rate": 1.0947915553696688e-05, "loss": 0.0124, "step": 18620 }, { "epoch": 8.657368665736866, "grad_norm": 0.3632163107395172, "learning_rate": 1.1012943547445808e-05, "loss": 0.0063, "step": 18622 }, { "epoch": 8.658298465829846, "grad_norm": 1.6774474382400513, "learning_rate": 1.1078109587795242e-05, "loss": 0.0207, "step": 18624 }, { "epoch": 8.659228265922827, "grad_norm": 0.2204028069972992, "learning_rate": 1.1143413031582623e-05, "loss": 0.0077, "step": 18626 }, { "epoch": 8.660158066015807, "grad_norm": 0.9535561800003052, "learning_rate": 1.120885323428918e-05, "loss": 0.0127, "step": 18628 }, { "epoch": 8.661087866108787, "grad_norm": 0.8448850512504578, "learning_rate": 1.1274429550046675e-05, "loss": 0.0191, "step": 18630 }, { "epoch": 8.662017666201766, "grad_norm": 0.7928578853607178, "learning_rate": 1.13401413316432e-05, "loss": 0.0231, "step": 18632 }, { "epoch": 8.662947466294746, "grad_norm": 0.191503643989563, "learning_rate": 1.1405987930530143e-05, "loss": 0.014, "step": 18634 }, { "epoch": 8.663877266387727, "grad_norm": 0.20190280675888062, "learning_rate": 1.1471968696828095e-05, "loss": 0.0118, "step": 18636 }, { "epoch": 8.664807066480707, "grad_norm": 0.586471438407898, "learning_rate": 1.1538082979333442e-05, "loss": 0.0121, "step": 18638 }, { "epoch": 8.665736866573686, "grad_norm": 0.11840967833995819, "learning_rate": 1.1604330125525064e-05, "loss": 0.0074, "step": 18640 }, { "epoch": 8.666666666666666, "grad_norm": 0.27681830525398254, "learning_rate": 1.1670709481570226e-05, "loss": 0.0055, "step": 18642 }, { "epoch": 8.667596466759647, "grad_norm": 0.61053067445755, "learning_rate": 1.1737220392331548e-05, "loss": 0.0256, "step": 18644 }, { "epoch": 8.668526266852627, "grad_norm": 1.1254792213439941, "learning_rate": 1.1803862201373274e-05, "loss": 0.0288, "step": 18646 }, { "epoch": 8.669456066945607, "grad_norm": 0.23443901538848877, "learning_rate": 1.1870634250967574e-05, "loss": 0.0054, "step": 18648 }, { "epoch": 8.670385867038586, "grad_norm": 0.1942889243364334, "learning_rate": 1.193753588210128e-05, "loss": 0.0057, "step": 18650 }, { "epoch": 8.671315667131566, "grad_norm": 0.43826615810394287, "learning_rate": 1.2004566434482224e-05, "loss": 0.011, "step": 18652 }, { "epoch": 8.672245467224547, "grad_norm": 0.8193718791007996, "learning_rate": 1.2071725246545988e-05, "loss": 0.0113, "step": 18654 }, { "epoch": 8.673175267317527, "grad_norm": 0.23979762196540833, "learning_rate": 1.2139011655462291e-05, "loss": 0.0069, "step": 18656 }, { "epoch": 8.674105067410506, "grad_norm": 0.4357919991016388, "learning_rate": 1.2206424997141286e-05, "loss": 0.0155, "step": 18658 }, { "epoch": 8.675034867503486, "grad_norm": 0.27941659092903137, "learning_rate": 1.2273964606240738e-05, "loss": 0.0081, "step": 18660 }, { "epoch": 8.675964667596467, "grad_norm": 0.5541241765022278, "learning_rate": 1.2341629816171666e-05, "loss": 0.01, "step": 18662 }, { "epoch": 8.676894467689447, "grad_norm": 1.069329023361206, "learning_rate": 1.2409419959105917e-05, "loss": 0.0148, "step": 18664 }, { "epoch": 8.677824267782427, "grad_norm": 0.9636040329933167, "learning_rate": 1.2477334365982224e-05, "loss": 0.0099, "step": 18666 }, { "epoch": 8.678754067875406, "grad_norm": 1.9851148128509521, "learning_rate": 1.2545372366512657e-05, "loss": 0.017, "step": 18668 }, { "epoch": 8.679683867968386, "grad_norm": 1.2627743482589722, "learning_rate": 1.2613533289189775e-05, "loss": 0.0108, "step": 18670 }, { "epoch": 8.680613668061367, "grad_norm": 0.2866056263446808, "learning_rate": 1.2681816461292632e-05, "loss": 0.0093, "step": 18672 }, { "epoch": 8.681543468154347, "grad_norm": 0.7266020178794861, "learning_rate": 1.2750221208894118e-05, "loss": 0.0136, "step": 18674 }, { "epoch": 8.682473268247326, "grad_norm": 0.28619202971458435, "learning_rate": 1.2818746856866682e-05, "loss": 0.0121, "step": 18676 }, { "epoch": 8.683403068340306, "grad_norm": 0.7991352081298828, "learning_rate": 1.288739272889e-05, "loss": 0.0108, "step": 18678 }, { "epoch": 8.684332868433287, "grad_norm": 0.2720010578632355, "learning_rate": 1.29561581474571e-05, "loss": 0.0097, "step": 18680 }, { "epoch": 8.685262668526267, "grad_norm": 0.5921216011047363, "learning_rate": 1.3025042433880914e-05, "loss": 0.012, "step": 18682 }, { "epoch": 8.686192468619247, "grad_norm": 0.8171347379684448, "learning_rate": 1.3094044908301496e-05, "loss": 0.0083, "step": 18684 }, { "epoch": 8.687122268712226, "grad_norm": 0.9512056708335876, "learning_rate": 1.3163164889692126e-05, "loss": 0.021, "step": 18686 }, { "epoch": 8.688052068805208, "grad_norm": 0.48529359698295593, "learning_rate": 1.3232401695866655e-05, "loss": 0.0102, "step": 18688 }, { "epoch": 8.688981868898187, "grad_norm": 0.19871382415294647, "learning_rate": 1.3301754643485666e-05, "loss": 0.007, "step": 18690 }, { "epoch": 8.689911668991167, "grad_norm": 0.43366196751594543, "learning_rate": 1.3371223048063497e-05, "loss": 0.0066, "step": 18692 }, { "epoch": 8.690841469084146, "grad_norm": 0.883922815322876, "learning_rate": 1.3440806223975196e-05, "loss": 0.0104, "step": 18694 }, { "epoch": 8.691771269177128, "grad_norm": 1.2094959020614624, "learning_rate": 1.3510503484462748e-05, "loss": 0.0189, "step": 18696 }, { "epoch": 8.692701069270107, "grad_norm": 0.4645620286464691, "learning_rate": 1.3580314141642416e-05, "loss": 0.0137, "step": 18698 }, { "epoch": 8.693630869363087, "grad_norm": 0.7651345133781433, "learning_rate": 1.3650237506511269e-05, "loss": 0.0173, "step": 18700 }, { "epoch": 8.694560669456067, "grad_norm": 0.24246768653392792, "learning_rate": 1.3720272888953846e-05, "loss": 0.0064, "step": 18702 }, { "epoch": 8.695490469549046, "grad_norm": 0.3086315393447876, "learning_rate": 1.3790419597749204e-05, "loss": 0.0096, "step": 18704 }, { "epoch": 8.696420269642028, "grad_norm": 0.7757980227470398, "learning_rate": 1.3860676940577557e-05, "loss": 0.0124, "step": 18706 }, { "epoch": 8.697350069735007, "grad_norm": 0.26124468445777893, "learning_rate": 1.3931044224027385e-05, "loss": 0.0087, "step": 18708 }, { "epoch": 8.698279869827987, "grad_norm": 0.34352636337280273, "learning_rate": 1.4001520753602079e-05, "loss": 0.0096, "step": 18710 }, { "epoch": 8.699209669920966, "grad_norm": 1.1598552465438843, "learning_rate": 1.4072105833726592e-05, "loss": 0.023, "step": 18712 }, { "epoch": 8.700139470013948, "grad_norm": 0.45221254229545593, "learning_rate": 1.4142798767754838e-05, "loss": 0.0106, "step": 18714 }, { "epoch": 8.701069270106927, "grad_norm": 1.1257003545761108, "learning_rate": 1.4213598857976012e-05, "loss": 0.0167, "step": 18716 }, { "epoch": 8.701999070199907, "grad_norm": 0.6435662508010864, "learning_rate": 1.4284505405621817e-05, "loss": 0.0111, "step": 18718 }, { "epoch": 8.702928870292887, "grad_norm": 0.783541202545166, "learning_rate": 1.4355517710873161e-05, "loss": 0.0104, "step": 18720 }, { "epoch": 8.703858670385866, "grad_norm": 1.2678191661834717, "learning_rate": 1.4426635072867355e-05, "loss": 0.0281, "step": 18722 }, { "epoch": 8.704788470478848, "grad_norm": 0.4767380952835083, "learning_rate": 1.4497856789704816e-05, "loss": 0.0079, "step": 18724 }, { "epoch": 8.705718270571827, "grad_norm": 0.6912004351615906, "learning_rate": 1.4569182158455794e-05, "loss": 0.0104, "step": 18726 }, { "epoch": 8.706648070664807, "grad_norm": 0.5850211977958679, "learning_rate": 1.4640610475167857e-05, "loss": 0.0071, "step": 18728 }, { "epoch": 8.707577870757786, "grad_norm": 0.08455940335988998, "learning_rate": 1.4712141034872278e-05, "loss": 0.0041, "step": 18730 }, { "epoch": 8.708507670850768, "grad_norm": 0.16031970083713531, "learning_rate": 1.4783773131591227e-05, "loss": 0.0076, "step": 18732 }, { "epoch": 8.709437470943747, "grad_norm": 0.14976108074188232, "learning_rate": 1.485550605834499e-05, "loss": 0.0111, "step": 18734 }, { "epoch": 8.710367271036727, "grad_norm": 0.42795976996421814, "learning_rate": 1.492733910715838e-05, "loss": 0.0098, "step": 18736 }, { "epoch": 8.711297071129707, "grad_norm": 0.39370542764663696, "learning_rate": 1.4999271569068367e-05, "loss": 0.0133, "step": 18738 }, { "epoch": 8.712226871222686, "grad_norm": 0.7138265371322632, "learning_rate": 1.5071302734130428e-05, "loss": 0.0207, "step": 18740 }, { "epoch": 8.713156671315668, "grad_norm": 0.3408103585243225, "learning_rate": 1.514343189142612e-05, "loss": 0.0086, "step": 18742 }, { "epoch": 8.714086471408647, "grad_norm": 0.37872621417045593, "learning_rate": 1.5215658329069955e-05, "loss": 0.0076, "step": 18744 }, { "epoch": 8.715016271501627, "grad_norm": 0.4830138683319092, "learning_rate": 1.5287981334215814e-05, "loss": 0.0093, "step": 18746 }, { "epoch": 8.715946071594606, "grad_norm": 1.7359827756881714, "learning_rate": 1.5360400193065087e-05, "loss": 0.0179, "step": 18748 }, { "epoch": 8.716875871687588, "grad_norm": 0.8339054584503174, "learning_rate": 1.5432914190872716e-05, "loss": 0.0119, "step": 18750 }, { "epoch": 8.717805671780567, "grad_norm": 0.2656497657299042, "learning_rate": 1.5505522611954886e-05, "loss": 0.0168, "step": 18752 }, { "epoch": 8.718735471873547, "grad_norm": 0.8396019339561462, "learning_rate": 1.5578224739695884e-05, "loss": 0.0124, "step": 18754 }, { "epoch": 8.719665271966527, "grad_norm": 0.3650313913822174, "learning_rate": 1.5651019856554894e-05, "loss": 0.0121, "step": 18756 }, { "epoch": 8.720595072059508, "grad_norm": 0.8937464952468872, "learning_rate": 1.5723907244073746e-05, "loss": 0.017, "step": 18758 }, { "epoch": 8.721524872152488, "grad_norm": 1.5076735019683838, "learning_rate": 1.5796886182883025e-05, "loss": 0.0167, "step": 18760 }, { "epoch": 8.722454672245467, "grad_norm": 0.21416690945625305, "learning_rate": 1.5869955952710315e-05, "loss": 0.0091, "step": 18762 }, { "epoch": 8.723384472338447, "grad_norm": 0.5317308306694031, "learning_rate": 1.5943115832386322e-05, "loss": 0.0129, "step": 18764 }, { "epoch": 8.724314272431426, "grad_norm": 0.6105255484580994, "learning_rate": 1.6016365099852654e-05, "loss": 0.0314, "step": 18766 }, { "epoch": 8.725244072524408, "grad_norm": 0.3930653929710388, "learning_rate": 1.6089703032168693e-05, "loss": 0.0128, "step": 18768 }, { "epoch": 8.726173872617387, "grad_norm": 0.9607301950454712, "learning_rate": 1.6163128905518568e-05, "loss": 0.0184, "step": 18770 }, { "epoch": 8.727103672710367, "grad_norm": 0.4294564127922058, "learning_rate": 1.6236641995218558e-05, "loss": 0.0179, "step": 18772 }, { "epoch": 8.728033472803347, "grad_norm": 0.3429594039916992, "learning_rate": 1.6310241575724063e-05, "loss": 0.0078, "step": 18774 }, { "epoch": 8.728963272896328, "grad_norm": 0.6365881562232971, "learning_rate": 1.6383926920637016e-05, "loss": 0.0129, "step": 18776 }, { "epoch": 8.729893072989308, "grad_norm": 1.0653396844863892, "learning_rate": 1.6457697302712897e-05, "loss": 0.0188, "step": 18778 }, { "epoch": 8.730822873082287, "grad_norm": 0.3228603005409241, "learning_rate": 1.653155199386765e-05, "loss": 0.0144, "step": 18780 }, { "epoch": 8.731752673175267, "grad_norm": 0.6441537737846375, "learning_rate": 1.6605490265185458e-05, "loss": 0.0092, "step": 18782 }, { "epoch": 8.732682473268248, "grad_norm": 0.44932976365089417, "learning_rate": 1.6679511386925347e-05, "loss": 0.0145, "step": 18784 }, { "epoch": 8.733612273361228, "grad_norm": 0.6598551869392395, "learning_rate": 1.6753614628528642e-05, "loss": 0.0092, "step": 18786 }, { "epoch": 8.734542073454207, "grad_norm": 0.35898175835609436, "learning_rate": 1.682779925862644e-05, "loss": 0.0103, "step": 18788 }, { "epoch": 8.735471873547187, "grad_norm": 0.6888323426246643, "learning_rate": 1.690206454504622e-05, "loss": 0.0112, "step": 18790 }, { "epoch": 8.736401673640167, "grad_norm": 0.7675220966339111, "learning_rate": 1.6976409754819754e-05, "loss": 0.0238, "step": 18792 }, { "epoch": 8.737331473733148, "grad_norm": 0.3467024862766266, "learning_rate": 1.7050834154189665e-05, "loss": 0.0085, "step": 18794 }, { "epoch": 8.738261273826128, "grad_norm": 0.6338108777999878, "learning_rate": 1.7125337008617274e-05, "loss": 0.0122, "step": 18796 }, { "epoch": 8.739191073919107, "grad_norm": 1.0815224647521973, "learning_rate": 1.7199917582789646e-05, "loss": 0.0326, "step": 18798 }, { "epoch": 8.740120874012087, "grad_norm": 0.6567382216453552, "learning_rate": 1.7274575140626284e-05, "loss": 0.0142, "step": 18800 }, { "epoch": 8.741050674105068, "grad_norm": 0.4198664426803589, "learning_rate": 1.734930894528749e-05, "loss": 0.0081, "step": 18802 }, { "epoch": 8.741980474198048, "grad_norm": 0.470819890499115, "learning_rate": 1.7424118259180614e-05, "loss": 0.0167, "step": 18804 }, { "epoch": 8.742910274291027, "grad_norm": 0.7943920493125916, "learning_rate": 1.749900234396809e-05, "loss": 0.0138, "step": 18806 }, { "epoch": 8.743840074384007, "grad_norm": 0.2656831443309784, "learning_rate": 1.7573960460574076e-05, "loss": 0.0075, "step": 18808 }, { "epoch": 8.744769874476987, "grad_norm": 1.7824608087539673, "learning_rate": 1.76489918691923e-05, "loss": 0.0166, "step": 18810 }, { "epoch": 8.745699674569968, "grad_norm": 0.5090926289558411, "learning_rate": 1.7724095829293223e-05, "loss": 0.0147, "step": 18812 }, { "epoch": 8.746629474662948, "grad_norm": 0.6745421886444092, "learning_rate": 1.7799271599630742e-05, "loss": 0.0116, "step": 18814 }, { "epoch": 8.747559274755927, "grad_norm": 1.3707329034805298, "learning_rate": 1.7874518438250604e-05, "loss": 0.0162, "step": 18816 }, { "epoch": 8.748489074848907, "grad_norm": 0.4361168146133423, "learning_rate": 1.7949835602496725e-05, "loss": 0.0066, "step": 18818 }, { "epoch": 8.749418874941888, "grad_norm": 0.3155043125152588, "learning_rate": 1.802522234901918e-05, "loss": 0.0116, "step": 18820 }, { "epoch": 8.750348675034868, "grad_norm": 0.6815221905708313, "learning_rate": 1.8100677933781306e-05, "loss": 0.0128, "step": 18822 }, { "epoch": 8.751278475127847, "grad_norm": 0.5864207744598389, "learning_rate": 1.817620161206677e-05, "loss": 0.01, "step": 18824 }, { "epoch": 8.752208275220827, "grad_norm": 0.32180094718933105, "learning_rate": 1.8251792638487617e-05, "loss": 0.0092, "step": 18826 }, { "epoch": 8.753138075313807, "grad_norm": 0.20978984236717224, "learning_rate": 1.8327450266990586e-05, "loss": 0.0191, "step": 18828 }, { "epoch": 8.754067875406788, "grad_norm": 1.1767075061798096, "learning_rate": 1.8403173750865607e-05, "loss": 0.0316, "step": 18830 }, { "epoch": 8.754997675499768, "grad_norm": 1.2891286611557007, "learning_rate": 1.8478962342752546e-05, "loss": 0.0185, "step": 18832 }, { "epoch": 8.755927475592747, "grad_norm": 0.6250652074813843, "learning_rate": 1.8554815294648425e-05, "loss": 0.0074, "step": 18834 }, { "epoch": 8.756857275685727, "grad_norm": 0.8308655023574829, "learning_rate": 1.8630731857915405e-05, "loss": 0.0132, "step": 18836 }, { "epoch": 8.757787075778708, "grad_norm": 0.2999294698238373, "learning_rate": 1.8706711283287484e-05, "loss": 0.0114, "step": 18838 }, { "epoch": 8.758716875871688, "grad_norm": 1.5858982801437378, "learning_rate": 1.8782752820878584e-05, "loss": 0.0284, "step": 18840 }, { "epoch": 8.759646675964667, "grad_norm": 0.7753662467002869, "learning_rate": 1.8858855720189333e-05, "loss": 0.0135, "step": 18842 }, { "epoch": 8.760576476057647, "grad_norm": 1.1296958923339844, "learning_rate": 1.8935019230114758e-05, "loss": 0.0081, "step": 18844 }, { "epoch": 8.761506276150628, "grad_norm": 1.3074697256088257, "learning_rate": 1.9011242598951935e-05, "loss": 0.0244, "step": 18846 }, { "epoch": 8.762436076243608, "grad_norm": 0.585860013961792, "learning_rate": 1.9087525074406815e-05, "loss": 0.0064, "step": 18848 }, { "epoch": 8.763365876336588, "grad_norm": 0.4790462851524353, "learning_rate": 1.9163865903602333e-05, "loss": 0.0109, "step": 18850 }, { "epoch": 8.764295676429567, "grad_norm": 1.0342762470245361, "learning_rate": 1.924026433308524e-05, "loss": 0.0151, "step": 18852 }, { "epoch": 8.765225476522549, "grad_norm": 1.139570713043213, "learning_rate": 1.93167196088338e-05, "loss": 0.0138, "step": 18854 }, { "epoch": 8.766155276615528, "grad_norm": 1.1973538398742676, "learning_rate": 1.939323097626547e-05, "loss": 0.0111, "step": 18856 }, { "epoch": 8.767085076708508, "grad_norm": 1.1735458374023438, "learning_rate": 1.9469797680243774e-05, "loss": 0.0339, "step": 18858 }, { "epoch": 8.768014876801487, "grad_norm": 0.3847063481807709, "learning_rate": 1.9546418965086425e-05, "loss": 0.0085, "step": 18860 }, { "epoch": 8.768944676894467, "grad_norm": 0.16731014847755432, "learning_rate": 1.962309407457211e-05, "loss": 0.0147, "step": 18862 }, { "epoch": 8.769874476987448, "grad_norm": 0.4357019066810608, "learning_rate": 1.9699822251948528e-05, "loss": 0.0107, "step": 18864 }, { "epoch": 8.770804277080428, "grad_norm": 1.1383777856826782, "learning_rate": 1.977660273993973e-05, "loss": 0.0139, "step": 18866 }, { "epoch": 8.771734077173408, "grad_norm": 0.7753674387931824, "learning_rate": 1.985343478075294e-05, "loss": 0.019, "step": 18868 }, { "epoch": 8.772663877266387, "grad_norm": 0.6225096583366394, "learning_rate": 1.99303176160872e-05, "loss": 0.0139, "step": 18870 }, { "epoch": 8.773593677359369, "grad_norm": 0.7435780167579651, "learning_rate": 2.000725048713979e-05, "loss": 0.0179, "step": 18872 }, { "epoch": 8.774523477452348, "grad_norm": 0.5387886166572571, "learning_rate": 2.008423263461441e-05, "loss": 0.0106, "step": 18874 }, { "epoch": 8.775453277545328, "grad_norm": 1.2867004871368408, "learning_rate": 2.016126329872844e-05, "loss": 0.0327, "step": 18876 }, { "epoch": 8.776383077638307, "grad_norm": 0.7280334234237671, "learning_rate": 2.0238341719220152e-05, "loss": 0.0145, "step": 18878 }, { "epoch": 8.777312877731287, "grad_norm": 1.0669865608215332, "learning_rate": 2.031546713535691e-05, "loss": 0.0278, "step": 18880 }, { "epoch": 8.778242677824268, "grad_norm": 0.574143648147583, "learning_rate": 2.0392638785941644e-05, "loss": 0.0092, "step": 18882 }, { "epoch": 8.779172477917248, "grad_norm": 1.3481614589691162, "learning_rate": 2.0469855909321493e-05, "loss": 0.0267, "step": 18884 }, { "epoch": 8.780102278010228, "grad_norm": 0.3515703082084656, "learning_rate": 2.054711774339471e-05, "loss": 0.0093, "step": 18886 }, { "epoch": 8.781032078103207, "grad_norm": 1.6217191219329834, "learning_rate": 2.0624423525618016e-05, "loss": 0.0374, "step": 18888 }, { "epoch": 8.781961878196189, "grad_norm": 1.118937373161316, "learning_rate": 2.0701772493014716e-05, "loss": 0.0173, "step": 18890 }, { "epoch": 8.782891678289168, "grad_norm": 0.24183140695095062, "learning_rate": 2.077916388218156e-05, "loss": 0.0254, "step": 18892 }, { "epoch": 8.783821478382148, "grad_norm": 0.6477972865104675, "learning_rate": 2.0856596929297036e-05, "loss": 0.0117, "step": 18894 }, { "epoch": 8.784751278475127, "grad_norm": 1.311829924583435, "learning_rate": 2.0934070870127895e-05, "loss": 0.011, "step": 18896 }, { "epoch": 8.785681078568107, "grad_norm": 1.157372236251831, "learning_rate": 2.101158494003778e-05, "loss": 0.0138, "step": 18898 }, { "epoch": 8.786610878661088, "grad_norm": 1.4416285753250122, "learning_rate": 2.1089138373994203e-05, "loss": 0.0171, "step": 18900 }, { "epoch": 8.787540678754068, "grad_norm": 0.8955532908439636, "learning_rate": 2.116673040657591e-05, "loss": 0.0194, "step": 18902 }, { "epoch": 8.788470478847048, "grad_norm": 0.8470731973648071, "learning_rate": 2.1244360271981043e-05, "loss": 0.0291, "step": 18904 }, { "epoch": 8.789400278940027, "grad_norm": 0.43173936009407043, "learning_rate": 2.1322027204033985e-05, "loss": 0.0067, "step": 18906 }, { "epoch": 8.790330079033009, "grad_norm": 0.26300615072250366, "learning_rate": 2.1399730436193653e-05, "loss": 0.0073, "step": 18908 }, { "epoch": 8.791259879125988, "grad_norm": 0.7885962724685669, "learning_rate": 2.1477469201560425e-05, "loss": 0.0149, "step": 18910 }, { "epoch": 8.792189679218968, "grad_norm": 1.0756864547729492, "learning_rate": 2.1555242732883987e-05, "loss": 0.016, "step": 18912 }, { "epoch": 8.793119479311947, "grad_norm": 0.26608043909072876, "learning_rate": 2.1633050262571165e-05, "loss": 0.0081, "step": 18914 }, { "epoch": 8.794049279404929, "grad_norm": 1.0143383741378784, "learning_rate": 2.1710891022692864e-05, "loss": 0.0147, "step": 18916 }, { "epoch": 8.794979079497908, "grad_norm": 0.42376509308815, "learning_rate": 2.1788764244992305e-05, "loss": 0.012, "step": 18918 }, { "epoch": 8.795908879590888, "grad_norm": 0.9024015069007874, "learning_rate": 2.1866669160892306e-05, "loss": 0.013, "step": 18920 }, { "epoch": 8.796838679683868, "grad_norm": 1.2491276264190674, "learning_rate": 2.194460500150271e-05, "loss": 0.0288, "step": 18922 }, { "epoch": 8.797768479776847, "grad_norm": 0.2500351071357727, "learning_rate": 2.202257099762824e-05, "loss": 0.0128, "step": 18924 }, { "epoch": 8.798698279869829, "grad_norm": 0.6826364994049072, "learning_rate": 2.21005663797759e-05, "loss": 0.0146, "step": 18926 }, { "epoch": 8.799628079962808, "grad_norm": 1.0970258712768555, "learning_rate": 2.2178590378162844e-05, "loss": 0.0122, "step": 18928 }, { "epoch": 8.800557880055788, "grad_norm": 0.5835118889808655, "learning_rate": 2.2256642222723794e-05, "loss": 0.0167, "step": 18930 }, { "epoch": 8.801487680148767, "grad_norm": 0.6861298084259033, "learning_rate": 2.2334721143118382e-05, "loss": 0.0141, "step": 18932 }, { "epoch": 8.802417480241749, "grad_norm": 1.3891133069992065, "learning_rate": 2.2412826368739352e-05, "loss": 0.0165, "step": 18934 }, { "epoch": 8.803347280334728, "grad_norm": 1.1409987211227417, "learning_rate": 2.249095712871958e-05, "loss": 0.0114, "step": 18936 }, { "epoch": 8.804277080427708, "grad_norm": 1.3321962356567383, "learning_rate": 2.256911265194001e-05, "loss": 0.0203, "step": 18938 }, { "epoch": 8.805206880520688, "grad_norm": 0.23192793130874634, "learning_rate": 2.2647292167037083e-05, "loss": 0.007, "step": 18940 }, { "epoch": 8.806136680613669, "grad_norm": 0.4143274426460266, "learning_rate": 2.272549490241064e-05, "loss": 0.0117, "step": 18942 }, { "epoch": 8.807066480706649, "grad_norm": 1.4478180408477783, "learning_rate": 2.2803720086231347e-05, "loss": 0.0284, "step": 18944 }, { "epoch": 8.807996280799628, "grad_norm": 0.5854945182800293, "learning_rate": 2.288196694644804e-05, "loss": 0.0133, "step": 18946 }, { "epoch": 8.808926080892608, "grad_norm": 1.6418784856796265, "learning_rate": 2.296023471079607e-05, "loss": 0.0387, "step": 18948 }, { "epoch": 8.809855880985587, "grad_norm": 1.7459475994110107, "learning_rate": 2.3038522606803836e-05, "loss": 0.0225, "step": 18950 }, { "epoch": 8.810785681078569, "grad_norm": 0.37317103147506714, "learning_rate": 2.3116829861801595e-05, "loss": 0.0086, "step": 18952 }, { "epoch": 8.811715481171548, "grad_norm": 2.194606304168701, "learning_rate": 2.3195155702928436e-05, "loss": 0.0291, "step": 18954 }, { "epoch": 8.812645281264528, "grad_norm": 0.31874287128448486, "learning_rate": 2.3273499357139757e-05, "loss": 0.0221, "step": 18956 }, { "epoch": 8.813575081357508, "grad_norm": 2.5914740562438965, "learning_rate": 2.3351860051215495e-05, "loss": 0.0384, "step": 18958 }, { "epoch": 8.814504881450489, "grad_norm": 0.5648574233055115, "learning_rate": 2.3430237011767056e-05, "loss": 0.0216, "step": 18960 }, { "epoch": 8.815434681543469, "grad_norm": 1.204838514328003, "learning_rate": 2.350862946524566e-05, "loss": 0.0147, "step": 18962 }, { "epoch": 8.816364481636448, "grad_norm": 1.094655990600586, "learning_rate": 2.3587036637949354e-05, "loss": 0.025, "step": 18964 }, { "epoch": 8.817294281729428, "grad_norm": 0.48452863097190857, "learning_rate": 2.366545775603091e-05, "loss": 0.0099, "step": 18966 }, { "epoch": 8.818224081822407, "grad_norm": 1.3655674457550049, "learning_rate": 2.3743892045505727e-05, "loss": 0.0193, "step": 18968 }, { "epoch": 8.819153881915389, "grad_norm": 0.7627215385437012, "learning_rate": 2.3822338732258846e-05, "loss": 0.0138, "step": 18970 }, { "epoch": 8.820083682008368, "grad_norm": 1.3363744020462036, "learning_rate": 2.3900797042053246e-05, "loss": 0.0209, "step": 18972 }, { "epoch": 8.821013482101348, "grad_norm": 0.3151357173919678, "learning_rate": 2.397926620053716e-05, "loss": 0.0063, "step": 18974 }, { "epoch": 8.821943282194328, "grad_norm": 0.25433775782585144, "learning_rate": 2.4057745433251584e-05, "loss": 0.0093, "step": 18976 }, { "epoch": 8.822873082287309, "grad_norm": 1.1012845039367676, "learning_rate": 2.413623396563818e-05, "loss": 0.0247, "step": 18978 }, { "epoch": 8.823802882380289, "grad_norm": 1.5812060832977295, "learning_rate": 2.4214731023046735e-05, "loss": 0.027, "step": 18980 }, { "epoch": 8.824732682473268, "grad_norm": 1.0703904628753662, "learning_rate": 2.4293235830743154e-05, "loss": 0.0145, "step": 18982 }, { "epoch": 8.825662482566248, "grad_norm": 0.7830193638801575, "learning_rate": 2.4371747613916498e-05, "loss": 0.0187, "step": 18984 }, { "epoch": 8.826592282659227, "grad_norm": 0.31506118178367615, "learning_rate": 2.445026559768726e-05, "loss": 0.0052, "step": 18986 }, { "epoch": 8.827522082752209, "grad_norm": 0.4774673581123352, "learning_rate": 2.4528789007114722e-05, "loss": 0.0129, "step": 18988 }, { "epoch": 8.828451882845188, "grad_norm": 1.7586910724639893, "learning_rate": 2.460731706720445e-05, "loss": 0.0317, "step": 18990 }, { "epoch": 8.829381682938168, "grad_norm": 0.5646217465400696, "learning_rate": 2.468584900291619e-05, "loss": 0.0198, "step": 18992 }, { "epoch": 8.830311483031148, "grad_norm": 0.7229178547859192, "learning_rate": 2.476438403917136e-05, "loss": 0.0211, "step": 18994 }, { "epoch": 8.831241283124129, "grad_norm": 0.7963686585426331, "learning_rate": 2.4842921400860934e-05, "loss": 0.0121, "step": 18996 }, { "epoch": 8.832171083217109, "grad_norm": 1.3656247854232788, "learning_rate": 2.4921460312852956e-05, "loss": 0.028, "step": 18998 }, { "epoch": 8.833100883310088, "grad_norm": 0.8107460141181946, "learning_rate": 2.4999999999999896e-05, "loss": 0.0265, "step": 19000 }, { "epoch": 8.833100883310088, "eval_cer": 0.1233926450046344, "eval_loss": 0.19770674407482147, "eval_runtime": 402.5109, "eval_samples_per_second": 31.537, "eval_steps_per_second": 0.986, "step": 19000 }, { "epoch": 8.834030683403068, "grad_norm": 0.8634414076805115, "learning_rate": 2.5078539687146924e-05, "loss": 0.0221, "step": 19002 }, { "epoch": 8.83496048349605, "grad_norm": 0.42401808500289917, "learning_rate": 2.5157078599138946e-05, "loss": 0.0116, "step": 19004 }, { "epoch": 8.835890283589029, "grad_norm": 1.4263211488723755, "learning_rate": 2.5235615960828524e-05, "loss": 0.0187, "step": 19006 }, { "epoch": 8.836820083682008, "grad_norm": 1.5671281814575195, "learning_rate": 2.531415099708378e-05, "loss": 0.0223, "step": 19008 }, { "epoch": 8.837749883774988, "grad_norm": 1.4600075483322144, "learning_rate": 2.5392682932795425e-05, "loss": 0.0263, "step": 19010 }, { "epoch": 8.83867968386797, "grad_norm": 1.2596783638000488, "learning_rate": 2.5471210992885158e-05, "loss": 0.0297, "step": 19012 }, { "epoch": 8.839609483960949, "grad_norm": 0.6303223371505737, "learning_rate": 2.5549734402312532e-05, "loss": 0.0153, "step": 19014 }, { "epoch": 8.840539284053929, "grad_norm": 0.9648757576942444, "learning_rate": 2.5628252386083294e-05, "loss": 0.0348, "step": 19016 }, { "epoch": 8.841469084146908, "grad_norm": 1.450901746749878, "learning_rate": 2.570676416925682e-05, "loss": 0.0334, "step": 19018 }, { "epoch": 8.842398884239888, "grad_norm": 0.4811369478702545, "learning_rate": 2.5785268976953156e-05, "loss": 0.0172, "step": 19020 }, { "epoch": 8.84332868433287, "grad_norm": 0.14667493104934692, "learning_rate": 2.5863766034361794e-05, "loss": 0.0277, "step": 19022 }, { "epoch": 8.844258484425849, "grad_norm": 0.6215162873268127, "learning_rate": 2.59422545667483e-05, "loss": 0.0085, "step": 19024 }, { "epoch": 8.845188284518828, "grad_norm": 0.3348989486694336, "learning_rate": 2.6020733799462733e-05, "loss": 0.0148, "step": 19026 }, { "epoch": 8.846118084611808, "grad_norm": 0.6670844554901123, "learning_rate": 2.609920295794656e-05, "loss": 0.0113, "step": 19028 }, { "epoch": 8.84704788470479, "grad_norm": 0.6829396486282349, "learning_rate": 2.6177661267740953e-05, "loss": 0.0138, "step": 19030 }, { "epoch": 8.847977684797769, "grad_norm": 0.9698818325996399, "learning_rate": 2.6256107954494252e-05, "loss": 0.0318, "step": 19032 }, { "epoch": 8.848907484890749, "grad_norm": 1.5780352354049683, "learning_rate": 2.633454224396898e-05, "loss": 0.0401, "step": 19034 }, { "epoch": 8.849837284983728, "grad_norm": 0.646979033946991, "learning_rate": 2.641296336205062e-05, "loss": 0.0194, "step": 19036 }, { "epoch": 8.850767085076708, "grad_norm": 0.7547963857650757, "learning_rate": 2.6491370534754227e-05, "loss": 0.0183, "step": 19038 }, { "epoch": 8.851696885169689, "grad_norm": 0.9673738479614258, "learning_rate": 2.6569762988232747e-05, "loss": 0.0188, "step": 19040 }, { "epoch": 8.852626685262669, "grad_norm": 0.5667545199394226, "learning_rate": 2.66481399487844e-05, "loss": 0.0123, "step": 19042 }, { "epoch": 8.853556485355648, "grad_norm": 0.8792577981948853, "learning_rate": 2.672650064286005e-05, "loss": 0.0483, "step": 19044 }, { "epoch": 8.854486285448628, "grad_norm": 1.6386288404464722, "learning_rate": 2.6804844297071546e-05, "loss": 0.0229, "step": 19046 }, { "epoch": 8.85541608554161, "grad_norm": 0.40793973207473755, "learning_rate": 2.6883170138198303e-05, "loss": 0.0098, "step": 19048 }, { "epoch": 8.856345885634589, "grad_norm": 0.7365944981575012, "learning_rate": 2.696147739319606e-05, "loss": 0.0252, "step": 19050 }, { "epoch": 8.857275685727569, "grad_norm": 1.0305737257003784, "learning_rate": 2.7039765289203915e-05, "loss": 0.0169, "step": 19052 }, { "epoch": 8.858205485820548, "grad_norm": 1.4145512580871582, "learning_rate": 2.7118033053551765e-05, "loss": 0.035, "step": 19054 }, { "epoch": 8.859135285913528, "grad_norm": 0.7824587821960449, "learning_rate": 2.7196279913768547e-05, "loss": 0.0158, "step": 19056 }, { "epoch": 8.860065086006509, "grad_norm": 0.4523605704307556, "learning_rate": 2.7274505097589256e-05, "loss": 0.0138, "step": 19058 }, { "epoch": 8.860994886099489, "grad_norm": 1.799522876739502, "learning_rate": 2.7352707832962818e-05, "loss": 0.0293, "step": 19060 }, { "epoch": 8.861924686192468, "grad_norm": 1.2795600891113281, "learning_rate": 2.7430887348059983e-05, "loss": 0.0253, "step": 19062 }, { "epoch": 8.862854486285448, "grad_norm": 1.8865971565246582, "learning_rate": 2.7509042871280324e-05, "loss": 0.0482, "step": 19064 }, { "epoch": 8.86378428637843, "grad_norm": 0.5302127599716187, "learning_rate": 2.7587173631260555e-05, "loss": 0.0195, "step": 19066 }, { "epoch": 8.864714086471409, "grad_norm": 0.3533684313297272, "learning_rate": 2.766527885688144e-05, "loss": 0.0086, "step": 19068 }, { "epoch": 8.865643886564389, "grad_norm": 1.5074983835220337, "learning_rate": 2.774335777727611e-05, "loss": 0.0273, "step": 19070 }, { "epoch": 8.866573686657368, "grad_norm": 0.7394856214523315, "learning_rate": 2.7821409621837057e-05, "loss": 0.0162, "step": 19072 }, { "epoch": 8.86750348675035, "grad_norm": 1.3666428327560425, "learning_rate": 2.7899433620224005e-05, "loss": 0.0247, "step": 19074 }, { "epoch": 8.868433286843329, "grad_norm": 1.2752918004989624, "learning_rate": 2.7977429002371753e-05, "loss": 0.0599, "step": 19076 }, { "epoch": 8.869363086936309, "grad_norm": 0.8050642013549805, "learning_rate": 2.8055394998497193e-05, "loss": 0.0182, "step": 19078 }, { "epoch": 8.870292887029288, "grad_norm": 1.4131008386611938, "learning_rate": 2.81333308391076e-05, "loss": 0.0344, "step": 19080 }, { "epoch": 8.871222687122268, "grad_norm": 1.7192656993865967, "learning_rate": 2.8211235755007514e-05, "loss": 0.0228, "step": 19082 }, { "epoch": 8.87215248721525, "grad_norm": 0.9129322171211243, "learning_rate": 2.828910897730696e-05, "loss": 0.017, "step": 19084 }, { "epoch": 8.873082287308229, "grad_norm": 0.9182037115097046, "learning_rate": 2.8366949737428837e-05, "loss": 0.0335, "step": 19086 }, { "epoch": 8.874012087401209, "grad_norm": 1.012812852859497, "learning_rate": 2.844475726711592e-05, "loss": 0.0238, "step": 19088 }, { "epoch": 8.874941887494188, "grad_norm": 0.5896819233894348, "learning_rate": 2.8522530798439577e-05, "loss": 0.0125, "step": 19090 }, { "epoch": 8.87587168758717, "grad_norm": 2.1167168617248535, "learning_rate": 2.8600269563806265e-05, "loss": 0.0336, "step": 19092 }, { "epoch": 8.876801487680149, "grad_norm": 0.840055525302887, "learning_rate": 2.8677972795965845e-05, "loss": 0.0149, "step": 19094 }, { "epoch": 8.877731287773129, "grad_norm": 0.5856977701187134, "learning_rate": 2.875563972801887e-05, "loss": 0.0419, "step": 19096 }, { "epoch": 8.878661087866108, "grad_norm": 1.3483539819717407, "learning_rate": 2.8833269593423916e-05, "loss": 0.0295, "step": 19098 }, { "epoch": 8.87959088795909, "grad_norm": 1.4251476526260376, "learning_rate": 2.8910861626005796e-05, "loss": 0.0151, "step": 19100 }, { "epoch": 8.88052068805207, "grad_norm": 1.030753493309021, "learning_rate": 2.8988415059962136e-05, "loss": 0.0482, "step": 19102 }, { "epoch": 8.881450488145049, "grad_norm": 0.6616517305374146, "learning_rate": 2.9065929129872023e-05, "loss": 0.0192, "step": 19104 }, { "epoch": 8.882380288238028, "grad_norm": 2.2448537349700928, "learning_rate": 2.9143403070702966e-05, "loss": 0.0356, "step": 19106 }, { "epoch": 8.883310088331008, "grad_norm": 0.7755854725837708, "learning_rate": 2.9220836117818273e-05, "loss": 0.0144, "step": 19108 }, { "epoch": 8.88423988842399, "grad_norm": 0.82660973072052, "learning_rate": 2.9298227506985198e-05, "loss": 0.0237, "step": 19110 }, { "epoch": 8.885169688516969, "grad_norm": 0.737791895866394, "learning_rate": 2.9375576474381813e-05, "loss": 0.0222, "step": 19112 }, { "epoch": 8.886099488609949, "grad_norm": 1.5987619161605835, "learning_rate": 2.9452882256605295e-05, "loss": 0.0818, "step": 19114 }, { "epoch": 8.887029288702928, "grad_norm": 0.9555467963218689, "learning_rate": 2.9530144090678432e-05, "loss": 0.0176, "step": 19116 }, { "epoch": 8.88795908879591, "grad_norm": 2.4543545246124268, "learning_rate": 2.9607361214058284e-05, "loss": 0.0469, "step": 19118 }, { "epoch": 8.88888888888889, "grad_norm": 1.7623370885849, "learning_rate": 2.9684532864643105e-05, "loss": 0.031, "step": 19120 }, { "epoch": 8.889818688981869, "grad_norm": 0.3392040729522705, "learning_rate": 2.976165828077969e-05, "loss": 0.0186, "step": 19122 }, { "epoch": 8.890748489074848, "grad_norm": 1.515297293663025, "learning_rate": 2.9838736701271484e-05, "loss": 0.0312, "step": 19124 }, { "epoch": 8.891678289167828, "grad_norm": 0.6231454610824585, "learning_rate": 2.991576736538543e-05, "loss": 0.0124, "step": 19126 }, { "epoch": 8.89260808926081, "grad_norm": 0.5089408755302429, "learning_rate": 2.9992749512860143e-05, "loss": 0.0217, "step": 19128 }, { "epoch": 8.893537889353789, "grad_norm": 1.0176223516464233, "learning_rate": 3.0069682383912823e-05, "loss": 0.024, "step": 19130 }, { "epoch": 8.894467689446769, "grad_norm": 0.9297718405723572, "learning_rate": 3.0146565219246992e-05, "loss": 0.0322, "step": 19132 }, { "epoch": 8.895397489539748, "grad_norm": 0.6683495044708252, "learning_rate": 3.0223397260060292e-05, "loss": 0.021, "step": 19134 }, { "epoch": 8.89632728963273, "grad_norm": 0.6777088642120361, "learning_rate": 3.0300177748051315e-05, "loss": 0.0152, "step": 19136 }, { "epoch": 8.89725708972571, "grad_norm": 1.218921422958374, "learning_rate": 3.0376905925427738e-05, "loss": 0.049, "step": 19138 }, { "epoch": 8.898186889818689, "grad_norm": 1.3562716245651245, "learning_rate": 3.0453581034913594e-05, "loss": 0.0259, "step": 19140 }, { "epoch": 8.899116689911668, "grad_norm": 1.0050638914108276, "learning_rate": 3.053020231975616e-05, "loss": 0.0506, "step": 19142 }, { "epoch": 8.900046490004648, "grad_norm": 0.7573022842407227, "learning_rate": 3.060676902373455e-05, "loss": 0.0471, "step": 19144 }, { "epoch": 8.90097629009763, "grad_norm": 1.0358248949050903, "learning_rate": 3.068328039116613e-05, "loss": 0.0423, "step": 19146 }, { "epoch": 8.901906090190609, "grad_norm": 0.7852777242660522, "learning_rate": 3.075973566691468e-05, "loss": 0.0411, "step": 19148 }, { "epoch": 8.902835890283589, "grad_norm": 2.028893232345581, "learning_rate": 3.083613409639759e-05, "loss": 0.0289, "step": 19150 }, { "epoch": 8.903765690376568, "grad_norm": 0.8476003408432007, "learning_rate": 3.091247492559302e-05, "loss": 0.0214, "step": 19152 }, { "epoch": 8.90469549046955, "grad_norm": 0.41573387384414673, "learning_rate": 3.0988757401048064e-05, "loss": 0.0123, "step": 19154 }, { "epoch": 8.90562529056253, "grad_norm": 0.9103931784629822, "learning_rate": 3.106498076988515e-05, "loss": 0.0256, "step": 19156 }, { "epoch": 8.906555090655509, "grad_norm": 0.7745035886764526, "learning_rate": 3.1141144279810666e-05, "loss": 0.0229, "step": 19158 }, { "epoch": 8.907484890748488, "grad_norm": 1.1666393280029297, "learning_rate": 3.121724717912133e-05, "loss": 0.0297, "step": 19160 }, { "epoch": 8.90841469084147, "grad_norm": 1.7190920114517212, "learning_rate": 3.1293288716712335e-05, "loss": 0.0469, "step": 19162 }, { "epoch": 8.90934449093445, "grad_norm": 1.8989577293395996, "learning_rate": 3.13692681420845e-05, "loss": 0.0371, "step": 19164 }, { "epoch": 8.910274291027429, "grad_norm": 1.3148741722106934, "learning_rate": 3.14451847053514e-05, "loss": 0.0334, "step": 19166 }, { "epoch": 8.911204091120409, "grad_norm": 1.5425939559936523, "learning_rate": 3.152103765724744e-05, "loss": 0.0324, "step": 19168 }, { "epoch": 8.91213389121339, "grad_norm": 0.24590161442756653, "learning_rate": 3.15968262491343e-05, "loss": 0.0165, "step": 19170 }, { "epoch": 8.91306369130637, "grad_norm": 1.339402675628662, "learning_rate": 3.167254973300931e-05, "loss": 0.0246, "step": 19172 }, { "epoch": 8.91399349139935, "grad_norm": 1.4961845874786377, "learning_rate": 3.174820736151237e-05, "loss": 0.0203, "step": 19174 }, { "epoch": 8.914923291492329, "grad_norm": 2.4135971069335938, "learning_rate": 3.1823798387933046e-05, "loss": 0.0386, "step": 19176 }, { "epoch": 8.915853091585308, "grad_norm": 1.1543720960617065, "learning_rate": 3.1899322066218605e-05, "loss": 0.0276, "step": 19178 }, { "epoch": 8.91678289167829, "grad_norm": 1.746376395225525, "learning_rate": 3.1974777650980654e-05, "loss": 0.0531, "step": 19180 }, { "epoch": 8.91771269177127, "grad_norm": 0.6006530523300171, "learning_rate": 3.205016439750319e-05, "loss": 0.0378, "step": 19182 }, { "epoch": 8.918642491864249, "grad_norm": 0.9325423836708069, "learning_rate": 3.2125481561749395e-05, "loss": 0.0298, "step": 19184 }, { "epoch": 8.919572291957229, "grad_norm": 1.4105584621429443, "learning_rate": 3.220072840036918e-05, "loss": 0.0194, "step": 19186 }, { "epoch": 8.92050209205021, "grad_norm": 0.6967313885688782, "learning_rate": 3.227590417070677e-05, "loss": 0.022, "step": 19188 }, { "epoch": 8.92143189214319, "grad_norm": 1.359034538269043, "learning_rate": 3.2351008130807526e-05, "loss": 0.0404, "step": 19190 }, { "epoch": 8.92236169223617, "grad_norm": 0.8065986633300781, "learning_rate": 3.2426039539425754e-05, "loss": 0.0254, "step": 19192 }, { "epoch": 8.923291492329149, "grad_norm": 1.5466684103012085, "learning_rate": 3.250099765603182e-05, "loss": 0.0381, "step": 19194 }, { "epoch": 8.924221292422128, "grad_norm": 1.6400090456008911, "learning_rate": 3.25758817408193e-05, "loss": 0.0385, "step": 19196 }, { "epoch": 8.92515109251511, "grad_norm": 0.7669448256492615, "learning_rate": 3.2650691054712505e-05, "loss": 0.0204, "step": 19198 }, { "epoch": 8.92608089260809, "grad_norm": 1.6985182762145996, "learning_rate": 3.2725424859373623e-05, "loss": 0.0672, "step": 19200 }, { "epoch": 8.927010692701069, "grad_norm": 2.040221929550171, "learning_rate": 3.280008241721034e-05, "loss": 0.059, "step": 19202 }, { "epoch": 8.927940492794049, "grad_norm": 1.76161789894104, "learning_rate": 3.2874662991382555e-05, "loss": 0.03, "step": 19204 }, { "epoch": 8.92887029288703, "grad_norm": 1.4744688272476196, "learning_rate": 3.294916584581016e-05, "loss": 0.0243, "step": 19206 }, { "epoch": 8.92980009298001, "grad_norm": 1.2398635149002075, "learning_rate": 3.3023590245180164e-05, "loss": 0.0258, "step": 19208 }, { "epoch": 8.93072989307299, "grad_norm": 0.9253876805305481, "learning_rate": 3.30979354549537e-05, "loss": 0.0232, "step": 19210 }, { "epoch": 8.931659693165969, "grad_norm": 1.3242868185043335, "learning_rate": 3.317220074137356e-05, "loss": 0.026, "step": 19212 }, { "epoch": 8.932589493258948, "grad_norm": 0.689095139503479, "learning_rate": 3.324638537147127e-05, "loss": 0.0347, "step": 19214 }, { "epoch": 8.93351929335193, "grad_norm": 0.5229110717773438, "learning_rate": 3.332048861307456e-05, "loss": 0.0251, "step": 19216 }, { "epoch": 8.93444909344491, "grad_norm": 0.5276861786842346, "learning_rate": 3.339450973481445e-05, "loss": 0.0214, "step": 19218 }, { "epoch": 8.935378893537889, "grad_norm": 0.4781796336174011, "learning_rate": 3.346844800613218e-05, "loss": 0.0284, "step": 19220 }, { "epoch": 8.936308693630869, "grad_norm": 1.5895287990570068, "learning_rate": 3.354230269728701e-05, "loss": 0.0507, "step": 19222 }, { "epoch": 8.93723849372385, "grad_norm": 1.5067156553268433, "learning_rate": 3.3616073079362885e-05, "loss": 0.0526, "step": 19224 }, { "epoch": 8.93816829381683, "grad_norm": 2.4887197017669678, "learning_rate": 3.368975842427583e-05, "loss": 0.0445, "step": 19226 }, { "epoch": 8.93909809390981, "grad_norm": 1.0042238235473633, "learning_rate": 3.376335800478142e-05, "loss": 0.0364, "step": 19228 }, { "epoch": 8.940027894002789, "grad_norm": 2.312317371368408, "learning_rate": 3.383687109448132e-05, "loss": 0.0229, "step": 19230 }, { "epoch": 8.94095769409577, "grad_norm": 0.4087238311767578, "learning_rate": 3.39102969678312e-05, "loss": 0.015, "step": 19232 }, { "epoch": 8.94188749418875, "grad_norm": 1.0070791244506836, "learning_rate": 3.398363490014715e-05, "loss": 0.0284, "step": 19234 }, { "epoch": 8.94281729428173, "grad_norm": 1.2415618896484375, "learning_rate": 3.405688416761348e-05, "loss": 0.0333, "step": 19236 }, { "epoch": 8.943747094374709, "grad_norm": 0.8539531826972961, "learning_rate": 3.413004404728966e-05, "loss": 0.0163, "step": 19238 }, { "epoch": 8.944676894467689, "grad_norm": 0.4585426449775696, "learning_rate": 3.4203113817116875e-05, "loss": 0.0275, "step": 19240 }, { "epoch": 8.94560669456067, "grad_norm": 1.6122636795043945, "learning_rate": 3.4276092755926246e-05, "loss": 0.0311, "step": 19242 }, { "epoch": 8.94653649465365, "grad_norm": 1.2675695419311523, "learning_rate": 3.434898014344493e-05, "loss": 0.0224, "step": 19244 }, { "epoch": 8.94746629474663, "grad_norm": 1.651882529258728, "learning_rate": 3.442177526030402e-05, "loss": 0.0363, "step": 19246 }, { "epoch": 8.948396094839609, "grad_norm": 0.3514522314071655, "learning_rate": 3.449447738804493e-05, "loss": 0.0209, "step": 19248 }, { "epoch": 8.94932589493259, "grad_norm": 1.2589606046676636, "learning_rate": 3.456708580912719e-05, "loss": 0.0564, "step": 19250 }, { "epoch": 8.95025569502557, "grad_norm": 1.2321878671646118, "learning_rate": 3.463959980693489e-05, "loss": 0.0265, "step": 19252 }, { "epoch": 8.95118549511855, "grad_norm": 2.1792259216308594, "learning_rate": 3.471201866578409e-05, "loss": 0.0395, "step": 19254 }, { "epoch": 8.952115295211529, "grad_norm": 1.8555238246917725, "learning_rate": 3.4784341670930024e-05, "loss": 0.0498, "step": 19256 }, { "epoch": 8.95304509530451, "grad_norm": 1.4664264917373657, "learning_rate": 3.4856568108573686e-05, "loss": 0.0261, "step": 19258 }, { "epoch": 8.95397489539749, "grad_norm": 1.9235327243804932, "learning_rate": 3.492869726586938e-05, "loss": 0.0418, "step": 19260 }, { "epoch": 8.95490469549047, "grad_norm": 0.8126788139343262, "learning_rate": 3.500072843093152e-05, "loss": 0.0182, "step": 19262 }, { "epoch": 8.95583449558345, "grad_norm": 1.2217668294906616, "learning_rate": 3.5072660892841496e-05, "loss": 0.0482, "step": 19264 }, { "epoch": 8.956764295676429, "grad_norm": 1.9387232065200806, "learning_rate": 3.5144493941654974e-05, "loss": 0.0547, "step": 19266 }, { "epoch": 8.95769409576941, "grad_norm": 0.7158524990081787, "learning_rate": 3.521622686840865e-05, "loss": 0.0334, "step": 19268 }, { "epoch": 8.95862389586239, "grad_norm": 1.0076615810394287, "learning_rate": 3.5287858965127605e-05, "loss": 0.0292, "step": 19270 }, { "epoch": 8.95955369595537, "grad_norm": 1.1536496877670288, "learning_rate": 3.5359389524832025e-05, "loss": 0.0259, "step": 19272 }, { "epoch": 8.960483496048349, "grad_norm": 0.4529200792312622, "learning_rate": 3.5430817841544004e-05, "loss": 0.0247, "step": 19274 }, { "epoch": 8.96141329614133, "grad_norm": 1.0493732690811157, "learning_rate": 3.550214321029506e-05, "loss": 0.0361, "step": 19276 }, { "epoch": 8.96234309623431, "grad_norm": 0.594561755657196, "learning_rate": 3.557336492713252e-05, "loss": 0.0194, "step": 19278 }, { "epoch": 8.96327289632729, "grad_norm": 0.4989914894104004, "learning_rate": 3.5644482289126705e-05, "loss": 0.0255, "step": 19280 }, { "epoch": 8.96420269642027, "grad_norm": 1.3460129499435425, "learning_rate": 3.5715494594378134e-05, "loss": 0.0514, "step": 19282 }, { "epoch": 8.965132496513249, "grad_norm": 0.5685331225395203, "learning_rate": 3.578640114202386e-05, "loss": 0.0523, "step": 19284 }, { "epoch": 8.96606229660623, "grad_norm": 1.325848937034607, "learning_rate": 3.585720123224503e-05, "loss": 0.0675, "step": 19286 }, { "epoch": 8.96699209669921, "grad_norm": 1.9965033531188965, "learning_rate": 3.592789416627319e-05, "loss": 0.0378, "step": 19288 }, { "epoch": 8.96792189679219, "grad_norm": 1.375480055809021, "learning_rate": 3.5998479246397785e-05, "loss": 0.0231, "step": 19290 }, { "epoch": 8.968851696885169, "grad_norm": 2.1392526626586914, "learning_rate": 3.606895577597248e-05, "loss": 0.0387, "step": 19292 }, { "epoch": 8.96978149697815, "grad_norm": 1.2966108322143555, "learning_rate": 3.6139323059422307e-05, "loss": 0.037, "step": 19294 }, { "epoch": 8.97071129707113, "grad_norm": 1.4078243970870972, "learning_rate": 3.620958040225075e-05, "loss": 0.0359, "step": 19296 }, { "epoch": 8.97164109716411, "grad_norm": 0.8199180960655212, "learning_rate": 3.6279727111046024e-05, "loss": 0.0271, "step": 19298 }, { "epoch": 8.97257089725709, "grad_norm": 1.2366857528686523, "learning_rate": 3.63497624934886e-05, "loss": 0.0602, "step": 19300 }, { "epoch": 8.973500697350069, "grad_norm": 1.2685308456420898, "learning_rate": 3.641968585835737e-05, "loss": 0.0626, "step": 19302 }, { "epoch": 8.97443049744305, "grad_norm": 1.699058175086975, "learning_rate": 3.6489496515537035e-05, "loss": 0.0375, "step": 19304 }, { "epoch": 8.97536029753603, "grad_norm": 0.8688063621520996, "learning_rate": 3.655919377602474e-05, "loss": 0.012, "step": 19306 }, { "epoch": 8.97629009762901, "grad_norm": 0.8755934834480286, "learning_rate": 3.662877695193637e-05, "loss": 0.0313, "step": 19308 }, { "epoch": 8.977219897721989, "grad_norm": 1.2967616319656372, "learning_rate": 3.669824535651428e-05, "loss": 0.0232, "step": 19310 }, { "epoch": 8.97814969781497, "grad_norm": 2.002453327178955, "learning_rate": 3.6767598304133215e-05, "loss": 0.0416, "step": 19312 }, { "epoch": 8.97907949790795, "grad_norm": 2.8150179386138916, "learning_rate": 3.683683511030766e-05, "loss": 0.0614, "step": 19314 }, { "epoch": 8.98000929800093, "grad_norm": 1.6781150102615356, "learning_rate": 3.690595509169837e-05, "loss": 0.0441, "step": 19316 }, { "epoch": 8.98093909809391, "grad_norm": 1.0236976146697998, "learning_rate": 3.6974957566118875e-05, "loss": 0.0292, "step": 19318 }, { "epoch": 8.98186889818689, "grad_norm": 1.5971964597702026, "learning_rate": 3.7043841852542854e-05, "loss": 0.0329, "step": 19320 }, { "epoch": 8.98279869827987, "grad_norm": 1.9271663427352905, "learning_rate": 3.7112607271109876e-05, "loss": 0.0242, "step": 19322 }, { "epoch": 8.98372849837285, "grad_norm": 1.5560886859893799, "learning_rate": 3.71812531431332e-05, "loss": 0.0356, "step": 19324 }, { "epoch": 8.98465829846583, "grad_norm": 0.527670681476593, "learning_rate": 3.7249778791105845e-05, "loss": 0.035, "step": 19326 }, { "epoch": 8.98558809855881, "grad_norm": 1.0440651178359985, "learning_rate": 3.7318183538707166e-05, "loss": 0.0548, "step": 19328 }, { "epoch": 8.98651789865179, "grad_norm": 0.739889919757843, "learning_rate": 3.73864667108101e-05, "loss": 0.0194, "step": 19330 }, { "epoch": 8.98744769874477, "grad_norm": 1.2905550003051758, "learning_rate": 3.745462763348714e-05, "loss": 0.0324, "step": 19332 }, { "epoch": 8.98837749883775, "grad_norm": 0.916727602481842, "learning_rate": 3.752266563401773e-05, "loss": 0.0304, "step": 19334 }, { "epoch": 8.98930729893073, "grad_norm": 1.6454992294311523, "learning_rate": 3.759058004089396e-05, "loss": 0.0433, "step": 19336 }, { "epoch": 8.99023709902371, "grad_norm": 1.2856595516204834, "learning_rate": 3.765837018382821e-05, "loss": 0.0219, "step": 19338 }, { "epoch": 8.99116689911669, "grad_norm": 1.9598420858383179, "learning_rate": 3.7726035393759224e-05, "loss": 0.0552, "step": 19340 }, { "epoch": 8.99209669920967, "grad_norm": 2.479684591293335, "learning_rate": 3.779357500285852e-05, "loss": 0.0611, "step": 19342 }, { "epoch": 8.99302649930265, "grad_norm": 0.6496137380599976, "learning_rate": 3.7860988344537586e-05, "loss": 0.0381, "step": 19344 }, { "epoch": 8.99395629939563, "grad_norm": 0.6659356951713562, "learning_rate": 3.792827475345389e-05, "loss": 0.0475, "step": 19346 }, { "epoch": 8.99488609948861, "grad_norm": 1.3873718976974487, "learning_rate": 3.799543356551766e-05, "loss": 0.042, "step": 19348 }, { "epoch": 8.99581589958159, "grad_norm": 1.1520490646362305, "learning_rate": 3.806246411789867e-05, "loss": 0.0312, "step": 19350 }, { "epoch": 8.99674569967457, "grad_norm": 0.7496092319488525, "learning_rate": 3.81293657490323e-05, "loss": 0.0274, "step": 19352 }, { "epoch": 8.99767549976755, "grad_norm": 0.6732524633407593, "learning_rate": 3.8196137798626606e-05, "loss": 0.0341, "step": 19354 }, { "epoch": 8.99860529986053, "grad_norm": 1.2108392715454102, "learning_rate": 3.8262779607668265e-05, "loss": 0.0269, "step": 19356 }, { "epoch": 8.99953509995351, "grad_norm": 0.6853455901145935, "learning_rate": 3.8329290518429583e-05, "loss": 0.0166, "step": 19358 }, { "epoch": 9.00046490004649, "grad_norm": 2.5961642265319824, "learning_rate": 3.839566987447489e-05, "loss": 0.0418, "step": 19360 }, { "epoch": 9.00139470013947, "grad_norm": 0.8756305575370789, "learning_rate": 3.846191702066643e-05, "loss": 0.0362, "step": 19362 }, { "epoch": 9.00232450023245, "grad_norm": 0.8798677921295166, "learning_rate": 3.8528031303171855e-05, "loss": 0.0293, "step": 19364 }, { "epoch": 9.00325430032543, "grad_norm": 0.723216712474823, "learning_rate": 3.8594012069469735e-05, "loss": 0.0202, "step": 19366 }, { "epoch": 9.00418410041841, "grad_norm": 2.1004817485809326, "learning_rate": 3.865985866835661e-05, "loss": 0.0564, "step": 19368 }, { "epoch": 9.00511390051139, "grad_norm": 1.905982255935669, "learning_rate": 3.872557044995321e-05, "loss": 0.0277, "step": 19370 }, { "epoch": 9.00604370060437, "grad_norm": 1.0723546743392944, "learning_rate": 3.879114676571063e-05, "loss": 0.0255, "step": 19372 }, { "epoch": 9.00697350069735, "grad_norm": 2.763387441635132, "learning_rate": 3.8856586968417334e-05, "loss": 0.0486, "step": 19374 }, { "epoch": 9.00790330079033, "grad_norm": 1.497247338294983, "learning_rate": 3.892189041220464e-05, "loss": 0.0412, "step": 19376 }, { "epoch": 9.00883310088331, "grad_norm": 1.14888334274292, "learning_rate": 3.8987056452554136e-05, "loss": 0.0185, "step": 19378 }, { "epoch": 9.00976290097629, "grad_norm": 2.0961790084838867, "learning_rate": 3.905208444630319e-05, "loss": 0.0276, "step": 19380 }, { "epoch": 9.01069270106927, "grad_norm": 0.7435700297355652, "learning_rate": 3.911697375165181e-05, "loss": 0.044, "step": 19382 }, { "epoch": 9.01162250116225, "grad_norm": 1.2774840593338013, "learning_rate": 3.918172372816883e-05, "loss": 0.0287, "step": 19384 }, { "epoch": 9.01255230125523, "grad_norm": 0.5849133729934692, "learning_rate": 3.924633373679796e-05, "loss": 0.026, "step": 19386 }, { "epoch": 9.01348210134821, "grad_norm": 2.104022741317749, "learning_rate": 3.931080313986474e-05, "loss": 0.041, "step": 19388 }, { "epoch": 9.014411901441191, "grad_norm": 2.107818603515625, "learning_rate": 3.9375131301081886e-05, "loss": 0.0367, "step": 19390 }, { "epoch": 9.01534170153417, "grad_norm": 0.6889903545379639, "learning_rate": 3.943931758555658e-05, "loss": 0.0164, "step": 19392 }, { "epoch": 9.01627150162715, "grad_norm": 0.8808338642120361, "learning_rate": 3.950336135979615e-05, "loss": 0.0298, "step": 19394 }, { "epoch": 9.01720130172013, "grad_norm": 0.5464959144592285, "learning_rate": 3.956726199171429e-05, "loss": 0.0224, "step": 19396 }, { "epoch": 9.01813110181311, "grad_norm": 1.5963166952133179, "learning_rate": 3.9631018850637665e-05, "loss": 0.0465, "step": 19398 }, { "epoch": 9.01906090190609, "grad_norm": 2.099656343460083, "learning_rate": 3.969463130731171e-05, "loss": 0.0376, "step": 19400 }, { "epoch": 9.01999070199907, "grad_norm": 2.5800461769104004, "learning_rate": 3.9758098733907275e-05, "loss": 0.0546, "step": 19402 }, { "epoch": 9.02092050209205, "grad_norm": 1.451999306678772, "learning_rate": 3.982142050402643e-05, "loss": 0.0319, "step": 19404 }, { "epoch": 9.02185030218503, "grad_norm": 2.4851605892181396, "learning_rate": 3.988459599270879e-05, "loss": 0.0578, "step": 19406 }, { "epoch": 9.022780102278011, "grad_norm": 1.554766297340393, "learning_rate": 3.994762457643793e-05, "loss": 0.0263, "step": 19408 }, { "epoch": 9.02370990237099, "grad_norm": 1.8886349201202393, "learning_rate": 4.0010505633147024e-05, "loss": 0.0457, "step": 19410 }, { "epoch": 9.02463970246397, "grad_norm": 1.1282062530517578, "learning_rate": 4.00732385422255e-05, "loss": 0.0353, "step": 19412 }, { "epoch": 9.02556950255695, "grad_norm": 1.7104754447937012, "learning_rate": 4.0135822684524956e-05, "loss": 0.0286, "step": 19414 }, { "epoch": 9.02649930264993, "grad_norm": 0.9018343687057495, "learning_rate": 4.0198257442365084e-05, "loss": 0.0305, "step": 19416 }, { "epoch": 9.02742910274291, "grad_norm": 1.145666480064392, "learning_rate": 4.0260542199540044e-05, "loss": 0.0399, "step": 19418 }, { "epoch": 9.02835890283589, "grad_norm": 1.1070016622543335, "learning_rate": 4.032267634132435e-05, "loss": 0.0377, "step": 19420 }, { "epoch": 9.02928870292887, "grad_norm": 0.44160881638526917, "learning_rate": 4.038465925447926e-05, "loss": 0.0269, "step": 19422 }, { "epoch": 9.03021850302185, "grad_norm": 1.679652214050293, "learning_rate": 4.0446490327258287e-05, "loss": 0.0707, "step": 19424 }, { "epoch": 9.031148303114831, "grad_norm": 1.4617141485214233, "learning_rate": 4.0508168949413784e-05, "loss": 0.0451, "step": 19426 }, { "epoch": 9.03207810320781, "grad_norm": 1.2388648986816406, "learning_rate": 4.05696945122028e-05, "loss": 0.0503, "step": 19428 }, { "epoch": 9.03300790330079, "grad_norm": 0.6856862306594849, "learning_rate": 4.063106640839257e-05, "loss": 0.0218, "step": 19430 }, { "epoch": 9.03393770339377, "grad_norm": 1.9741708040237427, "learning_rate": 4.069228403226749e-05, "loss": 0.0216, "step": 19432 }, { "epoch": 9.034867503486751, "grad_norm": 3.07196044921875, "learning_rate": 4.075334677963417e-05, "loss": 0.0911, "step": 19434 }, { "epoch": 9.03579730357973, "grad_norm": 2.8349478244781494, "learning_rate": 4.081425404782802e-05, "loss": 0.0798, "step": 19436 }, { "epoch": 9.03672710367271, "grad_norm": 1.4181585311889648, "learning_rate": 4.0875005235718954e-05, "loss": 0.0499, "step": 19438 }, { "epoch": 9.03765690376569, "grad_norm": 1.607405662536621, "learning_rate": 4.0935599743717146e-05, "loss": 0.0369, "step": 19440 }, { "epoch": 9.03858670385867, "grad_norm": 3.044781446456909, "learning_rate": 4.0996036973779465e-05, "loss": 0.0687, "step": 19442 }, { "epoch": 9.039516503951651, "grad_norm": 1.3758821487426758, "learning_rate": 4.105631632941458e-05, "loss": 0.0291, "step": 19444 }, { "epoch": 9.04044630404463, "grad_norm": 1.913702130317688, "learning_rate": 4.111643721568972e-05, "loss": 0.0544, "step": 19446 }, { "epoch": 9.04137610413761, "grad_norm": 0.7199245691299438, "learning_rate": 4.117639903923608e-05, "loss": 0.0316, "step": 19448 }, { "epoch": 9.04230590423059, "grad_norm": 0.9942136406898499, "learning_rate": 4.123620120825452e-05, "loss": 0.0333, "step": 19450 }, { "epoch": 9.043235704323571, "grad_norm": 1.8989827632904053, "learning_rate": 4.129584313252193e-05, "loss": 0.0475, "step": 19452 }, { "epoch": 9.04416550441655, "grad_norm": 1.1064109802246094, "learning_rate": 4.135532422339646e-05, "loss": 0.04, "step": 19454 }, { "epoch": 9.04509530450953, "grad_norm": 1.141329050064087, "learning_rate": 4.141464389382388e-05, "loss": 0.0241, "step": 19456 }, { "epoch": 9.04602510460251, "grad_norm": 1.5964341163635254, "learning_rate": 4.147380155834292e-05, "loss": 0.0558, "step": 19458 }, { "epoch": 9.046954904695491, "grad_norm": 1.7425909042358398, "learning_rate": 4.1532796633091255e-05, "loss": 0.0437, "step": 19460 }, { "epoch": 9.047884704788471, "grad_norm": 1.8676042556762695, "learning_rate": 4.159162853581145e-05, "loss": 0.0463, "step": 19462 }, { "epoch": 9.04881450488145, "grad_norm": 1.3065277338027954, "learning_rate": 4.165029668585624e-05, "loss": 0.0395, "step": 19464 }, { "epoch": 9.04974430497443, "grad_norm": 1.4954978227615356, "learning_rate": 4.1708800504194806e-05, "loss": 0.0572, "step": 19466 }, { "epoch": 9.05067410506741, "grad_norm": 1.2950178384780884, "learning_rate": 4.1767139413417946e-05, "loss": 0.0272, "step": 19468 }, { "epoch": 9.051603905160391, "grad_norm": 2.85676908493042, "learning_rate": 4.182531283774431e-05, "loss": 0.0607, "step": 19470 }, { "epoch": 9.05253370525337, "grad_norm": 2.487128257751465, "learning_rate": 4.188332020302561e-05, "loss": 0.085, "step": 19472 }, { "epoch": 9.05346350534635, "grad_norm": 1.2555171251296997, "learning_rate": 4.194116093675253e-05, "loss": 0.0395, "step": 19474 }, { "epoch": 9.05439330543933, "grad_norm": 0.779502809047699, "learning_rate": 4.1998834468060474e-05, "loss": 0.0253, "step": 19476 }, { "epoch": 9.055323105532311, "grad_norm": 2.2739837169647217, "learning_rate": 4.2056340227734866e-05, "loss": 0.0504, "step": 19478 }, { "epoch": 9.056252905625291, "grad_norm": 0.8632343411445618, "learning_rate": 4.211367764821714e-05, "loss": 0.0251, "step": 19480 }, { "epoch": 9.05718270571827, "grad_norm": 1.272339105606079, "learning_rate": 4.2170846163610145e-05, "loss": 0.0539, "step": 19482 }, { "epoch": 9.05811250581125, "grad_norm": 1.4435780048370361, "learning_rate": 4.222784520968368e-05, "loss": 0.0268, "step": 19484 }, { "epoch": 9.05904230590423, "grad_norm": 1.22370183467865, "learning_rate": 4.228467422388016e-05, "loss": 0.0539, "step": 19486 }, { "epoch": 9.059972105997211, "grad_norm": 2.030949831008911, "learning_rate": 4.234133264532009e-05, "loss": 0.0355, "step": 19488 }, { "epoch": 9.06090190609019, "grad_norm": 1.4431766271591187, "learning_rate": 4.2397819914807795e-05, "loss": 0.0199, "step": 19490 }, { "epoch": 9.06183170618317, "grad_norm": 1.134390115737915, "learning_rate": 4.2454135474836777e-05, "loss": 0.0331, "step": 19492 }, { "epoch": 9.06276150627615, "grad_norm": 1.021855354309082, "learning_rate": 4.251027876959507e-05, "loss": 0.0547, "step": 19494 }, { "epoch": 9.063691306369131, "grad_norm": 1.0057854652404785, "learning_rate": 4.256624924497118e-05, "loss": 0.0329, "step": 19496 }, { "epoch": 9.064621106462111, "grad_norm": 2.286583423614502, "learning_rate": 4.262204634855902e-05, "loss": 0.0513, "step": 19498 }, { "epoch": 9.06555090655509, "grad_norm": 1.6657540798187256, "learning_rate": 4.26776695296637e-05, "loss": 0.0464, "step": 19500 }, { "epoch": 9.06648070664807, "grad_norm": 2.2747371196746826, "learning_rate": 4.2733118239306826e-05, "loss": 0.0358, "step": 19502 }, { "epoch": 9.067410506741052, "grad_norm": 1.3256624937057495, "learning_rate": 4.278839193023208e-05, "loss": 0.0134, "step": 19504 }, { "epoch": 9.068340306834031, "grad_norm": 2.7391364574432373, "learning_rate": 4.28434900569105e-05, "loss": 0.0433, "step": 19506 }, { "epoch": 9.06927010692701, "grad_norm": 1.609521746635437, "learning_rate": 4.289841207554572e-05, "loss": 0.0549, "step": 19508 }, { "epoch": 9.07019990701999, "grad_norm": 1.2272440195083618, "learning_rate": 4.295315744407968e-05, "loss": 0.047, "step": 19510 }, { "epoch": 9.07112970711297, "grad_norm": 1.2627254724502563, "learning_rate": 4.300772562219766e-05, "loss": 0.0299, "step": 19512 }, { "epoch": 9.072059507205951, "grad_norm": 1.311449408531189, "learning_rate": 4.30621160713337e-05, "loss": 0.0384, "step": 19514 }, { "epoch": 9.072989307298931, "grad_norm": 0.6989126801490784, "learning_rate": 4.3116328254676145e-05, "loss": 0.0293, "step": 19516 }, { "epoch": 9.07391910739191, "grad_norm": 1.0999436378479004, "learning_rate": 4.317036163717253e-05, "loss": 0.0194, "step": 19518 }, { "epoch": 9.07484890748489, "grad_norm": 1.3060680627822876, "learning_rate": 4.322421568553528e-05, "loss": 0.0476, "step": 19520 }, { "epoch": 9.075778707577872, "grad_norm": 1.2519437074661255, "learning_rate": 4.327788986824656e-05, "loss": 0.0345, "step": 19522 }, { "epoch": 9.076708507670851, "grad_norm": 2.0444347858428955, "learning_rate": 4.333138365556393e-05, "loss": 0.0488, "step": 19524 }, { "epoch": 9.07763830776383, "grad_norm": 1.7037544250488281, "learning_rate": 4.3384696519525416e-05, "loss": 0.0339, "step": 19526 }, { "epoch": 9.07856810785681, "grad_norm": 0.8426714539527893, "learning_rate": 4.3437827933954337e-05, "loss": 0.0238, "step": 19528 }, { "epoch": 9.07949790794979, "grad_norm": 1.0590170621871948, "learning_rate": 4.3490777374465254e-05, "loss": 0.0467, "step": 19530 }, { "epoch": 9.080427708042771, "grad_norm": 1.977126121520996, "learning_rate": 4.354354431846845e-05, "loss": 0.0469, "step": 19532 }, { "epoch": 9.081357508135751, "grad_norm": 2.561209201812744, "learning_rate": 4.359612824517557e-05, "loss": 0.07, "step": 19534 }, { "epoch": 9.08228730822873, "grad_norm": 2.04925799369812, "learning_rate": 4.364852863560452e-05, "loss": 0.0546, "step": 19536 }, { "epoch": 9.08321710832171, "grad_norm": 2.226876974105835, "learning_rate": 4.3700744972584555e-05, "loss": 0.0309, "step": 19538 }, { "epoch": 9.084146908414692, "grad_norm": 1.3228257894515991, "learning_rate": 4.3752776740761503e-05, "loss": 0.0538, "step": 19540 }, { "epoch": 9.085076708507671, "grad_norm": 0.9192014336585999, "learning_rate": 4.380462342660277e-05, "loss": 0.027, "step": 19542 }, { "epoch": 9.08600650860065, "grad_norm": 1.3382647037506104, "learning_rate": 4.385628451840261e-05, "loss": 0.0325, "step": 19544 }, { "epoch": 9.08693630869363, "grad_norm": 3.304307460784912, "learning_rate": 4.390775950628678e-05, "loss": 0.053, "step": 19546 }, { "epoch": 9.087866108786612, "grad_norm": 1.1730669736862183, "learning_rate": 4.3959047882218e-05, "loss": 0.0208, "step": 19548 }, { "epoch": 9.088795908879591, "grad_norm": 0.9123339653015137, "learning_rate": 4.401014914000074e-05, "loss": 0.0258, "step": 19550 }, { "epoch": 9.089725708972571, "grad_norm": 1.8125238418579102, "learning_rate": 4.4061062775286194e-05, "loss": 0.0612, "step": 19552 }, { "epoch": 9.09065550906555, "grad_norm": 0.7015509605407715, "learning_rate": 4.411178828557731e-05, "loss": 0.0192, "step": 19554 }, { "epoch": 9.09158530915853, "grad_norm": 0.9075837135314941, "learning_rate": 4.416232517023373e-05, "loss": 0.0192, "step": 19556 }, { "epoch": 9.092515109251512, "grad_norm": 0.9431002140045166, "learning_rate": 4.4212672930476875e-05, "loss": 0.0122, "step": 19558 }, { "epoch": 9.093444909344491, "grad_norm": 1.8374319076538086, "learning_rate": 4.42628310693947e-05, "loss": 0.0525, "step": 19560 }, { "epoch": 9.09437470943747, "grad_norm": 2.070871591567993, "learning_rate": 4.4312799091946556e-05, "loss": 0.031, "step": 19562 }, { "epoch": 9.09530450953045, "grad_norm": 1.228134274482727, "learning_rate": 4.4362576504968316e-05, "loss": 0.0273, "step": 19564 }, { "epoch": 9.096234309623432, "grad_norm": 2.517421007156372, "learning_rate": 4.4412162817176965e-05, "loss": 0.0529, "step": 19566 }, { "epoch": 9.097164109716411, "grad_norm": 1.62723708152771, "learning_rate": 4.446155753917555e-05, "loss": 0.0399, "step": 19568 }, { "epoch": 9.098093909809391, "grad_norm": 2.849031925201416, "learning_rate": 4.451076018345822e-05, "loss": 0.0625, "step": 19570 }, { "epoch": 9.09902370990237, "grad_norm": 2.627747058868408, "learning_rate": 4.455977026441465e-05, "loss": 0.0554, "step": 19572 }, { "epoch": 9.09995350999535, "grad_norm": 1.2404197454452515, "learning_rate": 4.4608587298335226e-05, "loss": 0.0373, "step": 19574 }, { "epoch": 9.100883310088332, "grad_norm": 1.6774786710739136, "learning_rate": 4.465721080341541e-05, "loss": 0.0443, "step": 19576 }, { "epoch": 9.101813110181311, "grad_norm": 1.6615867614746094, "learning_rate": 4.47056402997609e-05, "loss": 0.0494, "step": 19578 }, { "epoch": 9.10274291027429, "grad_norm": 1.1362700462341309, "learning_rate": 4.475387530939224e-05, "loss": 0.0301, "step": 19580 }, { "epoch": 9.10367271036727, "grad_norm": 1.7035225629806519, "learning_rate": 4.480191535624912e-05, "loss": 0.0544, "step": 19582 }, { "epoch": 9.104602510460252, "grad_norm": 2.8072242736816406, "learning_rate": 4.484975996619585e-05, "loss": 0.0585, "step": 19584 }, { "epoch": 9.105532310553231, "grad_norm": 2.6471195220947266, "learning_rate": 4.489740866702533e-05, "loss": 0.0841, "step": 19586 }, { "epoch": 9.106462110646211, "grad_norm": 2.391364336013794, "learning_rate": 4.4944860988464235e-05, "loss": 0.0379, "step": 19588 }, { "epoch": 9.10739191073919, "grad_norm": 1.1853379011154175, "learning_rate": 4.499211646217719e-05, "loss": 0.0253, "step": 19590 }, { "epoch": 9.108321710832172, "grad_norm": 1.13008451461792, "learning_rate": 4.503917462177181e-05, "loss": 0.0353, "step": 19592 }, { "epoch": 9.109251510925152, "grad_norm": 1.388935923576355, "learning_rate": 4.508603500280315e-05, "loss": 0.0355, "step": 19594 }, { "epoch": 9.110181311018131, "grad_norm": 1.1448655128479004, "learning_rate": 4.5132697142777975e-05, "loss": 0.0281, "step": 19596 }, { "epoch": 9.11111111111111, "grad_norm": 2.9932713508605957, "learning_rate": 4.5179160581159964e-05, "loss": 0.0488, "step": 19598 }, { "epoch": 9.11204091120409, "grad_norm": 1.9778082370758057, "learning_rate": 4.522542485937361e-05, "loss": 0.0403, "step": 19600 }, { "epoch": 9.112970711297072, "grad_norm": 1.523220181465149, "learning_rate": 4.5271489520809245e-05, "loss": 0.0731, "step": 19602 }, { "epoch": 9.113900511390051, "grad_norm": 2.126333236694336, "learning_rate": 4.531735411082727e-05, "loss": 0.0528, "step": 19604 }, { "epoch": 9.114830311483031, "grad_norm": 0.9178290963172913, "learning_rate": 4.5363018176762625e-05, "loss": 0.0233, "step": 19606 }, { "epoch": 9.11576011157601, "grad_norm": 2.382244825363159, "learning_rate": 4.5408481267929564e-05, "loss": 0.0657, "step": 19608 }, { "epoch": 9.116689911668992, "grad_norm": 1.932220697402954, "learning_rate": 4.5453742935625515e-05, "loss": 0.05, "step": 19610 }, { "epoch": 9.117619711761972, "grad_norm": 2.782447576522827, "learning_rate": 4.549880273313622e-05, "loss": 0.0518, "step": 19612 }, { "epoch": 9.118549511854951, "grad_norm": 2.3336875438690186, "learning_rate": 4.554366021573969e-05, "loss": 0.048, "step": 19614 }, { "epoch": 9.11947931194793, "grad_norm": 1.817337989807129, "learning_rate": 4.55883149407106e-05, "loss": 0.0546, "step": 19616 }, { "epoch": 9.12040911204091, "grad_norm": 0.9406951069831848, "learning_rate": 4.5632766467324924e-05, "loss": 0.0306, "step": 19618 }, { "epoch": 9.121338912133892, "grad_norm": 1.8501763343811035, "learning_rate": 4.567701435686396e-05, "loss": 0.0842, "step": 19620 }, { "epoch": 9.122268712226871, "grad_norm": 1.675536870956421, "learning_rate": 4.572105817261899e-05, "loss": 0.0676, "step": 19622 }, { "epoch": 9.123198512319851, "grad_norm": 2.2468438148498535, "learning_rate": 4.576489747989528e-05, "loss": 0.0804, "step": 19624 }, { "epoch": 9.12412831241283, "grad_norm": 1.9688880443572998, "learning_rate": 4.580853184601652e-05, "loss": 0.0285, "step": 19626 }, { "epoch": 9.125058112505812, "grad_norm": 0.660153329372406, "learning_rate": 4.585196084032923e-05, "loss": 0.0303, "step": 19628 }, { "epoch": 9.125987912598791, "grad_norm": 1.6498452425003052, "learning_rate": 4.589518403420668e-05, "loss": 0.0408, "step": 19630 }, { "epoch": 9.126917712691771, "grad_norm": 1.9195735454559326, "learning_rate": 4.593820100105349e-05, "loss": 0.0381, "step": 19632 }, { "epoch": 9.12784751278475, "grad_norm": 2.5001325607299805, "learning_rate": 4.59810113163095e-05, "loss": 0.051, "step": 19634 }, { "epoch": 9.128777312877732, "grad_norm": 1.9939905405044556, "learning_rate": 4.602361455745416e-05, "loss": 0.0538, "step": 19636 }, { "epoch": 9.129707112970712, "grad_norm": 2.3368496894836426, "learning_rate": 4.606601030401076e-05, "loss": 0.051, "step": 19638 }, { "epoch": 9.130636913063691, "grad_norm": 3.2418112754821777, "learning_rate": 4.6108198137550305e-05, "loss": 0.0787, "step": 19640 }, { "epoch": 9.131566713156671, "grad_norm": 1.9768880605697632, "learning_rate": 4.6150177641696e-05, "loss": 0.0595, "step": 19642 }, { "epoch": 9.13249651324965, "grad_norm": 1.6805342435836792, "learning_rate": 4.6191948402127007e-05, "loss": 0.0392, "step": 19644 }, { "epoch": 9.133426313342632, "grad_norm": 1.5151842832565308, "learning_rate": 4.6233510006582806e-05, "loss": 0.0685, "step": 19646 }, { "epoch": 9.134356113435611, "grad_norm": 2.8470394611358643, "learning_rate": 4.627486204486725e-05, "loss": 0.07, "step": 19648 }, { "epoch": 9.135285913528591, "grad_norm": 1.3940562009811401, "learning_rate": 4.631600410885223e-05, "loss": 0.054, "step": 19650 }, { "epoch": 9.13621571362157, "grad_norm": 2.6387217044830322, "learning_rate": 4.6356935792482324e-05, "loss": 0.0498, "step": 19652 }, { "epoch": 9.137145513714552, "grad_norm": 1.9328675270080566, "learning_rate": 4.639765669177824e-05, "loss": 0.0468, "step": 19654 }, { "epoch": 9.138075313807532, "grad_norm": 1.6234488487243652, "learning_rate": 4.6438166404841205e-05, "loss": 0.0439, "step": 19656 }, { "epoch": 9.139005113900511, "grad_norm": 1.5209842920303345, "learning_rate": 4.647846453185674e-05, "loss": 0.0527, "step": 19658 }, { "epoch": 9.139934913993491, "grad_norm": 1.7451918125152588, "learning_rate": 4.65185506750985e-05, "loss": 0.0439, "step": 19660 }, { "epoch": 9.140864714086472, "grad_norm": 1.1865668296813965, "learning_rate": 4.6558424438932585e-05, "loss": 0.0299, "step": 19662 }, { "epoch": 9.141794514179452, "grad_norm": 0.9543920755386353, "learning_rate": 4.659808542982084e-05, "loss": 0.0424, "step": 19664 }, { "epoch": 9.142724314272431, "grad_norm": 1.7682058811187744, "learning_rate": 4.663753325632542e-05, "loss": 0.0414, "step": 19666 }, { "epoch": 9.143654114365411, "grad_norm": 0.8452409505844116, "learning_rate": 4.667676752911222e-05, "loss": 0.0394, "step": 19668 }, { "epoch": 9.14458391445839, "grad_norm": 2.5303070545196533, "learning_rate": 4.671578786095473e-05, "loss": 0.0728, "step": 19670 }, { "epoch": 9.145513714551372, "grad_norm": 1.1533738374710083, "learning_rate": 4.67545938667381e-05, "loss": 0.0472, "step": 19672 }, { "epoch": 9.146443514644352, "grad_norm": 1.8915891647338867, "learning_rate": 4.679318516346265e-05, "loss": 0.0359, "step": 19674 }, { "epoch": 9.147373314737331, "grad_norm": 1.593941569328308, "learning_rate": 4.6831561370247995e-05, "loss": 0.0761, "step": 19676 }, { "epoch": 9.148303114830311, "grad_norm": 1.8702785968780518, "learning_rate": 4.686972210833628e-05, "loss": 0.0512, "step": 19678 }, { "epoch": 9.149232914923292, "grad_norm": 1.5093005895614624, "learning_rate": 4.690766700109652e-05, "loss": 0.0689, "step": 19680 }, { "epoch": 9.150162715016272, "grad_norm": 2.909717321395874, "learning_rate": 4.6945395674027985e-05, "loss": 0.0752, "step": 19682 }, { "epoch": 9.151092515109251, "grad_norm": 1.8239295482635498, "learning_rate": 4.698290775476383e-05, "loss": 0.0693, "step": 19684 }, { "epoch": 9.152022315202231, "grad_norm": 0.705441415309906, "learning_rate": 4.702020287307502e-05, "loss": 0.0287, "step": 19686 }, { "epoch": 9.15295211529521, "grad_norm": 1.0853033065795898, "learning_rate": 4.7057280660873746e-05, "loss": 0.0214, "step": 19688 }, { "epoch": 9.153881915388192, "grad_norm": 1.0042366981506348, "learning_rate": 4.709414075221726e-05, "loss": 0.0359, "step": 19690 }, { "epoch": 9.154811715481172, "grad_norm": 1.3722503185272217, "learning_rate": 4.7130782783311326e-05, "loss": 0.0455, "step": 19692 }, { "epoch": 9.155741515574151, "grad_norm": 2.5999600887298584, "learning_rate": 4.7167206392513836e-05, "loss": 0.097, "step": 19694 }, { "epoch": 9.15667131566713, "grad_norm": 1.7142376899719238, "learning_rate": 4.720341122033856e-05, "loss": 0.0489, "step": 19696 }, { "epoch": 9.157601115760112, "grad_norm": 0.9697198271751404, "learning_rate": 4.7239396909458364e-05, "loss": 0.0539, "step": 19698 }, { "epoch": 9.158530915853092, "grad_norm": 0.6597159504890442, "learning_rate": 4.727516310470908e-05, "loss": 0.0261, "step": 19700 }, { "epoch": 9.159460715946071, "grad_norm": 1.0600043535232544, "learning_rate": 4.731070945309285e-05, "loss": 0.0382, "step": 19702 }, { "epoch": 9.160390516039051, "grad_norm": 0.8447337746620178, "learning_rate": 4.73460356037815e-05, "loss": 0.0324, "step": 19704 }, { "epoch": 9.161320316132032, "grad_norm": 1.3700295686721802, "learning_rate": 4.7381141208120215e-05, "loss": 0.0303, "step": 19706 }, { "epoch": 9.162250116225012, "grad_norm": 2.0874199867248535, "learning_rate": 4.74160259196308e-05, "loss": 0.0432, "step": 19708 }, { "epoch": 9.163179916317992, "grad_norm": 0.8399603962898254, "learning_rate": 4.7450689394015285e-05, "loss": 0.0266, "step": 19710 }, { "epoch": 9.164109716410971, "grad_norm": 1.8388196229934692, "learning_rate": 4.7485131289159195e-05, "loss": 0.0879, "step": 19712 }, { "epoch": 9.16503951650395, "grad_norm": 2.1509270668029785, "learning_rate": 4.751935126513485e-05, "loss": 0.0618, "step": 19714 }, { "epoch": 9.165969316596932, "grad_norm": 1.1535817384719849, "learning_rate": 4.755334898420503e-05, "loss": 0.04, "step": 19716 }, { "epoch": 9.166899116689912, "grad_norm": 1.9787687063217163, "learning_rate": 4.75871241108258e-05, "loss": 0.0414, "step": 19718 }, { "epoch": 9.167828916782891, "grad_norm": 1.2560322284698486, "learning_rate": 4.7620676311650436e-05, "loss": 0.0437, "step": 19720 }, { "epoch": 9.168758716875871, "grad_norm": 2.154110908508301, "learning_rate": 4.765400525553217e-05, "loss": 0.0497, "step": 19722 }, { "epoch": 9.169688516968852, "grad_norm": 1.754974126815796, "learning_rate": 4.768711061352782e-05, "loss": 0.0484, "step": 19724 }, { "epoch": 9.170618317061832, "grad_norm": 2.969097852706909, "learning_rate": 4.771999205890091e-05, "loss": 0.0806, "step": 19726 }, { "epoch": 9.171548117154812, "grad_norm": 1.9594734907150269, "learning_rate": 4.775264926712477e-05, "loss": 0.0438, "step": 19728 }, { "epoch": 9.172477917247791, "grad_norm": 2.8486106395721436, "learning_rate": 4.778508191588606e-05, "loss": 0.0893, "step": 19730 }, { "epoch": 9.17340771734077, "grad_norm": 1.2783622741699219, "learning_rate": 4.7817289685087475e-05, "loss": 0.0348, "step": 19732 }, { "epoch": 9.174337517433752, "grad_norm": 1.4500586986541748, "learning_rate": 4.784927225685141e-05, "loss": 0.0486, "step": 19734 }, { "epoch": 9.175267317526732, "grad_norm": 1.537291169166565, "learning_rate": 4.788102931552283e-05, "loss": 0.0341, "step": 19736 }, { "epoch": 9.176197117619711, "grad_norm": 3.6393775939941406, "learning_rate": 4.791256054767232e-05, "loss": 0.0773, "step": 19738 }, { "epoch": 9.177126917712691, "grad_norm": 1.3785635232925415, "learning_rate": 4.794386564209942e-05, "loss": 0.0542, "step": 19740 }, { "epoch": 9.178056717805672, "grad_norm": 2.2840757369995117, "learning_rate": 4.797494428983541e-05, "loss": 0.0658, "step": 19742 }, { "epoch": 9.178986517898652, "grad_norm": 1.1859431266784668, "learning_rate": 4.800579618414665e-05, "loss": 0.0359, "step": 19744 }, { "epoch": 9.179916317991632, "grad_norm": 1.4118704795837402, "learning_rate": 4.803642102053737e-05, "loss": 0.0411, "step": 19746 }, { "epoch": 9.180846118084611, "grad_norm": 1.2386877536773682, "learning_rate": 4.806681849675275e-05, "loss": 0.0354, "step": 19748 }, { "epoch": 9.181775918177593, "grad_norm": 3.553701877593994, "learning_rate": 4.809698831278207e-05, "loss": 0.0406, "step": 19750 }, { "epoch": 9.182705718270572, "grad_norm": 2.645867109298706, "learning_rate": 4.812693017086133e-05, "loss": 0.0719, "step": 19752 }, { "epoch": 9.183635518363552, "grad_norm": 1.4249629974365234, "learning_rate": 4.8156643775476515e-05, "loss": 0.0342, "step": 19754 }, { "epoch": 9.184565318456531, "grad_norm": 1.7353490591049194, "learning_rate": 4.818612883336641e-05, "loss": 0.0626, "step": 19756 }, { "epoch": 9.185495118549511, "grad_norm": 1.9755594730377197, "learning_rate": 4.8215385053525325e-05, "loss": 0.0741, "step": 19758 }, { "epoch": 9.186424918642492, "grad_norm": 1.5193560123443604, "learning_rate": 4.824441214720619e-05, "loss": 0.0465, "step": 19760 }, { "epoch": 9.187354718735472, "grad_norm": 1.5915206670761108, "learning_rate": 4.8273209827923276e-05, "loss": 0.0482, "step": 19762 }, { "epoch": 9.188284518828452, "grad_norm": 3.561314821243286, "learning_rate": 4.830177781145518e-05, "loss": 0.0746, "step": 19764 }, { "epoch": 9.189214318921431, "grad_norm": 1.2462084293365479, "learning_rate": 4.833011581584736e-05, "loss": 0.0526, "step": 19766 }, { "epoch": 9.190144119014413, "grad_norm": 1.582177996635437, "learning_rate": 4.835822356141517e-05, "loss": 0.0292, "step": 19768 }, { "epoch": 9.191073919107392, "grad_norm": 1.8540295362472534, "learning_rate": 4.838610077074657e-05, "loss": 0.0761, "step": 19770 }, { "epoch": 9.192003719200372, "grad_norm": 1.398758053779602, "learning_rate": 4.841374716870471e-05, "loss": 0.0427, "step": 19772 }, { "epoch": 9.192933519293351, "grad_norm": 1.7965232133865356, "learning_rate": 4.844116248243081e-05, "loss": 0.0369, "step": 19774 }, { "epoch": 9.193863319386331, "grad_norm": 1.1725995540618896, "learning_rate": 4.846834644134676e-05, "loss": 0.0214, "step": 19776 }, { "epoch": 9.194793119479312, "grad_norm": 2.1826367378234863, "learning_rate": 4.849529877715789e-05, "loss": 0.0404, "step": 19778 }, { "epoch": 9.195722919572292, "grad_norm": 1.6665202379226685, "learning_rate": 4.8522019223855554e-05, "loss": 0.0388, "step": 19780 }, { "epoch": 9.196652719665272, "grad_norm": 0.9309923052787781, "learning_rate": 4.8548507517719673e-05, "loss": 0.0378, "step": 19782 }, { "epoch": 9.197582519758251, "grad_norm": 2.8538460731506348, "learning_rate": 4.857476339732153e-05, "loss": 0.0622, "step": 19784 }, { "epoch": 9.198512319851233, "grad_norm": 2.949796438217163, "learning_rate": 4.860078660352618e-05, "loss": 0.0933, "step": 19786 }, { "epoch": 9.199442119944212, "grad_norm": 1.7643284797668457, "learning_rate": 4.8626576879495045e-05, "loss": 0.0422, "step": 19788 }, { "epoch": 9.200371920037192, "grad_norm": 1.8676904439926147, "learning_rate": 4.865213397068857e-05, "loss": 0.0494, "step": 19790 }, { "epoch": 9.201301720130171, "grad_norm": 2.543984889984131, "learning_rate": 4.8677457624868536e-05, "loss": 0.1064, "step": 19792 }, { "epoch": 9.202231520223153, "grad_norm": 1.6832352876663208, "learning_rate": 4.870254759210074e-05, "loss": 0.0508, "step": 19794 }, { "epoch": 9.203161320316132, "grad_norm": 2.9841296672821045, "learning_rate": 4.87274036247573e-05, "loss": 0.0962, "step": 19796 }, { "epoch": 9.204091120409112, "grad_norm": 1.5997810363769531, "learning_rate": 4.875202547751921e-05, "loss": 0.0746, "step": 19798 }, { "epoch": 9.205020920502092, "grad_norm": 2.5594897270202637, "learning_rate": 4.8776412907378795e-05, "loss": 0.067, "step": 19800 }, { "epoch": 9.205950720595071, "grad_norm": 1.395261526107788, "learning_rate": 4.880056567364185e-05, "loss": 0.0629, "step": 19802 }, { "epoch": 9.206880520688053, "grad_norm": 1.376625657081604, "learning_rate": 4.8824483537930425e-05, "loss": 0.0532, "step": 19804 }, { "epoch": 9.207810320781032, "grad_norm": 0.9502781629562378, "learning_rate": 4.8848166264184777e-05, "loss": 0.0365, "step": 19806 }, { "epoch": 9.208740120874012, "grad_norm": 1.7562713623046875, "learning_rate": 4.8871613618666024e-05, "loss": 0.0471, "step": 19808 }, { "epoch": 9.209669920966991, "grad_norm": 2.024951457977295, "learning_rate": 4.8894825369958194e-05, "loss": 0.0361, "step": 19810 }, { "epoch": 9.210599721059973, "grad_norm": 1.792415976524353, "learning_rate": 4.891780128897068e-05, "loss": 0.0265, "step": 19812 }, { "epoch": 9.211529521152952, "grad_norm": 1.4206329584121704, "learning_rate": 4.894054114894052e-05, "loss": 0.0386, "step": 19814 }, { "epoch": 9.212459321245932, "grad_norm": 2.14007568359375, "learning_rate": 4.896304472543435e-05, "loss": 0.0751, "step": 19816 }, { "epoch": 9.213389121338912, "grad_norm": 0.8955184817314148, "learning_rate": 4.898531179635104e-05, "loss": 0.0438, "step": 19818 }, { "epoch": 9.214318921431893, "grad_norm": 1.2466697692871094, "learning_rate": 4.900734214192355e-05, "loss": 0.035, "step": 19820 }, { "epoch": 9.215248721524873, "grad_norm": 1.9491078853607178, "learning_rate": 4.902913554472126e-05, "loss": 0.0343, "step": 19822 }, { "epoch": 9.216178521617852, "grad_norm": 1.8776551485061646, "learning_rate": 4.905069178965212e-05, "loss": 0.0365, "step": 19824 }, { "epoch": 9.217108321710832, "grad_norm": 1.3573641777038574, "learning_rate": 4.907201066396468e-05, "loss": 0.0361, "step": 19826 }, { "epoch": 9.218038121803811, "grad_norm": 2.196261405944824, "learning_rate": 4.909309195725025e-05, "loss": 0.0603, "step": 19828 }, { "epoch": 9.218967921896793, "grad_norm": 2.4390947818756104, "learning_rate": 4.9113935461444955e-05, "loss": 0.0671, "step": 19830 }, { "epoch": 9.219897721989772, "grad_norm": 2.3026604652404785, "learning_rate": 4.913454097083184e-05, "loss": 0.0836, "step": 19832 }, { "epoch": 9.220827522082752, "grad_norm": 1.7397691011428833, "learning_rate": 4.915490828204287e-05, "loss": 0.0366, "step": 19834 }, { "epoch": 9.221757322175732, "grad_norm": 2.3150298595428467, "learning_rate": 4.917503719406086e-05, "loss": 0.0698, "step": 19836 }, { "epoch": 9.222687122268713, "grad_norm": 0.9357953667640686, "learning_rate": 4.919492750822163e-05, "loss": 0.0262, "step": 19838 }, { "epoch": 9.223616922361693, "grad_norm": 1.939444899559021, "learning_rate": 4.921457902821577e-05, "loss": 0.0517, "step": 19840 }, { "epoch": 9.224546722454672, "grad_norm": 1.8789390325546265, "learning_rate": 4.923399156009071e-05, "loss": 0.0666, "step": 19842 }, { "epoch": 9.225476522547652, "grad_norm": 2.2513723373413086, "learning_rate": 4.925316491225264e-05, "loss": 0.0479, "step": 19844 }, { "epoch": 9.226406322640631, "grad_norm": 1.333730936050415, "learning_rate": 4.927209889546826e-05, "loss": 0.0337, "step": 19846 }, { "epoch": 9.227336122733613, "grad_norm": 1.570617437362671, "learning_rate": 4.929079332286685e-05, "loss": 0.0575, "step": 19848 }, { "epoch": 9.228265922826592, "grad_norm": 1.1134907007217407, "learning_rate": 4.930924800994191e-05, "loss": 0.0257, "step": 19850 }, { "epoch": 9.229195722919572, "grad_norm": 1.4384477138519287, "learning_rate": 4.9327462774553166e-05, "loss": 0.0374, "step": 19852 }, { "epoch": 9.230125523012552, "grad_norm": 1.4665141105651855, "learning_rate": 4.934543743692824e-05, "loss": 0.0735, "step": 19854 }, { "epoch": 9.231055323105533, "grad_norm": 1.7892396450042725, "learning_rate": 4.936317181966444e-05, "loss": 0.0311, "step": 19856 }, { "epoch": 9.231985123198513, "grad_norm": 2.681940793991089, "learning_rate": 4.93806657477306e-05, "loss": 0.0842, "step": 19858 }, { "epoch": 9.232914923291492, "grad_norm": 1.3780462741851807, "learning_rate": 4.939791904846869e-05, "loss": 0.0355, "step": 19860 }, { "epoch": 9.233844723384472, "grad_norm": 1.3550679683685303, "learning_rate": 4.941493155159563e-05, "loss": 0.0285, "step": 19862 }, { "epoch": 9.234774523477453, "grad_norm": 1.8303117752075195, "learning_rate": 4.9431703089204846e-05, "loss": 0.0466, "step": 19864 }, { "epoch": 9.235704323570433, "grad_norm": 1.6867564916610718, "learning_rate": 4.944823349576804e-05, "loss": 0.0664, "step": 19866 }, { "epoch": 9.236634123663412, "grad_norm": 3.064713478088379, "learning_rate": 4.9464522608136826e-05, "loss": 0.0587, "step": 19868 }, { "epoch": 9.237563923756392, "grad_norm": 2.2822952270507812, "learning_rate": 4.9480570265544165e-05, "loss": 0.0599, "step": 19870 }, { "epoch": 9.238493723849372, "grad_norm": 1.9190198183059692, "learning_rate": 4.94963763096062e-05, "loss": 0.0911, "step": 19872 }, { "epoch": 9.239423523942353, "grad_norm": 2.6585330963134766, "learning_rate": 4.951194058432363e-05, "loss": 0.0821, "step": 19874 }, { "epoch": 9.240353324035333, "grad_norm": 0.9335871338844299, "learning_rate": 4.952726293608336e-05, "loss": 0.0563, "step": 19876 }, { "epoch": 9.241283124128312, "grad_norm": 0.9882644414901733, "learning_rate": 4.954234321365999e-05, "loss": 0.0459, "step": 19878 }, { "epoch": 9.242212924221292, "grad_norm": 1.5344738960266113, "learning_rate": 4.9557181268217227e-05, "loss": 0.0503, "step": 19880 }, { "epoch": 9.243142724314273, "grad_norm": 1.8838088512420654, "learning_rate": 4.957177695330953e-05, "loss": 0.0668, "step": 19882 }, { "epoch": 9.244072524407253, "grad_norm": 1.8513469696044922, "learning_rate": 4.9586130124883277e-05, "loss": 0.0506, "step": 19884 }, { "epoch": 9.245002324500232, "grad_norm": 1.6484225988388062, "learning_rate": 4.960024064127852e-05, "loss": 0.0688, "step": 19886 }, { "epoch": 9.245932124593212, "grad_norm": 1.5881825685501099, "learning_rate": 4.961410836323017e-05, "loss": 0.0471, "step": 19888 }, { "epoch": 9.246861924686192, "grad_norm": 1.6844688653945923, "learning_rate": 4.9627733153869365e-05, "loss": 0.055, "step": 19890 }, { "epoch": 9.247791724779173, "grad_norm": 2.082606792449951, "learning_rate": 4.964111487872497e-05, "loss": 0.0689, "step": 19892 }, { "epoch": 9.248721524872153, "grad_norm": 2.081589460372925, "learning_rate": 4.965425340572473e-05, "loss": 0.0669, "step": 19894 }, { "epoch": 9.249651324965132, "grad_norm": 2.3725552558898926, "learning_rate": 4.966714860519674e-05, "loss": 0.0525, "step": 19896 }, { "epoch": 9.250581125058112, "grad_norm": 1.9215558767318726, "learning_rate": 4.967980034987051e-05, "loss": 0.0672, "step": 19898 }, { "epoch": 9.251510925151093, "grad_norm": 2.244532823562622, "learning_rate": 4.969220851487846e-05, "loss": 0.0939, "step": 19900 }, { "epoch": 9.252440725244073, "grad_norm": 1.6286041736602783, "learning_rate": 4.9704372977757034e-05, "loss": 0.0551, "step": 19902 }, { "epoch": 9.253370525337052, "grad_norm": 1.7604037523269653, "learning_rate": 4.971629361844787e-05, "loss": 0.0321, "step": 19904 }, { "epoch": 9.254300325430032, "grad_norm": 1.7550634145736694, "learning_rate": 4.972797031929908e-05, "loss": 0.0504, "step": 19906 }, { "epoch": 9.255230125523013, "grad_norm": 1.8953065872192383, "learning_rate": 4.97394029650663e-05, "loss": 0.0373, "step": 19908 }, { "epoch": 9.256159925615993, "grad_norm": 2.112421751022339, "learning_rate": 4.9750591442913975e-05, "loss": 0.0673, "step": 19910 }, { "epoch": 9.257089725708973, "grad_norm": 1.4170722961425781, "learning_rate": 4.976153564241633e-05, "loss": 0.0647, "step": 19912 }, { "epoch": 9.258019525801952, "grad_norm": 1.7844672203063965, "learning_rate": 4.977223545555851e-05, "loss": 0.0506, "step": 19914 }, { "epoch": 9.258949325894932, "grad_norm": 0.942663848400116, "learning_rate": 4.978269077673771e-05, "loss": 0.0388, "step": 19916 }, { "epoch": 9.259879125987913, "grad_norm": 2.2559967041015625, "learning_rate": 4.97929015027641e-05, "loss": 0.0556, "step": 19918 }, { "epoch": 9.260808926080893, "grad_norm": 1.3412472009658813, "learning_rate": 4.9802867532861976e-05, "loss": 0.0345, "step": 19920 }, { "epoch": 9.261738726173872, "grad_norm": 1.3926421403884888, "learning_rate": 4.9812588768670635e-05, "loss": 0.0656, "step": 19922 }, { "epoch": 9.262668526266852, "grad_norm": 1.9391041994094849, "learning_rate": 4.9822065114245374e-05, "loss": 0.0852, "step": 19924 }, { "epoch": 9.263598326359833, "grad_norm": 2.217533588409424, "learning_rate": 4.983129647605852e-05, "loss": 0.0609, "step": 19926 }, { "epoch": 9.264528126452813, "grad_norm": 0.7186737656593323, "learning_rate": 4.984028276300025e-05, "loss": 0.0366, "step": 19928 }, { "epoch": 9.265457926545793, "grad_norm": 1.3493731021881104, "learning_rate": 4.984902388637952e-05, "loss": 0.0703, "step": 19930 }, { "epoch": 9.266387726638772, "grad_norm": 2.29463791847229, "learning_rate": 4.985751975992501e-05, "loss": 0.0773, "step": 19932 }, { "epoch": 9.267317526731752, "grad_norm": 1.6670912504196167, "learning_rate": 4.986577029978584e-05, "loss": 0.0667, "step": 19934 }, { "epoch": 9.268247326824733, "grad_norm": 1.7837897539138794, "learning_rate": 4.987377542453254e-05, "loss": 0.0519, "step": 19936 }, { "epoch": 9.269177126917713, "grad_norm": 1.9611048698425293, "learning_rate": 4.9881535055157744e-05, "loss": 0.0438, "step": 19938 }, { "epoch": 9.270106927010692, "grad_norm": 1.4714500904083252, "learning_rate": 4.9889049115077026e-05, "loss": 0.0669, "step": 19940 }, { "epoch": 9.271036727103672, "grad_norm": 1.8937338590621948, "learning_rate": 4.989631753012967e-05, "loss": 0.0716, "step": 19942 }, { "epoch": 9.271966527196653, "grad_norm": 1.8910026550292969, "learning_rate": 4.990334022857933e-05, "loss": 0.0665, "step": 19944 }, { "epoch": 9.272896327289633, "grad_norm": 2.5595552921295166, "learning_rate": 4.991011714111483e-05, "loss": 0.0536, "step": 19946 }, { "epoch": 9.273826127382613, "grad_norm": 1.2515686750411987, "learning_rate": 4.991664820085077e-05, "loss": 0.0494, "step": 19948 }, { "epoch": 9.274755927475592, "grad_norm": 2.2633635997772217, "learning_rate": 4.992293334332823e-05, "loss": 0.0673, "step": 19950 }, { "epoch": 9.275685727568574, "grad_norm": 1.1550018787384033, "learning_rate": 4.992897250651537e-05, "loss": 0.05, "step": 19952 }, { "epoch": 9.276615527661553, "grad_norm": 0.94097900390625, "learning_rate": 4.99347656308081e-05, "loss": 0.0319, "step": 19954 }, { "epoch": 9.277545327754533, "grad_norm": 1.208404302597046, "learning_rate": 4.994031265903065e-05, "loss": 0.0345, "step": 19956 }, { "epoch": 9.278475127847512, "grad_norm": 1.9095582962036133, "learning_rate": 4.9945613536436056e-05, "loss": 0.0434, "step": 19958 }, { "epoch": 9.279404927940492, "grad_norm": 2.0148911476135254, "learning_rate": 4.995066821070682e-05, "loss": 0.0386, "step": 19960 }, { "epoch": 9.280334728033473, "grad_norm": 1.4309948682785034, "learning_rate": 4.995547663195532e-05, "loss": 0.0404, "step": 19962 }, { "epoch": 9.281264528126453, "grad_norm": 1.1570665836334229, "learning_rate": 4.9960038752724414e-05, "loss": 0.0301, "step": 19964 }, { "epoch": 9.282194328219433, "grad_norm": 1.8706499338150024, "learning_rate": 4.996435452798778e-05, "loss": 0.069, "step": 19966 }, { "epoch": 9.283124128312412, "grad_norm": 1.5004384517669678, "learning_rate": 4.996842391515048e-05, "loss": 0.0567, "step": 19968 }, { "epoch": 9.284053928405394, "grad_norm": 1.9964087009429932, "learning_rate": 4.997224687404929e-05, "loss": 0.0666, "step": 19970 }, { "epoch": 9.284983728498373, "grad_norm": 2.4293148517608643, "learning_rate": 4.997582336695316e-05, "loss": 0.0494, "step": 19972 }, { "epoch": 9.285913528591353, "grad_norm": 2.08506178855896, "learning_rate": 4.997915335856355e-05, "loss": 0.0753, "step": 19974 }, { "epoch": 9.286843328684332, "grad_norm": 2.5916025638580322, "learning_rate": 4.998223681601478e-05, "loss": 0.051, "step": 19976 }, { "epoch": 9.287773128777314, "grad_norm": 3.885359525680542, "learning_rate": 4.9985073708874375e-05, "loss": 0.0859, "step": 19978 }, { "epoch": 9.288702928870293, "grad_norm": 1.1718958616256714, "learning_rate": 4.9987664009143345e-05, "loss": 0.0249, "step": 19980 }, { "epoch": 9.289632728963273, "grad_norm": 1.229225516319275, "learning_rate": 4.999000769125646e-05, "loss": 0.0283, "step": 19982 }, { "epoch": 9.290562529056253, "grad_norm": 2.1208229064941406, "learning_rate": 4.999210473208255e-05, "loss": 0.0473, "step": 19984 }, { "epoch": 9.291492329149232, "grad_norm": 1.8745532035827637, "learning_rate": 4.9993955110924656e-05, "loss": 0.0338, "step": 19986 }, { "epoch": 9.292422129242214, "grad_norm": 1.4895633459091187, "learning_rate": 4.999555880952028e-05, "loss": 0.0442, "step": 19988 }, { "epoch": 9.293351929335193, "grad_norm": 1.7939835786819458, "learning_rate": 4.999691581204158e-05, "loss": 0.0244, "step": 19990 }, { "epoch": 9.294281729428173, "grad_norm": 1.6374796628952026, "learning_rate": 4.9998026105095466e-05, "loss": 0.0408, "step": 19992 }, { "epoch": 9.295211529521152, "grad_norm": 1.8805670738220215, "learning_rate": 4.9998889677723804e-05, "loss": 0.0529, "step": 19994 }, { "epoch": 9.296141329614134, "grad_norm": 1.7123041152954102, "learning_rate": 4.999950652140348e-05, "loss": 0.0293, "step": 19996 }, { "epoch": 9.297071129707113, "grad_norm": 2.442216157913208, "learning_rate": 4.999987663004651e-05, "loss": 0.0419, "step": 19998 }, { "epoch": 9.298000929800093, "grad_norm": 2.7205049991607666, "learning_rate": 5.000000000000006e-05, "loss": 0.0742, "step": 20000 }, { "epoch": 9.298000929800093, "eval_cer": 0.1590666031001559, "eval_loss": 0.2652747333049774, "eval_runtime": 401.4029, "eval_samples_per_second": 31.624, "eval_steps_per_second": 0.989, "step": 20000 }, { "epoch": 9.298930729893073, "grad_norm": 1.5244897603988647, "learning_rate": 4.999987663004652e-05, "loss": 0.038, "step": 20002 }, { "epoch": 9.299860529986052, "grad_norm": 1.522633671760559, "learning_rate": 4.999950652140349e-05, "loss": 0.0484, "step": 20004 }, { "epoch": 9.300790330079034, "grad_norm": 2.355990171432495, "learning_rate": 4.999888967772381e-05, "loss": 0.1026, "step": 20006 }, { "epoch": 9.301720130172013, "grad_norm": 1.6071490049362183, "learning_rate": 4.999802610509547e-05, "loss": 0.0254, "step": 20008 }, { "epoch": 9.302649930264993, "grad_norm": 1.2053227424621582, "learning_rate": 4.9996915812041586e-05, "loss": 0.0324, "step": 20010 }, { "epoch": 9.303579730357972, "grad_norm": 2.328813076019287, "learning_rate": 4.9995558809520295e-05, "loss": 0.0692, "step": 20012 }, { "epoch": 9.304509530450954, "grad_norm": 1.6694895029067993, "learning_rate": 4.9993955110924676e-05, "loss": 0.047, "step": 20014 }, { "epoch": 9.305439330543933, "grad_norm": 2.2096035480499268, "learning_rate": 4.999210473208257e-05, "loss": 0.0716, "step": 20016 }, { "epoch": 9.306369130636913, "grad_norm": 3.3457448482513428, "learning_rate": 4.999000769125647e-05, "loss": 0.1005, "step": 20018 }, { "epoch": 9.307298930729893, "grad_norm": 1.2673147916793823, "learning_rate": 4.998766400914335e-05, "loss": 0.0519, "step": 20020 }, { "epoch": 9.308228730822874, "grad_norm": 2.1543872356414795, "learning_rate": 4.998507370887438e-05, "loss": 0.0383, "step": 20022 }, { "epoch": 9.309158530915854, "grad_norm": 2.051685333251953, "learning_rate": 4.998223681601479e-05, "loss": 0.0488, "step": 20024 }, { "epoch": 9.310088331008833, "grad_norm": 1.7471096515655518, "learning_rate": 4.9979153358563564e-05, "loss": 0.074, "step": 20026 }, { "epoch": 9.311018131101813, "grad_norm": 0.7483071684837341, "learning_rate": 4.997582336695318e-05, "loss": 0.0405, "step": 20028 }, { "epoch": 9.311947931194792, "grad_norm": 2.140349864959717, "learning_rate": 4.9972246874049315e-05, "loss": 0.0642, "step": 20030 }, { "epoch": 9.312877731287774, "grad_norm": 1.86649489402771, "learning_rate": 4.996842391515051e-05, "loss": 0.0461, "step": 20032 }, { "epoch": 9.313807531380753, "grad_norm": 1.5025370121002197, "learning_rate": 4.996435452798781e-05, "loss": 0.0762, "step": 20034 }, { "epoch": 9.314737331473733, "grad_norm": 1.0935564041137695, "learning_rate": 4.996003875272444e-05, "loss": 0.0355, "step": 20036 }, { "epoch": 9.315667131566713, "grad_norm": 2.618427038192749, "learning_rate": 4.995547663195536e-05, "loss": 0.0701, "step": 20038 }, { "epoch": 9.316596931659694, "grad_norm": 2.070342540740967, "learning_rate": 4.9950668210706854e-05, "loss": 0.0497, "step": 20040 }, { "epoch": 9.317526731752674, "grad_norm": 1.635062575340271, "learning_rate": 4.99456135364361e-05, "loss": 0.0542, "step": 20042 }, { "epoch": 9.318456531845653, "grad_norm": 2.1436760425567627, "learning_rate": 4.99403126590307e-05, "loss": 0.0731, "step": 20044 }, { "epoch": 9.319386331938633, "grad_norm": 1.6892280578613281, "learning_rate": 4.9934765630808156e-05, "loss": 0.0314, "step": 20046 }, { "epoch": 9.320316132031612, "grad_norm": 1.9446260929107666, "learning_rate": 4.9928972506515415e-05, "loss": 0.0489, "step": 20048 }, { "epoch": 9.321245932124594, "grad_norm": 1.112531065940857, "learning_rate": 4.992293334332827e-05, "loss": 0.0406, "step": 20050 }, { "epoch": 9.322175732217573, "grad_norm": 1.3290839195251465, "learning_rate": 4.9916648200850825e-05, "loss": 0.0424, "step": 20052 }, { "epoch": 9.323105532310553, "grad_norm": 2.526136875152588, "learning_rate": 4.991011714111488e-05, "loss": 0.0863, "step": 20054 }, { "epoch": 9.324035332403533, "grad_norm": 1.688620924949646, "learning_rate": 4.990334022857938e-05, "loss": 0.0504, "step": 20056 }, { "epoch": 9.324965132496514, "grad_norm": 1.6458841562271118, "learning_rate": 4.989631753012971e-05, "loss": 0.0609, "step": 20058 }, { "epoch": 9.325894932589494, "grad_norm": 1.297426462173462, "learning_rate": 4.9889049115077066e-05, "loss": 0.0369, "step": 20060 }, { "epoch": 9.326824732682473, "grad_norm": 1.5340871810913086, "learning_rate": 4.988153505515778e-05, "loss": 0.0507, "step": 20062 }, { "epoch": 9.327754532775453, "grad_norm": 1.100599765777588, "learning_rate": 4.987377542453257e-05, "loss": 0.0386, "step": 20064 }, { "epoch": 9.328684332868434, "grad_norm": 2.8691325187683105, "learning_rate": 4.9865770299785876e-05, "loss": 0.0857, "step": 20066 }, { "epoch": 9.329614132961414, "grad_norm": 1.547991156578064, "learning_rate": 4.9857519759925035e-05, "loss": 0.0371, "step": 20068 }, { "epoch": 9.330543933054393, "grad_norm": 1.1262434720993042, "learning_rate": 4.984902388637956e-05, "loss": 0.0314, "step": 20070 }, { "epoch": 9.331473733147373, "grad_norm": 0.9997497200965881, "learning_rate": 4.984028276300027e-05, "loss": 0.0508, "step": 20072 }, { "epoch": 9.332403533240353, "grad_norm": 1.3612537384033203, "learning_rate": 4.9831296476058545e-05, "loss": 0.0744, "step": 20074 }, { "epoch": 9.333333333333334, "grad_norm": 2.4006683826446533, "learning_rate": 4.98220651142454e-05, "loss": 0.0955, "step": 20076 }, { "epoch": 9.334263133426314, "grad_norm": 1.7959741353988647, "learning_rate": 4.9812588768670655e-05, "loss": 0.0331, "step": 20078 }, { "epoch": 9.335192933519293, "grad_norm": 2.972250461578369, "learning_rate": 4.980286753286201e-05, "loss": 0.0786, "step": 20080 }, { "epoch": 9.336122733612273, "grad_norm": 1.6468074321746826, "learning_rate": 4.9792901502764136e-05, "loss": 0.0536, "step": 20082 }, { "epoch": 9.337052533705254, "grad_norm": 2.929086923599243, "learning_rate": 4.978269077673772e-05, "loss": 0.0936, "step": 20084 }, { "epoch": 9.337982333798234, "grad_norm": 2.5833547115325928, "learning_rate": 4.977223545555853e-05, "loss": 0.0685, "step": 20086 }, { "epoch": 9.338912133891213, "grad_norm": 1.4525799751281738, "learning_rate": 4.976153564241633e-05, "loss": 0.0998, "step": 20088 }, { "epoch": 9.339841933984193, "grad_norm": 1.9148151874542236, "learning_rate": 4.975059144291399e-05, "loss": 0.0513, "step": 20090 }, { "epoch": 9.340771734077173, "grad_norm": 2.0188844203948975, "learning_rate": 4.973940296506631e-05, "loss": 0.0657, "step": 20092 }, { "epoch": 9.341701534170154, "grad_norm": 2.280813694000244, "learning_rate": 4.972797031929909e-05, "loss": 0.0835, "step": 20094 }, { "epoch": 9.342631334263134, "grad_norm": 1.3581600189208984, "learning_rate": 4.97162936184479e-05, "loss": 0.0348, "step": 20096 }, { "epoch": 9.343561134356113, "grad_norm": 1.8267627954483032, "learning_rate": 4.9704372977757055e-05, "loss": 0.072, "step": 20098 }, { "epoch": 9.344490934449093, "grad_norm": 1.836459994316101, "learning_rate": 4.96922085148785e-05, "loss": 0.0491, "step": 20100 }, { "epoch": 9.345420734542074, "grad_norm": 1.7374933958053589, "learning_rate": 4.967980034987052e-05, "loss": 0.0427, "step": 20102 }, { "epoch": 9.346350534635054, "grad_norm": 2.5715889930725098, "learning_rate": 4.966714860519675e-05, "loss": 0.1024, "step": 20104 }, { "epoch": 9.347280334728033, "grad_norm": 3.8171119689941406, "learning_rate": 4.965425340572478e-05, "loss": 0.0842, "step": 20106 }, { "epoch": 9.348210134821013, "grad_norm": 2.257570266723633, "learning_rate": 4.9641114878724996e-05, "loss": 0.0408, "step": 20108 }, { "epoch": 9.349139934913994, "grad_norm": 1.6829582452774048, "learning_rate": 4.962773315386939e-05, "loss": 0.0679, "step": 20110 }, { "epoch": 9.350069735006974, "grad_norm": 1.9370282888412476, "learning_rate": 4.961410836323018e-05, "loss": 0.0399, "step": 20112 }, { "epoch": 9.350999535099954, "grad_norm": 0.8795685768127441, "learning_rate": 4.960024064127854e-05, "loss": 0.0603, "step": 20114 }, { "epoch": 9.351929335192933, "grad_norm": 0.5456206798553467, "learning_rate": 4.9586130124883277e-05, "loss": 0.0416, "step": 20116 }, { "epoch": 9.352859135285913, "grad_norm": 1.5806666612625122, "learning_rate": 4.957177695330953e-05, "loss": 0.048, "step": 20118 }, { "epoch": 9.353788935378894, "grad_norm": 2.4094390869140625, "learning_rate": 4.955718126821728e-05, "loss": 0.0597, "step": 20120 }, { "epoch": 9.354718735471874, "grad_norm": 1.5378966331481934, "learning_rate": 4.954234321366003e-05, "loss": 0.0468, "step": 20122 }, { "epoch": 9.355648535564853, "grad_norm": 1.0749309062957764, "learning_rate": 4.952726293608342e-05, "loss": 0.0496, "step": 20124 }, { "epoch": 9.356578335657833, "grad_norm": 2.896594285964966, "learning_rate": 4.9511940584323665e-05, "loss": 0.068, "step": 20126 }, { "epoch": 9.357508135750814, "grad_norm": 2.4930694103240967, "learning_rate": 4.949637630960622e-05, "loss": 0.0749, "step": 20128 }, { "epoch": 9.358437935843794, "grad_norm": 2.2757937908172607, "learning_rate": 4.9480570265544205e-05, "loss": 0.04, "step": 20130 }, { "epoch": 9.359367735936774, "grad_norm": 1.7943227291107178, "learning_rate": 4.946452260813685e-05, "loss": 0.0672, "step": 20132 }, { "epoch": 9.360297536029753, "grad_norm": 1.471610188484192, "learning_rate": 4.944823349576811e-05, "loss": 0.0391, "step": 20134 }, { "epoch": 9.361227336122734, "grad_norm": 1.157633662223816, "learning_rate": 4.9431703089204886e-05, "loss": 0.0463, "step": 20136 }, { "epoch": 9.362157136215714, "grad_norm": 2.312856912612915, "learning_rate": 4.941493155159567e-05, "loss": 0.0784, "step": 20138 }, { "epoch": 9.363086936308694, "grad_norm": 1.6890980005264282, "learning_rate": 4.939791904846875e-05, "loss": 0.0381, "step": 20140 }, { "epoch": 9.364016736401673, "grad_norm": 1.7438316345214844, "learning_rate": 4.9380665747730624e-05, "loss": 0.0475, "step": 20142 }, { "epoch": 9.364946536494653, "grad_norm": 1.7568533420562744, "learning_rate": 4.936317181966448e-05, "loss": 0.0455, "step": 20144 }, { "epoch": 9.365876336587634, "grad_norm": 1.9908463954925537, "learning_rate": 4.934543743692826e-05, "loss": 0.0427, "step": 20146 }, { "epoch": 9.366806136680614, "grad_norm": 1.2878608703613281, "learning_rate": 4.932746277455322e-05, "loss": 0.0368, "step": 20148 }, { "epoch": 9.367735936773594, "grad_norm": 2.8646695613861084, "learning_rate": 4.930924800994198e-05, "loss": 0.0546, "step": 20150 }, { "epoch": 9.368665736866573, "grad_norm": 2.0306363105773926, "learning_rate": 4.92907933228669e-05, "loss": 0.0719, "step": 20152 }, { "epoch": 9.369595536959554, "grad_norm": 1.498990535736084, "learning_rate": 4.927209889546834e-05, "loss": 0.0458, "step": 20154 }, { "epoch": 9.370525337052534, "grad_norm": 0.934830367565155, "learning_rate": 4.925316491225268e-05, "loss": 0.0543, "step": 20156 }, { "epoch": 9.371455137145514, "grad_norm": 1.5235177278518677, "learning_rate": 4.9233991560090784e-05, "loss": 0.0376, "step": 20158 }, { "epoch": 9.372384937238493, "grad_norm": 1.5366220474243164, "learning_rate": 4.921457902821582e-05, "loss": 0.0376, "step": 20160 }, { "epoch": 9.373314737331473, "grad_norm": 1.1569894552230835, "learning_rate": 4.9194927508221685e-05, "loss": 0.032, "step": 20162 }, { "epoch": 9.374244537424454, "grad_norm": 1.7489906549453735, "learning_rate": 4.9175037194060944e-05, "loss": 0.0499, "step": 20164 }, { "epoch": 9.375174337517434, "grad_norm": 1.738752841949463, "learning_rate": 4.915490828204292e-05, "loss": 0.0528, "step": 20166 }, { "epoch": 9.376104137610414, "grad_norm": 1.162450909614563, "learning_rate": 4.913454097083191e-05, "loss": 0.0193, "step": 20168 }, { "epoch": 9.377033937703393, "grad_norm": 2.20414137840271, "learning_rate": 4.911393546144499e-05, "loss": 0.049, "step": 20170 }, { "epoch": 9.377963737796374, "grad_norm": 1.3975075483322144, "learning_rate": 4.909309195725029e-05, "loss": 0.075, "step": 20172 }, { "epoch": 9.378893537889354, "grad_norm": 1.7735830545425415, "learning_rate": 4.907201066396475e-05, "loss": 0.027, "step": 20174 }, { "epoch": 9.379823337982334, "grad_norm": 1.1015894412994385, "learning_rate": 4.90506917896522e-05, "loss": 0.028, "step": 20176 }, { "epoch": 9.380753138075313, "grad_norm": 2.284120798110962, "learning_rate": 4.902913554472137e-05, "loss": 0.0615, "step": 20178 }, { "epoch": 9.381682938168293, "grad_norm": 2.0328760147094727, "learning_rate": 4.900734214192364e-05, "loss": 0.0562, "step": 20180 }, { "epoch": 9.382612738261274, "grad_norm": 1.897494912147522, "learning_rate": 4.898531179635115e-05, "loss": 0.0772, "step": 20182 }, { "epoch": 9.383542538354254, "grad_norm": 2.018882989883423, "learning_rate": 4.896304472543446e-05, "loss": 0.0794, "step": 20184 }, { "epoch": 9.384472338447233, "grad_norm": 2.654917001724243, "learning_rate": 4.89405411489406e-05, "loss": 0.0764, "step": 20186 }, { "epoch": 9.385402138540213, "grad_norm": 1.3140369653701782, "learning_rate": 4.891780128897082e-05, "loss": 0.0339, "step": 20188 }, { "epoch": 9.386331938633194, "grad_norm": 1.60108482837677, "learning_rate": 4.88948253699583e-05, "loss": 0.0825, "step": 20190 }, { "epoch": 9.387261738726174, "grad_norm": 2.2330117225646973, "learning_rate": 4.887161361866613e-05, "loss": 0.0542, "step": 20192 }, { "epoch": 9.388191538819154, "grad_norm": 2.562946319580078, "learning_rate": 4.884816626418492e-05, "loss": 0.0644, "step": 20194 }, { "epoch": 9.389121338912133, "grad_norm": 1.8753165006637573, "learning_rate": 4.882448353793054e-05, "loss": 0.0557, "step": 20196 }, { "epoch": 9.390051139005115, "grad_norm": 1.0984572172164917, "learning_rate": 4.880056567364198e-05, "loss": 0.0337, "step": 20198 }, { "epoch": 9.390980939098094, "grad_norm": 2.1655428409576416, "learning_rate": 4.8776412907378884e-05, "loss": 0.0493, "step": 20200 }, { "epoch": 9.391910739191074, "grad_norm": 1.6438968181610107, "learning_rate": 4.875202547751934e-05, "loss": 0.0361, "step": 20202 }, { "epoch": 9.392840539284053, "grad_norm": 2.1086080074310303, "learning_rate": 4.8727403624757406e-05, "loss": 0.0863, "step": 20204 }, { "epoch": 9.393770339377033, "grad_norm": 1.6976515054702759, "learning_rate": 4.8702547592100864e-05, "loss": 0.0392, "step": 20206 }, { "epoch": 9.394700139470014, "grad_norm": 1.752628207206726, "learning_rate": 4.8677457624868685e-05, "loss": 0.0795, "step": 20208 }, { "epoch": 9.395629939562994, "grad_norm": 2.2505922317504883, "learning_rate": 4.865213397068867e-05, "loss": 0.0477, "step": 20210 }, { "epoch": 9.396559739655974, "grad_norm": 1.6213394403457642, "learning_rate": 4.862657687949518e-05, "loss": 0.0688, "step": 20212 }, { "epoch": 9.397489539748953, "grad_norm": 1.1870921850204468, "learning_rate": 4.8600786603526295e-05, "loss": 0.0648, "step": 20214 }, { "epoch": 9.398419339841935, "grad_norm": 0.7048507332801819, "learning_rate": 4.8574763397321675e-05, "loss": 0.035, "step": 20216 }, { "epoch": 9.399349139934914, "grad_norm": 1.448463797569275, "learning_rate": 4.854850751771985e-05, "loss": 0.0369, "step": 20218 }, { "epoch": 9.400278940027894, "grad_norm": 2.279510259628296, "learning_rate": 4.852201922385571e-05, "loss": 0.0655, "step": 20220 }, { "epoch": 9.401208740120873, "grad_norm": 3.6612470149993896, "learning_rate": 4.849529877715808e-05, "loss": 0.0453, "step": 20222 }, { "epoch": 9.402138540213855, "grad_norm": 2.1012933254241943, "learning_rate": 4.8468346441346894e-05, "loss": 0.0854, "step": 20224 }, { "epoch": 9.403068340306834, "grad_norm": 2.871082067489624, "learning_rate": 4.8441162482430945e-05, "loss": 0.0778, "step": 20226 }, { "epoch": 9.403998140399814, "grad_norm": 1.7851332426071167, "learning_rate": 4.8413747168704876e-05, "loss": 0.0504, "step": 20228 }, { "epoch": 9.404927940492794, "grad_norm": 2.1978769302368164, "learning_rate": 4.8386100770746735e-05, "loss": 0.0742, "step": 20230 }, { "epoch": 9.405857740585773, "grad_norm": 1.946027159690857, "learning_rate": 4.835822356141538e-05, "loss": 0.0548, "step": 20232 }, { "epoch": 9.406787540678755, "grad_norm": 1.1320087909698486, "learning_rate": 4.8330115815847526e-05, "loss": 0.0218, "step": 20234 }, { "epoch": 9.407717340771734, "grad_norm": 1.2329978942871094, "learning_rate": 4.830177781145536e-05, "loss": 0.0439, "step": 20236 }, { "epoch": 9.408647140864714, "grad_norm": 0.7949302196502686, "learning_rate": 4.827320982792346e-05, "loss": 0.0248, "step": 20238 }, { "epoch": 9.409576940957693, "grad_norm": 2.8551788330078125, "learning_rate": 4.8244412147206345e-05, "loss": 0.0675, "step": 20240 }, { "epoch": 9.410506741050675, "grad_norm": 1.3993794918060303, "learning_rate": 4.8215385053525514e-05, "loss": 0.042, "step": 20242 }, { "epoch": 9.411436541143654, "grad_norm": 2.487220287322998, "learning_rate": 4.81861288333666e-05, "loss": 0.0875, "step": 20244 }, { "epoch": 9.412366341236634, "grad_norm": 0.45614323019981384, "learning_rate": 4.815664377547675e-05, "loss": 0.0182, "step": 20246 }, { "epoch": 9.413296141329614, "grad_norm": 1.2997182607650757, "learning_rate": 4.8126930170861525e-05, "loss": 0.0331, "step": 20248 }, { "epoch": 9.414225941422593, "grad_norm": 2.302856206893921, "learning_rate": 4.809698831278227e-05, "loss": 0.0641, "step": 20250 }, { "epoch": 9.415155741515575, "grad_norm": 1.3581492900848389, "learning_rate": 4.806681849675295e-05, "loss": 0.0397, "step": 20252 }, { "epoch": 9.416085541608554, "grad_norm": 1.859794020652771, "learning_rate": 4.803642102053752e-05, "loss": 0.0864, "step": 20254 }, { "epoch": 9.417015341701534, "grad_norm": 1.459070086479187, "learning_rate": 4.8005796184146836e-05, "loss": 0.0463, "step": 20256 }, { "epoch": 9.417945141794513, "grad_norm": 2.870206356048584, "learning_rate": 4.797494428983559e-05, "loss": 0.1454, "step": 20258 }, { "epoch": 9.418874941887495, "grad_norm": 1.608961820602417, "learning_rate": 4.794386564209961e-05, "loss": 0.049, "step": 20260 }, { "epoch": 9.419804741980474, "grad_norm": 2.6132655143737793, "learning_rate": 4.7912560547672554e-05, "loss": 0.0518, "step": 20262 }, { "epoch": 9.420734542073454, "grad_norm": 1.7854036092758179, "learning_rate": 4.788102931552302e-05, "loss": 0.0437, "step": 20264 }, { "epoch": 9.421664342166434, "grad_norm": 2.0426056385040283, "learning_rate": 4.78492722568516e-05, "loss": 0.0405, "step": 20266 }, { "epoch": 9.422594142259415, "grad_norm": 2.1873462200164795, "learning_rate": 4.781728968508762e-05, "loss": 0.0608, "step": 20268 }, { "epoch": 9.423523942352395, "grad_norm": 1.925675630569458, "learning_rate": 4.77850819158862e-05, "loss": 0.041, "step": 20270 }, { "epoch": 9.424453742445374, "grad_norm": 1.8586399555206299, "learning_rate": 4.775264926712499e-05, "loss": 0.0731, "step": 20272 }, { "epoch": 9.425383542538354, "grad_norm": 1.2978582382202148, "learning_rate": 4.771999205890108e-05, "loss": 0.0577, "step": 20274 }, { "epoch": 9.426313342631333, "grad_norm": 2.5580527782440186, "learning_rate": 4.768711061352802e-05, "loss": 0.078, "step": 20276 }, { "epoch": 9.427243142724315, "grad_norm": 1.8930288553237915, "learning_rate": 4.765400525553232e-05, "loss": 0.0366, "step": 20278 }, { "epoch": 9.428172942817294, "grad_norm": 1.9799108505249023, "learning_rate": 4.762067631165055e-05, "loss": 0.0598, "step": 20280 }, { "epoch": 9.429102742910274, "grad_norm": 1.6609619855880737, "learning_rate": 4.7587124110825956e-05, "loss": 0.0706, "step": 20282 }, { "epoch": 9.430032543003254, "grad_norm": 1.1745014190673828, "learning_rate": 4.7553348984205144e-05, "loss": 0.0754, "step": 20284 }, { "epoch": 9.430962343096235, "grad_norm": 1.3338559865951538, "learning_rate": 4.7519351265135045e-05, "loss": 0.0385, "step": 20286 }, { "epoch": 9.431892143189215, "grad_norm": 1.3908544778823853, "learning_rate": 4.748513128915935e-05, "loss": 0.0402, "step": 20288 }, { "epoch": 9.432821943282194, "grad_norm": 1.7006916999816895, "learning_rate": 4.745068939401548e-05, "loss": 0.0551, "step": 20290 }, { "epoch": 9.433751743375174, "grad_norm": 1.8812974691390991, "learning_rate": 4.741602591963093e-05, "loss": 0.0646, "step": 20292 }, { "epoch": 9.434681543468155, "grad_norm": 1.2690621614456177, "learning_rate": 4.738114120812035e-05, "loss": 0.032, "step": 20294 }, { "epoch": 9.435611343561135, "grad_norm": 2.185210704803467, "learning_rate": 4.7346035603781666e-05, "loss": 0.057, "step": 20296 }, { "epoch": 9.436541143654114, "grad_norm": 2.02549409866333, "learning_rate": 4.7310709453093e-05, "loss": 0.0328, "step": 20298 }, { "epoch": 9.437470943747094, "grad_norm": 1.0682963132858276, "learning_rate": 4.7275163104709274e-05, "loss": 0.0252, "step": 20300 }, { "epoch": 9.438400743840074, "grad_norm": 1.8777292966842651, "learning_rate": 4.723939690945851e-05, "loss": 0.1004, "step": 20302 }, { "epoch": 9.439330543933055, "grad_norm": 1.701710820198059, "learning_rate": 4.72034112203387e-05, "loss": 0.038, "step": 20304 }, { "epoch": 9.440260344026035, "grad_norm": 0.6709153652191162, "learning_rate": 4.716720639251398e-05, "loss": 0.0453, "step": 20306 }, { "epoch": 9.441190144119014, "grad_norm": 3.2883951663970947, "learning_rate": 4.7130782783311435e-05, "loss": 0.075, "step": 20308 }, { "epoch": 9.442119944211994, "grad_norm": 1.8078781366348267, "learning_rate": 4.709414075221741e-05, "loss": 0.0873, "step": 20310 }, { "epoch": 9.443049744304975, "grad_norm": 2.1270933151245117, "learning_rate": 4.705728066087389e-05, "loss": 0.0525, "step": 20312 }, { "epoch": 9.443979544397955, "grad_norm": 2.3789427280426025, "learning_rate": 4.702020287307517e-05, "loss": 0.0676, "step": 20314 }, { "epoch": 9.444909344490934, "grad_norm": 1.2470241785049438, "learning_rate": 4.698290775476401e-05, "loss": 0.0275, "step": 20316 }, { "epoch": 9.445839144583914, "grad_norm": 1.7656841278076172, "learning_rate": 4.694539567402813e-05, "loss": 0.0471, "step": 20318 }, { "epoch": 9.446768944676894, "grad_norm": 1.972212791442871, "learning_rate": 4.690766700109666e-05, "loss": 0.0611, "step": 20320 }, { "epoch": 9.447698744769875, "grad_norm": 1.0487275123596191, "learning_rate": 4.686972210833637e-05, "loss": 0.0337, "step": 20322 }, { "epoch": 9.448628544862855, "grad_norm": 1.4550265073776245, "learning_rate": 4.6831561370248077e-05, "loss": 0.0397, "step": 20324 }, { "epoch": 9.449558344955834, "grad_norm": 2.2692039012908936, "learning_rate": 4.679318516346282e-05, "loss": 0.0544, "step": 20326 }, { "epoch": 9.450488145048814, "grad_norm": 1.4902793169021606, "learning_rate": 4.675459386673822e-05, "loss": 0.0575, "step": 20328 }, { "epoch": 9.451417945141795, "grad_norm": 0.8765096664428711, "learning_rate": 4.6715787860954884e-05, "loss": 0.0369, "step": 20330 }, { "epoch": 9.452347745234775, "grad_norm": 1.9415745735168457, "learning_rate": 4.6676767529112334e-05, "loss": 0.082, "step": 20332 }, { "epoch": 9.453277545327754, "grad_norm": 2.1639821529388428, "learning_rate": 4.663753325632554e-05, "loss": 0.0765, "step": 20334 }, { "epoch": 9.454207345420734, "grad_norm": 1.286645770072937, "learning_rate": 4.6598085429820936e-05, "loss": 0.0381, "step": 20336 }, { "epoch": 9.455137145513714, "grad_norm": 1.6468186378479004, "learning_rate": 4.655842443893268e-05, "loss": 0.0333, "step": 20338 }, { "epoch": 9.456066945606695, "grad_norm": 1.5558184385299683, "learning_rate": 4.65185506750987e-05, "loss": 0.0284, "step": 20340 }, { "epoch": 9.456996745699675, "grad_norm": 1.3059983253479004, "learning_rate": 4.64784645318569e-05, "loss": 0.0472, "step": 20342 }, { "epoch": 9.457926545792654, "grad_norm": 1.6110767126083374, "learning_rate": 4.6438166404841415e-05, "loss": 0.0413, "step": 20344 }, { "epoch": 9.458856345885634, "grad_norm": 2.237718343734741, "learning_rate": 4.639765669177842e-05, "loss": 0.0807, "step": 20346 }, { "epoch": 9.459786145978615, "grad_norm": 0.77759850025177, "learning_rate": 4.635693579248245e-05, "loss": 0.051, "step": 20348 }, { "epoch": 9.460715946071595, "grad_norm": 1.3082224130630493, "learning_rate": 4.63160041088524e-05, "loss": 0.0375, "step": 20350 }, { "epoch": 9.461645746164574, "grad_norm": 2.3043549060821533, "learning_rate": 4.627486204486737e-05, "loss": 0.0611, "step": 20352 }, { "epoch": 9.462575546257554, "grad_norm": 1.3183863162994385, "learning_rate": 4.623351000658302e-05, "loss": 0.0545, "step": 20354 }, { "epoch": 9.463505346350535, "grad_norm": 1.7034229040145874, "learning_rate": 4.619194840212717e-05, "loss": 0.0725, "step": 20356 }, { "epoch": 9.464435146443515, "grad_norm": 1.30599045753479, "learning_rate": 4.615017764169617e-05, "loss": 0.0388, "step": 20358 }, { "epoch": 9.465364946536495, "grad_norm": 0.9667690396308899, "learning_rate": 4.610819813755052e-05, "loss": 0.0428, "step": 20360 }, { "epoch": 9.466294746629474, "grad_norm": 1.8089179992675781, "learning_rate": 4.6066010304010875e-05, "loss": 0.069, "step": 20362 }, { "epoch": 9.467224546722454, "grad_norm": 1.7749552726745605, "learning_rate": 4.602361455745432e-05, "loss": 0.0422, "step": 20364 }, { "epoch": 9.468154346815435, "grad_norm": 1.1159839630126953, "learning_rate": 4.598101131630961e-05, "loss": 0.0517, "step": 20366 }, { "epoch": 9.469084146908415, "grad_norm": 0.8899465799331665, "learning_rate": 4.593820100105364e-05, "loss": 0.0329, "step": 20368 }, { "epoch": 9.470013947001394, "grad_norm": 1.7713541984558105, "learning_rate": 4.5895184034206874e-05, "loss": 0.0393, "step": 20370 }, { "epoch": 9.470943747094374, "grad_norm": 1.1255818605422974, "learning_rate": 4.585196084032932e-05, "loss": 0.0465, "step": 20372 }, { "epoch": 9.471873547187355, "grad_norm": 0.9358869791030884, "learning_rate": 4.580853184601671e-05, "loss": 0.0233, "step": 20374 }, { "epoch": 9.472803347280335, "grad_norm": 1.93760347366333, "learning_rate": 4.576489747989542e-05, "loss": 0.0758, "step": 20376 }, { "epoch": 9.473733147373315, "grad_norm": 2.0190656185150146, "learning_rate": 4.572105817261913e-05, "loss": 0.0421, "step": 20378 }, { "epoch": 9.474662947466294, "grad_norm": 1.5344535112380981, "learning_rate": 4.56770143568641e-05, "loss": 0.0394, "step": 20380 }, { "epoch": 9.475592747559276, "grad_norm": 0.6915373206138611, "learning_rate": 4.563276646732503e-05, "loss": 0.0199, "step": 20382 }, { "epoch": 9.476522547652255, "grad_norm": 1.7229053974151611, "learning_rate": 4.55883149407108e-05, "loss": 0.0373, "step": 20384 }, { "epoch": 9.477452347745235, "grad_norm": 1.3812391757965088, "learning_rate": 4.5543660215739854e-05, "loss": 0.0378, "step": 20386 }, { "epoch": 9.478382147838214, "grad_norm": 1.9820281267166138, "learning_rate": 4.549880273313639e-05, "loss": 0.053, "step": 20388 }, { "epoch": 9.479311947931194, "grad_norm": 1.3475078344345093, "learning_rate": 4.545374293562564e-05, "loss": 0.0224, "step": 20390 }, { "epoch": 9.480241748024175, "grad_norm": 1.0353738069534302, "learning_rate": 4.540848126792973e-05, "loss": 0.04, "step": 20392 }, { "epoch": 9.481171548117155, "grad_norm": 1.6741281747817993, "learning_rate": 4.536301817676284e-05, "loss": 0.0416, "step": 20394 }, { "epoch": 9.482101348210135, "grad_norm": 0.39706283807754517, "learning_rate": 4.531735411082743e-05, "loss": 0.0338, "step": 20396 }, { "epoch": 9.483031148303114, "grad_norm": 1.9603863954544067, "learning_rate": 4.5271489520809394e-05, "loss": 0.0555, "step": 20398 }, { "epoch": 9.483960948396096, "grad_norm": 1.5576367378234863, "learning_rate": 4.522542485937372e-05, "loss": 0.0342, "step": 20400 }, { "epoch": 9.484890748489075, "grad_norm": 2.232156753540039, "learning_rate": 4.517916058116012e-05, "loss": 0.0865, "step": 20402 }, { "epoch": 9.485820548582055, "grad_norm": 1.8501875400543213, "learning_rate": 4.513269714277814e-05, "loss": 0.0383, "step": 20404 }, { "epoch": 9.486750348675034, "grad_norm": 1.4699455499649048, "learning_rate": 4.508603500280326e-05, "loss": 0.036, "step": 20406 }, { "epoch": 9.487680148768014, "grad_norm": 1.46129310131073, "learning_rate": 4.5039174621772066e-05, "loss": 0.0376, "step": 20408 }, { "epoch": 9.488609948860995, "grad_norm": 1.3573507070541382, "learning_rate": 4.499211646217728e-05, "loss": 0.0405, "step": 20410 }, { "epoch": 9.489539748953975, "grad_norm": 2.14516544342041, "learning_rate": 4.494486098846437e-05, "loss": 0.0631, "step": 20412 }, { "epoch": 9.490469549046955, "grad_norm": 2.0479118824005127, "learning_rate": 4.4897408667025466e-05, "loss": 0.0572, "step": 20414 }, { "epoch": 9.491399349139934, "grad_norm": 0.8126055002212524, "learning_rate": 4.484975996619593e-05, "loss": 0.0323, "step": 20416 }, { "epoch": 9.492329149232916, "grad_norm": 1.2764489650726318, "learning_rate": 4.480191535624931e-05, "loss": 0.0503, "step": 20418 }, { "epoch": 9.493258949325895, "grad_norm": 1.6395434141159058, "learning_rate": 4.475387530939237e-05, "loss": 0.0381, "step": 20420 }, { "epoch": 9.494188749418875, "grad_norm": 0.6232980489730835, "learning_rate": 4.470564029976109e-05, "loss": 0.0213, "step": 20422 }, { "epoch": 9.495118549511854, "grad_norm": 1.8184118270874023, "learning_rate": 4.4657210803415525e-05, "loss": 0.0572, "step": 20424 }, { "epoch": 9.496048349604836, "grad_norm": 2.238239288330078, "learning_rate": 4.4608587298335287e-05, "loss": 0.0591, "step": 20426 }, { "epoch": 9.496978149697815, "grad_norm": 1.0384749174118042, "learning_rate": 4.4559770264414824e-05, "loss": 0.0378, "step": 20428 }, { "epoch": 9.497907949790795, "grad_norm": 1.4486957788467407, "learning_rate": 4.451076018345834e-05, "loss": 0.039, "step": 20430 }, { "epoch": 9.498837749883775, "grad_norm": 1.4823354482650757, "learning_rate": 4.446155753917566e-05, "loss": 0.0489, "step": 20432 }, { "epoch": 9.499767549976754, "grad_norm": 2.584362506866455, "learning_rate": 4.441216281717702e-05, "loss": 0.0602, "step": 20434 }, { "epoch": 9.500697350069736, "grad_norm": 1.6264712810516357, "learning_rate": 4.4362576504968485e-05, "loss": 0.0356, "step": 20436 }, { "epoch": 9.501627150162715, "grad_norm": 1.7568308115005493, "learning_rate": 4.4312799091946725e-05, "loss": 0.0735, "step": 20438 }, { "epoch": 9.502556950255695, "grad_norm": 2.0282671451568604, "learning_rate": 4.4262831069394825e-05, "loss": 0.0579, "step": 20440 }, { "epoch": 9.503486750348674, "grad_norm": 1.3875665664672852, "learning_rate": 4.4212672930476976e-05, "loss": 0.0493, "step": 20442 }, { "epoch": 9.504416550441656, "grad_norm": 1.159569501876831, "learning_rate": 4.416232517023378e-05, "loss": 0.0362, "step": 20444 }, { "epoch": 9.505346350534635, "grad_norm": 3.008265972137451, "learning_rate": 4.411178828557742e-05, "loss": 0.0673, "step": 20446 }, { "epoch": 9.506276150627615, "grad_norm": 1.1706725358963013, "learning_rate": 4.40610627752863e-05, "loss": 0.0521, "step": 20448 }, { "epoch": 9.507205950720595, "grad_norm": 3.173297166824341, "learning_rate": 4.401014914000085e-05, "loss": 0.0794, "step": 20450 }, { "epoch": 9.508135750813576, "grad_norm": 1.27824866771698, "learning_rate": 4.39590478822181e-05, "loss": 0.041, "step": 20452 }, { "epoch": 9.509065550906556, "grad_norm": 1.1727920770645142, "learning_rate": 4.390775950628683e-05, "loss": 0.0435, "step": 20454 }, { "epoch": 9.509995350999535, "grad_norm": 1.7034082412719727, "learning_rate": 4.3856284518402716e-05, "loss": 0.0471, "step": 20456 }, { "epoch": 9.510925151092515, "grad_norm": 1.5511490106582642, "learning_rate": 4.3804623426602886e-05, "loss": 0.0491, "step": 20458 }, { "epoch": 9.511854951185494, "grad_norm": 1.3542413711547852, "learning_rate": 4.375277674076156e-05, "loss": 0.0448, "step": 20460 }, { "epoch": 9.512784751278476, "grad_norm": 1.6883907318115234, "learning_rate": 4.3700744972584725e-05, "loss": 0.0205, "step": 20462 }, { "epoch": 9.513714551371455, "grad_norm": 0.7595230937004089, "learning_rate": 4.364852863560457e-05, "loss": 0.025, "step": 20464 }, { "epoch": 9.514644351464435, "grad_norm": 0.8864503502845764, "learning_rate": 4.359612824517574e-05, "loss": 0.0249, "step": 20466 }, { "epoch": 9.515574151557415, "grad_norm": 1.9166584014892578, "learning_rate": 4.354354431846856e-05, "loss": 0.0424, "step": 20468 }, { "epoch": 9.516503951650396, "grad_norm": 2.6252293586730957, "learning_rate": 4.3490777374465295e-05, "loss": 0.0434, "step": 20470 }, { "epoch": 9.517433751743376, "grad_norm": 1.99375319480896, "learning_rate": 4.34378279339545e-05, "loss": 0.0403, "step": 20472 }, { "epoch": 9.518363551836355, "grad_norm": 1.8935047388076782, "learning_rate": 4.338469651952552e-05, "loss": 0.0586, "step": 20474 }, { "epoch": 9.519293351929335, "grad_norm": 0.9473549723625183, "learning_rate": 4.3331383655564094e-05, "loss": 0.0343, "step": 20476 }, { "epoch": 9.520223152022314, "grad_norm": 2.6944432258605957, "learning_rate": 4.327788986824667e-05, "loss": 0.0401, "step": 20478 }, { "epoch": 9.521152952115296, "grad_norm": 1.6180446147918701, "learning_rate": 4.3224215685535334e-05, "loss": 0.0633, "step": 20480 }, { "epoch": 9.522082752208275, "grad_norm": 2.5117874145507812, "learning_rate": 4.3170361637172704e-05, "loss": 0.0704, "step": 20482 }, { "epoch": 9.523012552301255, "grad_norm": 1.9892140626907349, "learning_rate": 4.3116328254676274e-05, "loss": 0.0429, "step": 20484 }, { "epoch": 9.523942352394235, "grad_norm": 1.0263265371322632, "learning_rate": 4.306211607133382e-05, "loss": 0.0322, "step": 20486 }, { "epoch": 9.524872152487216, "grad_norm": 2.12164568901062, "learning_rate": 4.300772562219772e-05, "loss": 0.069, "step": 20488 }, { "epoch": 9.525801952580196, "grad_norm": 1.7337896823883057, "learning_rate": 4.295315744407987e-05, "loss": 0.034, "step": 20490 }, { "epoch": 9.526731752673175, "grad_norm": 1.9764213562011719, "learning_rate": 4.2898412075545895e-05, "loss": 0.0763, "step": 20492 }, { "epoch": 9.527661552766155, "grad_norm": 1.2140700817108154, "learning_rate": 4.2843490056910616e-05, "loss": 0.0302, "step": 20494 }, { "epoch": 9.528591352859134, "grad_norm": 1.670811653137207, "learning_rate": 4.278839193023219e-05, "loss": 0.0358, "step": 20496 }, { "epoch": 9.529521152952116, "grad_norm": 1.1480984687805176, "learning_rate": 4.273311823930688e-05, "loss": 0.0277, "step": 20498 }, { "epoch": 9.530450953045095, "grad_norm": 2.4655539989471436, "learning_rate": 4.2677669529663825e-05, "loss": 0.048, "step": 20500 }, { "epoch": 9.531380753138075, "grad_norm": 2.989811420440674, "learning_rate": 4.2622046348559145e-05, "loss": 0.0626, "step": 20502 }, { "epoch": 9.532310553231055, "grad_norm": 0.8768781423568726, "learning_rate": 4.256624924497131e-05, "loss": 0.0443, "step": 20504 }, { "epoch": 9.533240353324036, "grad_norm": 1.8885650634765625, "learning_rate": 4.2510278769595215e-05, "loss": 0.0458, "step": 20506 }, { "epoch": 9.534170153417016, "grad_norm": 1.6030464172363281, "learning_rate": 4.2454135474836844e-05, "loss": 0.0618, "step": 20508 }, { "epoch": 9.535099953509995, "grad_norm": 0.8885872960090637, "learning_rate": 4.239781991480798e-05, "loss": 0.0352, "step": 20510 }, { "epoch": 9.536029753602975, "grad_norm": 1.570239782333374, "learning_rate": 4.234133264532021e-05, "loss": 0.0405, "step": 20512 }, { "epoch": 9.536959553695956, "grad_norm": 1.2740134000778198, "learning_rate": 4.2284674223880214e-05, "loss": 0.0293, "step": 20514 }, { "epoch": 9.537889353788936, "grad_norm": 1.3992352485656738, "learning_rate": 4.2227845209683866e-05, "loss": 0.0333, "step": 20516 }, { "epoch": 9.538819153881915, "grad_norm": 0.7655329704284668, "learning_rate": 4.217084616361033e-05, "loss": 0.0265, "step": 20518 }, { "epoch": 9.539748953974895, "grad_norm": 0.9818165898323059, "learning_rate": 4.211367764821732e-05, "loss": 0.0417, "step": 20520 }, { "epoch": 9.540678754067875, "grad_norm": 0.9287179708480835, "learning_rate": 4.2056340227734974e-05, "loss": 0.0384, "step": 20522 }, { "epoch": 9.541608554160856, "grad_norm": 1.9052592515945435, "learning_rate": 4.199883446806051e-05, "loss": 0.0427, "step": 20524 }, { "epoch": 9.542538354253836, "grad_norm": 2.1873908042907715, "learning_rate": 4.1941160936752684e-05, "loss": 0.0418, "step": 20526 }, { "epoch": 9.543468154346815, "grad_norm": 1.4238231182098389, "learning_rate": 4.188332020302571e-05, "loss": 0.0386, "step": 20528 }, { "epoch": 9.544397954439795, "grad_norm": 1.5737762451171875, "learning_rate": 4.182531283774441e-05, "loss": 0.0306, "step": 20530 }, { "epoch": 9.545327754532776, "grad_norm": 2.576809883117676, "learning_rate": 4.1767139413418054e-05, "loss": 0.0977, "step": 20532 }, { "epoch": 9.546257554625756, "grad_norm": 1.6246922016143799, "learning_rate": 4.1708800504194854e-05, "loss": 0.0269, "step": 20534 }, { "epoch": 9.547187354718735, "grad_norm": 1.189677357673645, "learning_rate": 4.1650296685856414e-05, "loss": 0.0335, "step": 20536 }, { "epoch": 9.548117154811715, "grad_norm": 1.3093469142913818, "learning_rate": 4.159162853581156e-05, "loss": 0.038, "step": 20538 }, { "epoch": 9.549046954904696, "grad_norm": 2.040688991546631, "learning_rate": 4.153279663309136e-05, "loss": 0.0397, "step": 20540 }, { "epoch": 9.549976754997676, "grad_norm": 1.1409189701080322, "learning_rate": 4.1473801558342955e-05, "loss": 0.0293, "step": 20542 }, { "epoch": 9.550906555090656, "grad_norm": 1.8646405935287476, "learning_rate": 4.141464389382405e-05, "loss": 0.0589, "step": 20544 }, { "epoch": 9.551836355183635, "grad_norm": 1.7025431394577026, "learning_rate": 4.1355324223396496e-05, "loss": 0.0566, "step": 20546 }, { "epoch": 9.552766155276615, "grad_norm": 1.4476585388183594, "learning_rate": 4.129584313252204e-05, "loss": 0.038, "step": 20548 }, { "epoch": 9.553695955369596, "grad_norm": 1.8579213619232178, "learning_rate": 4.123620120825463e-05, "loss": 0.0536, "step": 20550 }, { "epoch": 9.554625755462576, "grad_norm": 0.9534528851509094, "learning_rate": 4.117639903923612e-05, "loss": 0.0374, "step": 20552 }, { "epoch": 9.555555555555555, "grad_norm": 2.19531512260437, "learning_rate": 4.1116437215689906e-05, "loss": 0.0695, "step": 20554 }, { "epoch": 9.556485355648535, "grad_norm": 1.0649112462997437, "learning_rate": 4.1056316329414704e-05, "loss": 0.0169, "step": 20556 }, { "epoch": 9.557415155741516, "grad_norm": 2.3604204654693604, "learning_rate": 4.099603697377952e-05, "loss": 0.0426, "step": 20558 }, { "epoch": 9.558344955834496, "grad_norm": 1.667061448097229, "learning_rate": 4.093559974371728e-05, "loss": 0.0198, "step": 20560 }, { "epoch": 9.559274755927476, "grad_norm": 1.6122138500213623, "learning_rate": 4.087500523571903e-05, "loss": 0.0294, "step": 20562 }, { "epoch": 9.560204556020455, "grad_norm": 1.1423972845077515, "learning_rate": 4.081425404782823e-05, "loss": 0.0492, "step": 20564 }, { "epoch": 9.561134356113435, "grad_norm": 0.8459952473640442, "learning_rate": 4.075334677963432e-05, "loss": 0.0218, "step": 20566 }, { "epoch": 9.562064156206416, "grad_norm": 1.5340927839279175, "learning_rate": 4.069228403226757e-05, "loss": 0.0552, "step": 20568 }, { "epoch": 9.562993956299396, "grad_norm": 2.093158483505249, "learning_rate": 4.0631066408392794e-05, "loss": 0.058, "step": 20570 }, { "epoch": 9.563923756392375, "grad_norm": 0.8340564370155334, "learning_rate": 4.056969451220295e-05, "loss": 0.0265, "step": 20572 }, { "epoch": 9.564853556485355, "grad_norm": 1.236176609992981, "learning_rate": 4.0508168949414e-05, "loss": 0.0287, "step": 20574 }, { "epoch": 9.565783356578336, "grad_norm": 1.0881060361862183, "learning_rate": 4.044649032725843e-05, "loss": 0.0586, "step": 20576 }, { "epoch": 9.566713156671316, "grad_norm": 2.446580171585083, "learning_rate": 4.038465925447933e-05, "loss": 0.0351, "step": 20578 }, { "epoch": 9.567642956764296, "grad_norm": 0.6509385108947754, "learning_rate": 4.032267634132456e-05, "loss": 0.0294, "step": 20580 }, { "epoch": 9.568572756857275, "grad_norm": 1.6801575422286987, "learning_rate": 4.026054219954017e-05, "loss": 0.043, "step": 20582 }, { "epoch": 9.569502556950255, "grad_norm": 1.062565803527832, "learning_rate": 4.019825744236521e-05, "loss": 0.0258, "step": 20584 }, { "epoch": 9.570432357043236, "grad_norm": 1.1149113178253174, "learning_rate": 4.0135822684525085e-05, "loss": 0.0389, "step": 20586 }, { "epoch": 9.571362157136216, "grad_norm": 2.206608533859253, "learning_rate": 4.007323854222563e-05, "loss": 0.0364, "step": 20588 }, { "epoch": 9.572291957229195, "grad_norm": 1.5157781839370728, "learning_rate": 4.0010505633147085e-05, "loss": 0.0336, "step": 20590 }, { "epoch": 9.573221757322175, "grad_norm": 1.648058295249939, "learning_rate": 3.994762457643806e-05, "loss": 0.0383, "step": 20592 }, { "epoch": 9.574151557415156, "grad_norm": 1.1971964836120605, "learning_rate": 3.9884595992708934e-05, "loss": 0.0227, "step": 20594 }, { "epoch": 9.575081357508136, "grad_norm": 1.8136576414108276, "learning_rate": 3.982142050402651e-05, "loss": 0.045, "step": 20596 }, { "epoch": 9.576011157601116, "grad_norm": 0.8523985147476196, "learning_rate": 3.9758098733907505e-05, "loss": 0.0276, "step": 20598 }, { "epoch": 9.576940957694095, "grad_norm": 1.3638068437576294, "learning_rate": 3.9694631307311945e-05, "loss": 0.0329, "step": 20600 }, { "epoch": 9.577870757787077, "grad_norm": 0.938133716583252, "learning_rate": 3.963101885063783e-05, "loss": 0.0177, "step": 20602 }, { "epoch": 9.578800557880056, "grad_norm": 2.2312724590301514, "learning_rate": 3.9567261991714445e-05, "loss": 0.0406, "step": 20604 }, { "epoch": 9.579730357973036, "grad_norm": 1.7062269449234009, "learning_rate": 3.9503361359796244e-05, "loss": 0.0288, "step": 20606 }, { "epoch": 9.580660158066015, "grad_norm": 1.282921552658081, "learning_rate": 3.943931758555681e-05, "loss": 0.0422, "step": 20608 }, { "epoch": 9.581589958158997, "grad_norm": 1.4612760543823242, "learning_rate": 3.9375131301082056e-05, "loss": 0.0307, "step": 20610 }, { "epoch": 9.582519758251976, "grad_norm": 0.547149658203125, "learning_rate": 3.931080313986483e-05, "loss": 0.0537, "step": 20612 }, { "epoch": 9.583449558344956, "grad_norm": 0.593905508518219, "learning_rate": 3.924633373679826e-05, "loss": 0.0553, "step": 20614 }, { "epoch": 9.584379358437936, "grad_norm": 1.2787277698516846, "learning_rate": 3.9181723728168896e-05, "loss": 0.0288, "step": 20616 }, { "epoch": 9.585309158530915, "grad_norm": 2.063220977783203, "learning_rate": 3.911697375165202e-05, "loss": 0.0644, "step": 20618 }, { "epoch": 9.586238958623897, "grad_norm": 1.1746948957443237, "learning_rate": 3.905208444630333e-05, "loss": 0.0311, "step": 20620 }, { "epoch": 9.587168758716876, "grad_norm": 1.623823642730713, "learning_rate": 3.8987056452554204e-05, "loss": 0.0233, "step": 20622 }, { "epoch": 9.588098558809856, "grad_norm": 1.1021217107772827, "learning_rate": 3.892189041220485e-05, "loss": 0.0325, "step": 20624 }, { "epoch": 9.589028358902835, "grad_norm": 2.094430923461914, "learning_rate": 3.885658696841747e-05, "loss": 0.0522, "step": 20626 }, { "epoch": 9.589958158995817, "grad_norm": 1.4382567405700684, "learning_rate": 3.879114676571084e-05, "loss": 0.0431, "step": 20628 }, { "epoch": 9.590887959088796, "grad_norm": 2.084031820297241, "learning_rate": 3.8725570449953346e-05, "loss": 0.0345, "step": 20630 }, { "epoch": 9.591817759181776, "grad_norm": 2.1600148677825928, "learning_rate": 3.865985866835675e-05, "loss": 0.0614, "step": 20632 }, { "epoch": 9.592747559274756, "grad_norm": 1.6653823852539062, "learning_rate": 3.859401206946981e-05, "loss": 0.039, "step": 20634 }, { "epoch": 9.593677359367735, "grad_norm": 0.8939815163612366, "learning_rate": 3.8528031303172004e-05, "loss": 0.0222, "step": 20636 }, { "epoch": 9.594607159460717, "grad_norm": 3.058358669281006, "learning_rate": 3.846191702066657e-05, "loss": 0.0472, "step": 20638 }, { "epoch": 9.595536959553696, "grad_norm": 1.1746177673339844, "learning_rate": 3.839566987447494e-05, "loss": 0.0528, "step": 20640 }, { "epoch": 9.596466759646676, "grad_norm": 0.39694634079933167, "learning_rate": 3.8329290518429705e-05, "loss": 0.0228, "step": 20642 }, { "epoch": 9.597396559739655, "grad_norm": 0.7880655527114868, "learning_rate": 3.8262779607668306e-05, "loss": 0.0249, "step": 20644 }, { "epoch": 9.598326359832637, "grad_norm": 1.394147515296936, "learning_rate": 3.819613779862673e-05, "loss": 0.0575, "step": 20646 }, { "epoch": 9.599256159925616, "grad_norm": 1.7365047931671143, "learning_rate": 3.812936574903243e-05, "loss": 0.0427, "step": 20648 }, { "epoch": 9.600185960018596, "grad_norm": 0.46456512808799744, "learning_rate": 3.806246411789872e-05, "loss": 0.0247, "step": 20650 }, { "epoch": 9.601115760111576, "grad_norm": 1.9043911695480347, "learning_rate": 3.799543356551786e-05, "loss": 0.0379, "step": 20652 }, { "epoch": 9.602045560204555, "grad_norm": 1.7941557168960571, "learning_rate": 3.792827475345402e-05, "loss": 0.0457, "step": 20654 }, { "epoch": 9.602975360297537, "grad_norm": 1.4503278732299805, "learning_rate": 3.7860988344537715e-05, "loss": 0.0229, "step": 20656 }, { "epoch": 9.603905160390516, "grad_norm": 1.712411642074585, "learning_rate": 3.779357500285864e-05, "loss": 0.0332, "step": 20658 }, { "epoch": 9.604834960483496, "grad_norm": 1.8070933818817139, "learning_rate": 3.772603539375927e-05, "loss": 0.036, "step": 20660 }, { "epoch": 9.605764760576475, "grad_norm": 2.2146005630493164, "learning_rate": 3.765837018382842e-05, "loss": 0.0695, "step": 20662 }, { "epoch": 9.606694560669457, "grad_norm": 2.184776782989502, "learning_rate": 3.75905800408941e-05, "loss": 0.0433, "step": 20664 }, { "epoch": 9.607624360762436, "grad_norm": 1.3894309997558594, "learning_rate": 3.752266563401779e-05, "loss": 0.0317, "step": 20666 }, { "epoch": 9.608554160855416, "grad_norm": 2.0195629596710205, "learning_rate": 3.7454627633487436e-05, "loss": 0.0732, "step": 20668 }, { "epoch": 9.609483960948396, "grad_norm": 1.9680967330932617, "learning_rate": 3.738646671081017e-05, "loss": 0.0482, "step": 20670 }, { "epoch": 9.610413761041377, "grad_norm": 2.8366072177886963, "learning_rate": 3.731818353870739e-05, "loss": 0.0385, "step": 20672 }, { "epoch": 9.611343561134357, "grad_norm": 1.8984023332595825, "learning_rate": 3.724977879110598e-05, "loss": 0.0443, "step": 20674 }, { "epoch": 9.612273361227336, "grad_norm": 1.9748722314834595, "learning_rate": 3.7181253143133336e-05, "loss": 0.0301, "step": 20676 }, { "epoch": 9.613203161320316, "grad_norm": 0.5678695440292358, "learning_rate": 3.7112607271109944e-05, "loss": 0.0362, "step": 20678 }, { "epoch": 9.614132961413295, "grad_norm": 1.699225902557373, "learning_rate": 3.7043841852542996e-05, "loss": 0.033, "step": 20680 }, { "epoch": 9.615062761506277, "grad_norm": 1.145309329032898, "learning_rate": 3.69749575661191e-05, "loss": 0.0382, "step": 20682 }, { "epoch": 9.615992561599256, "grad_norm": 0.8950843214988708, "learning_rate": 3.690595509169852e-05, "loss": 0.0246, "step": 20684 }, { "epoch": 9.616922361692236, "grad_norm": 1.9043673276901245, "learning_rate": 3.6836835110307815e-05, "loss": 0.0391, "step": 20686 }, { "epoch": 9.617852161785216, "grad_norm": 1.0828176736831665, "learning_rate": 3.67675983041333e-05, "loss": 0.0204, "step": 20688 }, { "epoch": 9.618781961878197, "grad_norm": 1.5909384489059448, "learning_rate": 3.669824535651444e-05, "loss": 0.0547, "step": 20690 }, { "epoch": 9.619711761971177, "grad_norm": 0.7345001697540283, "learning_rate": 3.662877695193654e-05, "loss": 0.0157, "step": 20692 }, { "epoch": 9.620641562064156, "grad_norm": 0.9011335968971252, "learning_rate": 3.655919377602484e-05, "loss": 0.0236, "step": 20694 }, { "epoch": 9.621571362157136, "grad_norm": 1.2426763772964478, "learning_rate": 3.648949651553736e-05, "loss": 0.0194, "step": 20696 }, { "epoch": 9.622501162250117, "grad_norm": 1.788146734237671, "learning_rate": 3.641968585835746e-05, "loss": 0.0323, "step": 20698 }, { "epoch": 9.623430962343097, "grad_norm": 0.9625546932220459, "learning_rate": 3.6349762493488766e-05, "loss": 0.0256, "step": 20700 }, { "epoch": 9.624360762436076, "grad_norm": 1.5856456756591797, "learning_rate": 3.627972711104619e-05, "loss": 0.0269, "step": 20702 }, { "epoch": 9.625290562529056, "grad_norm": 2.586937189102173, "learning_rate": 3.620958040225083e-05, "loss": 0.0446, "step": 20704 }, { "epoch": 9.626220362622036, "grad_norm": 1.478070616722107, "learning_rate": 3.613932305942256e-05, "loss": 0.0429, "step": 20706 }, { "epoch": 9.627150162715017, "grad_norm": 0.9494179487228394, "learning_rate": 3.606895577597265e-05, "loss": 0.0299, "step": 20708 }, { "epoch": 9.628079962807996, "grad_norm": 1.7659952640533447, "learning_rate": 3.5998479246397954e-05, "loss": 0.0296, "step": 20710 }, { "epoch": 9.629009762900976, "grad_norm": 0.823428750038147, "learning_rate": 3.592789416627336e-05, "loss": 0.0283, "step": 20712 }, { "epoch": 9.629939562993956, "grad_norm": 1.200165867805481, "learning_rate": 3.585720123224512e-05, "loss": 0.0194, "step": 20714 }, { "epoch": 9.630869363086937, "grad_norm": 1.6920490264892578, "learning_rate": 3.5786401142024104e-05, "loss": 0.0622, "step": 20716 }, { "epoch": 9.631799163179917, "grad_norm": 2.640800714492798, "learning_rate": 3.57154945943783e-05, "loss": 0.0391, "step": 20718 }, { "epoch": 9.632728963272896, "grad_norm": 1.0574923753738403, "learning_rate": 3.564448228912687e-05, "loss": 0.0471, "step": 20720 }, { "epoch": 9.633658763365876, "grad_norm": 1.3489689826965332, "learning_rate": 3.557336492713259e-05, "loss": 0.0569, "step": 20722 }, { "epoch": 9.634588563458856, "grad_norm": 1.7021862268447876, "learning_rate": 3.5502143210295293e-05, "loss": 0.0207, "step": 20724 }, { "epoch": 9.635518363551837, "grad_norm": 2.0310633182525635, "learning_rate": 3.5430817841544234e-05, "loss": 0.0558, "step": 20726 }, { "epoch": 9.636448163644816, "grad_norm": 1.8926819562911987, "learning_rate": 3.535938952483217e-05, "loss": 0.0744, "step": 20728 }, { "epoch": 9.637377963737796, "grad_norm": 0.8119315505027771, "learning_rate": 3.528785896512775e-05, "loss": 0.0319, "step": 20730 }, { "epoch": 9.638307763830776, "grad_norm": 0.910128653049469, "learning_rate": 3.521622686840871e-05, "loss": 0.0156, "step": 20732 }, { "epoch": 9.639237563923757, "grad_norm": 1.1500860452651978, "learning_rate": 3.5144493941655116e-05, "loss": 0.0287, "step": 20734 }, { "epoch": 9.640167364016737, "grad_norm": 1.5563809871673584, "learning_rate": 3.507266089284164e-05, "loss": 0.0396, "step": 20736 }, { "epoch": 9.641097164109716, "grad_norm": 1.1517776250839233, "learning_rate": 3.500072843093166e-05, "loss": 0.0143, "step": 20738 }, { "epoch": 9.642026964202696, "grad_norm": 1.0564982891082764, "learning_rate": 3.4928697265869515e-05, "loss": 0.0337, "step": 20740 }, { "epoch": 9.642956764295675, "grad_norm": 1.1265958547592163, "learning_rate": 3.485656810857375e-05, "loss": 0.0177, "step": 20742 }, { "epoch": 9.643886564388657, "grad_norm": 1.1920164823532104, "learning_rate": 3.478434167093016e-05, "loss": 0.0421, "step": 20744 }, { "epoch": 9.644816364481636, "grad_norm": 0.5465729236602783, "learning_rate": 3.471201866578421e-05, "loss": 0.0278, "step": 20746 }, { "epoch": 9.645746164574616, "grad_norm": 1.4665504693984985, "learning_rate": 3.4639599806934926e-05, "loss": 0.0347, "step": 20748 }, { "epoch": 9.646675964667596, "grad_norm": 2.1498963832855225, "learning_rate": 3.456708580912738e-05, "loss": 0.0612, "step": 20750 }, { "epoch": 9.647605764760577, "grad_norm": 1.1755146980285645, "learning_rate": 3.449447738804496e-05, "loss": 0.0358, "step": 20752 }, { "epoch": 9.648535564853557, "grad_norm": 1.1375095844268799, "learning_rate": 3.442177526030413e-05, "loss": 0.0104, "step": 20754 }, { "epoch": 9.649465364946536, "grad_norm": 2.0184898376464844, "learning_rate": 3.4348980143445034e-05, "loss": 0.0717, "step": 20756 }, { "epoch": 9.650395165039516, "grad_norm": 1.4329888820648193, "learning_rate": 3.427609275592626e-05, "loss": 0.026, "step": 20758 }, { "epoch": 9.651324965132497, "grad_norm": 2.415371894836426, "learning_rate": 3.4203113817117065e-05, "loss": 0.0514, "step": 20760 }, { "epoch": 9.652254765225477, "grad_norm": 1.690338373184204, "learning_rate": 3.4130044047289775e-05, "loss": 0.0555, "step": 20762 }, { "epoch": 9.653184565318456, "grad_norm": 1.8730984926223755, "learning_rate": 3.4056884167613684e-05, "loss": 0.041, "step": 20764 }, { "epoch": 9.654114365411436, "grad_norm": 0.35749202966690063, "learning_rate": 3.398363490014728e-05, "loss": 0.0217, "step": 20766 }, { "epoch": 9.655044165504417, "grad_norm": 0.8722282648086548, "learning_rate": 3.391029696783123e-05, "loss": 0.0321, "step": 20768 }, { "epoch": 9.655973965597397, "grad_norm": 1.8876827955245972, "learning_rate": 3.3836871094481526e-05, "loss": 0.0367, "step": 20770 }, { "epoch": 9.656903765690377, "grad_norm": 1.2156193256378174, "learning_rate": 3.3763358004781536e-05, "loss": 0.0471, "step": 20772 }, { "epoch": 9.657833565783356, "grad_norm": 1.9941625595092773, "learning_rate": 3.368975842427595e-05, "loss": 0.0249, "step": 20774 }, { "epoch": 9.658763365876336, "grad_norm": 1.6873035430908203, "learning_rate": 3.361607307936291e-05, "loss": 0.0476, "step": 20776 }, { "epoch": 9.659693165969317, "grad_norm": 1.151911973953247, "learning_rate": 3.3542302697287196e-05, "loss": 0.0445, "step": 20778 }, { "epoch": 9.660622966062297, "grad_norm": 2.0033912658691406, "learning_rate": 3.346844800613236e-05, "loss": 0.0307, "step": 20780 }, { "epoch": 9.661552766155276, "grad_norm": 1.561510443687439, "learning_rate": 3.339450973481456e-05, "loss": 0.0368, "step": 20782 }, { "epoch": 9.662482566248256, "grad_norm": 1.8177443742752075, "learning_rate": 3.3320488613074676e-05, "loss": 0.0277, "step": 20784 }, { "epoch": 9.663412366341237, "grad_norm": 1.8999956846237183, "learning_rate": 3.3246385371471286e-05, "loss": 0.0466, "step": 20786 }, { "epoch": 9.664342166434217, "grad_norm": 1.262072205543518, "learning_rate": 3.317220074137366e-05, "loss": 0.0245, "step": 20788 }, { "epoch": 9.665271966527197, "grad_norm": 0.45902320742607117, "learning_rate": 3.30979354549538e-05, "loss": 0.0342, "step": 20790 }, { "epoch": 9.666201766620176, "grad_norm": 1.718457579612732, "learning_rate": 3.302359024518026e-05, "loss": 0.0336, "step": 20792 }, { "epoch": 9.667131566713156, "grad_norm": 1.21803879737854, "learning_rate": 3.294916584581026e-05, "loss": 0.0274, "step": 20794 }, { "epoch": 9.668061366806137, "grad_norm": 1.1648457050323486, "learning_rate": 3.287466299138257e-05, "loss": 0.0404, "step": 20796 }, { "epoch": 9.668991166899117, "grad_norm": 2.473496437072754, "learning_rate": 3.280008241721045e-05, "loss": 0.0596, "step": 20798 }, { "epoch": 9.669920966992096, "grad_norm": 0.6262966990470886, "learning_rate": 3.2725424859373725e-05, "loss": 0.0386, "step": 20800 }, { "epoch": 9.670850767085076, "grad_norm": 0.4921514093875885, "learning_rate": 3.2650691054712526e-05, "loss": 0.0146, "step": 20802 }, { "epoch": 9.671780567178057, "grad_norm": 0.8465093970298767, "learning_rate": 3.2575881740819484e-05, "loss": 0.0274, "step": 20804 }, { "epoch": 9.672710367271037, "grad_norm": 1.1353774070739746, "learning_rate": 3.2500997656032e-05, "loss": 0.0249, "step": 20806 }, { "epoch": 9.673640167364017, "grad_norm": 1.7054888010025024, "learning_rate": 3.242603953942593e-05, "loss": 0.0257, "step": 20808 }, { "epoch": 9.674569967456996, "grad_norm": 0.9636514186859131, "learning_rate": 3.235100813080762e-05, "loss": 0.0239, "step": 20810 }, { "epoch": 9.675499767549976, "grad_norm": 1.0312323570251465, "learning_rate": 3.227590417070678e-05, "loss": 0.0234, "step": 20812 }, { "epoch": 9.676429567642957, "grad_norm": 0.9637829065322876, "learning_rate": 3.2200728400369355e-05, "loss": 0.0278, "step": 20814 }, { "epoch": 9.677359367735937, "grad_norm": 0.8540744185447693, "learning_rate": 3.212548156174948e-05, "loss": 0.0368, "step": 20816 }, { "epoch": 9.678289167828916, "grad_norm": 0.8373894095420837, "learning_rate": 3.205016439750328e-05, "loss": 0.022, "step": 20818 }, { "epoch": 9.679218967921896, "grad_norm": 1.0765831470489502, "learning_rate": 3.197477765098074e-05, "loss": 0.0159, "step": 20820 }, { "epoch": 9.680148768014877, "grad_norm": 1.550389051437378, "learning_rate": 3.1899322066218625e-05, "loss": 0.0283, "step": 20822 }, { "epoch": 9.681078568107857, "grad_norm": 1.2312010526657104, "learning_rate": 3.1823798387933236e-05, "loss": 0.037, "step": 20824 }, { "epoch": 9.682008368200837, "grad_norm": 0.9247159361839294, "learning_rate": 3.174820736151248e-05, "loss": 0.0359, "step": 20826 }, { "epoch": 9.682938168293816, "grad_norm": 0.7395539283752441, "learning_rate": 3.1672549733009424e-05, "loss": 0.0428, "step": 20828 }, { "epoch": 9.683867968386798, "grad_norm": 1.539130687713623, "learning_rate": 3.159682624913432e-05, "loss": 0.055, "step": 20830 }, { "epoch": 9.684797768479777, "grad_norm": 1.4220389127731323, "learning_rate": 3.152103765724755e-05, "loss": 0.0242, "step": 20832 }, { "epoch": 9.685727568572757, "grad_norm": 0.8888561129570007, "learning_rate": 3.144518470535159e-05, "loss": 0.0164, "step": 20834 }, { "epoch": 9.686657368665736, "grad_norm": 1.2943781614303589, "learning_rate": 3.136926814208461e-05, "loss": 0.0281, "step": 20836 }, { "epoch": 9.687587168758716, "grad_norm": 1.0212578773498535, "learning_rate": 3.1293288716712444e-05, "loss": 0.0227, "step": 20838 }, { "epoch": 9.688516968851697, "grad_norm": 1.8642735481262207, "learning_rate": 3.1217247179121344e-05, "loss": 0.0265, "step": 20840 }, { "epoch": 9.689446768944677, "grad_norm": 1.6118552684783936, "learning_rate": 3.114114427981077e-05, "loss": 0.0209, "step": 20842 }, { "epoch": 9.690376569037657, "grad_norm": 1.4840457439422607, "learning_rate": 3.1064980769885254e-05, "loss": 0.0208, "step": 20844 }, { "epoch": 9.691306369130636, "grad_norm": 1.4732637405395508, "learning_rate": 3.098875740104808e-05, "loss": 0.0523, "step": 20846 }, { "epoch": 9.692236169223618, "grad_norm": 1.1510971784591675, "learning_rate": 3.091247492559311e-05, "loss": 0.0361, "step": 20848 }, { "epoch": 9.693165969316597, "grad_norm": 1.4773786067962646, "learning_rate": 3.0836134096397595e-05, "loss": 0.0336, "step": 20850 }, { "epoch": 9.694095769409577, "grad_norm": 1.046183466911316, "learning_rate": 3.075973566691486e-05, "loss": 0.0226, "step": 20852 }, { "epoch": 9.695025569502556, "grad_norm": 2.4575765132904053, "learning_rate": 3.0683280391166215e-05, "loss": 0.0674, "step": 20854 }, { "epoch": 9.695955369595538, "grad_norm": 2.1318962574005127, "learning_rate": 3.060676902373454e-05, "loss": 0.0529, "step": 20856 }, { "epoch": 9.696885169688517, "grad_norm": 0.9678087830543518, "learning_rate": 3.053020231975633e-05, "loss": 0.0199, "step": 20858 }, { "epoch": 9.697814969781497, "grad_norm": 1.954803228378296, "learning_rate": 3.0453581034913672e-05, "loss": 0.0345, "step": 20860 }, { "epoch": 9.698744769874477, "grad_norm": 1.0087541341781616, "learning_rate": 3.037690592542773e-05, "loss": 0.0245, "step": 20862 }, { "epoch": 9.699674569967456, "grad_norm": 0.6676196455955505, "learning_rate": 3.0300177748051396e-05, "loss": 0.0237, "step": 20864 }, { "epoch": 9.700604370060438, "grad_norm": 2.063610076904297, "learning_rate": 3.0223397260060285e-05, "loss": 0.0719, "step": 20866 }, { "epoch": 9.701534170153417, "grad_norm": 1.7127890586853027, "learning_rate": 3.0146565219246985e-05, "loss": 0.049, "step": 20868 }, { "epoch": 9.702463970246397, "grad_norm": 1.9675804376602173, "learning_rate": 3.00696823839129e-05, "loss": 0.0285, "step": 20870 }, { "epoch": 9.703393770339376, "grad_norm": 0.7507153153419495, "learning_rate": 2.9992749512860224e-05, "loss": 0.0195, "step": 20872 }, { "epoch": 9.704323570432358, "grad_norm": 1.388795018196106, "learning_rate": 2.991576736538551e-05, "loss": 0.0206, "step": 20874 }, { "epoch": 9.705253370525337, "grad_norm": 0.7008753418922424, "learning_rate": 2.9838736701271477e-05, "loss": 0.0197, "step": 20876 }, { "epoch": 9.706183170618317, "grad_norm": 1.6664211750030518, "learning_rate": 2.9761658280779853e-05, "loss": 0.0405, "step": 20878 }, { "epoch": 9.707112970711297, "grad_norm": 3.546198606491089, "learning_rate": 2.9684532864643183e-05, "loss": 0.0931, "step": 20880 }, { "epoch": 9.708042770804276, "grad_norm": 1.481884479522705, "learning_rate": 2.960736121405836e-05, "loss": 0.049, "step": 20882 }, { "epoch": 9.708972570897258, "grad_norm": 1.2169636487960815, "learning_rate": 2.9530144090678595e-05, "loss": 0.0198, "step": 20884 }, { "epoch": 9.709902370990237, "grad_norm": 2.3883893489837646, "learning_rate": 2.9452882256605377e-05, "loss": 0.0545, "step": 20886 }, { "epoch": 9.710832171083217, "grad_norm": 0.8922926783561707, "learning_rate": 2.9375576474381986e-05, "loss": 0.0233, "step": 20888 }, { "epoch": 9.711761971176196, "grad_norm": 1.6166813373565674, "learning_rate": 2.9298227506985286e-05, "loss": 0.0444, "step": 20890 }, { "epoch": 9.712691771269178, "grad_norm": 1.0541170835494995, "learning_rate": 2.9220836117818354e-05, "loss": 0.0249, "step": 20892 }, { "epoch": 9.713621571362157, "grad_norm": 1.3583602905273438, "learning_rate": 2.914340307070296e-05, "loss": 0.0602, "step": 20894 }, { "epoch": 9.714551371455137, "grad_norm": 1.1432082653045654, "learning_rate": 2.906592912987202e-05, "loss": 0.0209, "step": 20896 }, { "epoch": 9.715481171548117, "grad_norm": 2.598864793777466, "learning_rate": 2.8988415059962224e-05, "loss": 0.0475, "step": 20898 }, { "epoch": 9.716410971641096, "grad_norm": 1.4788429737091064, "learning_rate": 2.8910861626005796e-05, "loss": 0.0478, "step": 20900 }, { "epoch": 9.717340771734078, "grad_norm": 0.8971057534217834, "learning_rate": 2.8833269593424004e-05, "loss": 0.0173, "step": 20902 }, { "epoch": 9.718270571827057, "grad_norm": 0.7477700710296631, "learning_rate": 2.8755639728019048e-05, "loss": 0.019, "step": 20904 }, { "epoch": 9.719200371920037, "grad_norm": 1.6090013980865479, "learning_rate": 2.8677972795966014e-05, "loss": 0.0442, "step": 20906 }, { "epoch": 9.720130172013016, "grad_norm": 0.8838825225830078, "learning_rate": 2.860026956380635e-05, "loss": 0.0199, "step": 20908 }, { "epoch": 9.721059972105998, "grad_norm": 0.6559600234031677, "learning_rate": 2.8522530798439743e-05, "loss": 0.0209, "step": 20910 }, { "epoch": 9.721989772198977, "grad_norm": 0.5597121119499207, "learning_rate": 2.8444757267116087e-05, "loss": 0.0626, "step": 20912 }, { "epoch": 9.722919572291957, "grad_norm": 1.220995306968689, "learning_rate": 2.8366949737428915e-05, "loss": 0.0387, "step": 20914 }, { "epoch": 9.723849372384937, "grad_norm": 5.84530782699585, "learning_rate": 2.828910897730695e-05, "loss": 0.0219, "step": 20916 }, { "epoch": 9.724779172477918, "grad_norm": 1.0416861772537231, "learning_rate": 2.8211235755007592e-05, "loss": 0.0202, "step": 20918 }, { "epoch": 9.725708972570898, "grad_norm": 1.367591142654419, "learning_rate": 2.8133330839107584e-05, "loss": 0.0315, "step": 20920 }, { "epoch": 9.726638772663877, "grad_norm": 1.3709347248077393, "learning_rate": 2.805539499849718e-05, "loss": 0.0338, "step": 20922 }, { "epoch": 9.727568572756857, "grad_norm": 0.3966136872768402, "learning_rate": 2.797742900237182e-05, "loss": 0.0261, "step": 20924 }, { "epoch": 9.728498372849838, "grad_norm": 0.7854337692260742, "learning_rate": 2.789943362022407e-05, "loss": 0.0352, "step": 20926 }, { "epoch": 9.729428172942818, "grad_norm": 0.4895089566707611, "learning_rate": 2.7821409621837043e-05, "loss": 0.0176, "step": 20928 }, { "epoch": 9.730357973035797, "grad_norm": 1.49092698097229, "learning_rate": 2.774335777727609e-05, "loss": 0.0448, "step": 20930 }, { "epoch": 9.731287773128777, "grad_norm": 1.872982382774353, "learning_rate": 2.7665278856881593e-05, "loss": 0.0415, "step": 20932 }, { "epoch": 9.732217573221757, "grad_norm": 0.4654926657676697, "learning_rate": 2.7587173631260616e-05, "loss": 0.0207, "step": 20934 }, { "epoch": 9.733147373314738, "grad_norm": 0.5943230390548706, "learning_rate": 2.750904287128039e-05, "loss": 0.0182, "step": 20936 }, { "epoch": 9.734077173407718, "grad_norm": 1.405617594718933, "learning_rate": 2.7430887348060132e-05, "loss": 0.028, "step": 20938 }, { "epoch": 9.735006973500697, "grad_norm": 1.7506234645843506, "learning_rate": 2.7352707832962967e-05, "loss": 0.0182, "step": 20940 }, { "epoch": 9.735936773593677, "grad_norm": 1.1626231670379639, "learning_rate": 2.7274505097589314e-05, "loss": 0.0336, "step": 20942 }, { "epoch": 9.736866573686658, "grad_norm": 0.24957329034805298, "learning_rate": 2.7196279913768608e-05, "loss": 0.0171, "step": 20944 }, { "epoch": 9.737796373779638, "grad_norm": 0.9363298416137695, "learning_rate": 2.7118033053551832e-05, "loss": 0.0356, "step": 20946 }, { "epoch": 9.738726173872617, "grad_norm": 0.8906186819076538, "learning_rate": 2.703976528920389e-05, "loss": 0.0358, "step": 20948 }, { "epoch": 9.739655973965597, "grad_norm": 1.006089210510254, "learning_rate": 2.6961477393196035e-05, "loss": 0.0313, "step": 20950 }, { "epoch": 9.740585774058577, "grad_norm": 1.345988392829895, "learning_rate": 2.6883170138198367e-05, "loss": 0.0331, "step": 20952 }, { "epoch": 9.741515574151558, "grad_norm": 0.5328461527824402, "learning_rate": 2.6804844297071522e-05, "loss": 0.0259, "step": 20954 }, { "epoch": 9.742445374244538, "grad_norm": 0.7580409646034241, "learning_rate": 2.6726500642860113e-05, "loss": 0.0183, "step": 20956 }, { "epoch": 9.743375174337517, "grad_norm": 0.42276057600975037, "learning_rate": 2.6648139948784555e-05, "loss": 0.0098, "step": 20958 }, { "epoch": 9.744304974430497, "grad_norm": 1.2482719421386719, "learning_rate": 2.6569762988232903e-05, "loss": 0.0362, "step": 20960 }, { "epoch": 9.745234774523478, "grad_norm": 0.14154556393623352, "learning_rate": 2.6491370534754295e-05, "loss": 0.0041, "step": 20962 }, { "epoch": 9.746164574616458, "grad_norm": 0.8022745847702026, "learning_rate": 2.64129633620506e-05, "loss": 0.0163, "step": 20964 }, { "epoch": 9.747094374709437, "grad_norm": 1.5009790658950806, "learning_rate": 2.6334542243969136e-05, "loss": 0.0185, "step": 20966 }, { "epoch": 9.748024174802417, "grad_norm": 1.2187970876693726, "learning_rate": 2.625610795449432e-05, "loss": 0.0217, "step": 20968 }, { "epoch": 9.748953974895397, "grad_norm": 0.43738415837287903, "learning_rate": 2.617766126774111e-05, "loss": 0.0115, "step": 20970 }, { "epoch": 9.749883774988378, "grad_norm": 1.1629663705825806, "learning_rate": 2.6099202957946627e-05, "loss": 0.0269, "step": 20972 }, { "epoch": 9.750813575081358, "grad_norm": 1.1839091777801514, "learning_rate": 2.6020733799462716e-05, "loss": 0.0146, "step": 20974 }, { "epoch": 9.751743375174337, "grad_norm": 1.6369211673736572, "learning_rate": 2.5942254566748286e-05, "loss": 0.0365, "step": 20976 }, { "epoch": 9.752673175267317, "grad_norm": 1.1025125980377197, "learning_rate": 2.5863766034361865e-05, "loss": 0.0225, "step": 20978 }, { "epoch": 9.753602975360298, "grad_norm": 1.030517816543579, "learning_rate": 2.5785268976953227e-05, "loss": 0.0346, "step": 20980 }, { "epoch": 9.754532775453278, "grad_norm": 0.8156864047050476, "learning_rate": 2.570676416925681e-05, "loss": 0.0398, "step": 20982 }, { "epoch": 9.755462575546257, "grad_norm": 0.6037646532058716, "learning_rate": 2.5628252386083376e-05, "loss": 0.0299, "step": 20984 }, { "epoch": 9.756392375639237, "grad_norm": 1.0942388772964478, "learning_rate": 2.5549734402312705e-05, "loss": 0.0504, "step": 20986 }, { "epoch": 9.757322175732218, "grad_norm": 0.7901957631111145, "learning_rate": 2.547121099288524e-05, "loss": 0.0205, "step": 20988 }, { "epoch": 9.758251975825198, "grad_norm": 0.6209781765937805, "learning_rate": 2.5392682932795513e-05, "loss": 0.0221, "step": 20990 }, { "epoch": 9.759181775918178, "grad_norm": 0.45550933480262756, "learning_rate": 2.5314150997083955e-05, "loss": 0.0211, "step": 20992 }, { "epoch": 9.760111576011157, "grad_norm": 1.5361814498901367, "learning_rate": 2.52356159608287e-05, "loss": 0.0324, "step": 20994 }, { "epoch": 9.761041376104137, "grad_norm": 0.9472241997718811, "learning_rate": 2.5157078599139038e-05, "loss": 0.042, "step": 20996 }, { "epoch": 9.761971176197118, "grad_norm": 1.7265413999557495, "learning_rate": 2.5078539687147188e-05, "loss": 0.0196, "step": 20998 }, { "epoch": 9.762900976290098, "grad_norm": 1.1779921054840088, "learning_rate": 2.499999999999998e-05, "loss": 0.0248, "step": 21000 }, { "epoch": 9.762900976290098, "eval_cer": 0.1335279716159778, "eval_loss": 0.20770516991615295, "eval_runtime": 399.5195, "eval_samples_per_second": 31.773, "eval_steps_per_second": 0.994, "step": 21000 }, { "epoch": 9.763830776383077, "grad_norm": 0.9113019704818726, "learning_rate": 2.4921460312852946e-05, "loss": 0.0138, "step": 21002 }, { "epoch": 9.764760576476057, "grad_norm": 1.1924824714660645, "learning_rate": 2.4842921400860924e-05, "loss": 0.0276, "step": 21004 }, { "epoch": 9.765690376569038, "grad_norm": 1.4643362760543823, "learning_rate": 2.4764384039171438e-05, "loss": 0.0311, "step": 21006 }, { "epoch": 9.766620176662018, "grad_norm": 1.1202845573425293, "learning_rate": 2.468584900291618e-05, "loss": 0.0297, "step": 21008 }, { "epoch": 9.767549976754998, "grad_norm": 2.148101806640625, "learning_rate": 2.4607317067204438e-05, "loss": 0.0385, "step": 21010 }, { "epoch": 9.768479776847977, "grad_norm": 0.4124605059623718, "learning_rate": 2.4528789007114885e-05, "loss": 0.0105, "step": 21012 }, { "epoch": 9.769409576940959, "grad_norm": 0.6067842245101929, "learning_rate": 2.445026559768742e-05, "loss": 0.0166, "step": 21014 }, { "epoch": 9.770339377033938, "grad_norm": 0.5913342237472534, "learning_rate": 2.4371747613916572e-05, "loss": 0.0195, "step": 21016 }, { "epoch": 9.771269177126918, "grad_norm": 1.5166305303573608, "learning_rate": 2.429323583074314e-05, "loss": 0.0318, "step": 21018 }, { "epoch": 9.772198977219897, "grad_norm": 0.728155791759491, "learning_rate": 2.42147310230469e-05, "loss": 0.0131, "step": 21020 }, { "epoch": 9.773128777312877, "grad_norm": 0.6778787970542908, "learning_rate": 2.4136233965638263e-05, "loss": 0.016, "step": 21022 }, { "epoch": 9.774058577405858, "grad_norm": 0.9047334790229797, "learning_rate": 2.4057745433251665e-05, "loss": 0.0238, "step": 21024 }, { "epoch": 9.774988377498838, "grad_norm": 0.7832279205322266, "learning_rate": 2.3979266200537418e-05, "loss": 0.0245, "step": 21026 }, { "epoch": 9.775918177591818, "grad_norm": 1.1314641237258911, "learning_rate": 2.390079704205333e-05, "loss": 0.0209, "step": 21028 }, { "epoch": 9.776847977684797, "grad_norm": 1.0061947107315063, "learning_rate": 2.3822338732258842e-05, "loss": 0.0156, "step": 21030 }, { "epoch": 9.777777777777779, "grad_norm": 1.0614219903945923, "learning_rate": 2.3743892045505808e-05, "loss": 0.0255, "step": 21032 }, { "epoch": 9.778707577870758, "grad_norm": 1.622346043586731, "learning_rate": 2.3665457756030995e-05, "loss": 0.0219, "step": 21034 }, { "epoch": 9.779637377963738, "grad_norm": 0.27204376459121704, "learning_rate": 2.3587036637949357e-05, "loss": 0.0154, "step": 21036 }, { "epoch": 9.780567178056717, "grad_norm": 1.6348055601119995, "learning_rate": 2.3508629465245667e-05, "loss": 0.0237, "step": 21038 }, { "epoch": 9.781496978149697, "grad_norm": 0.7717455625534058, "learning_rate": 2.3430237011767235e-05, "loss": 0.0365, "step": 21040 }, { "epoch": 9.782426778242678, "grad_norm": 0.9094606041908264, "learning_rate": 2.3351860051215583e-05, "loss": 0.0275, "step": 21042 }, { "epoch": 9.783356578335658, "grad_norm": 2.2817814350128174, "learning_rate": 2.3273499357139842e-05, "loss": 0.0301, "step": 21044 }, { "epoch": 9.784286378428638, "grad_norm": 1.5725266933441162, "learning_rate": 2.319515570292861e-05, "loss": 0.0594, "step": 21046 }, { "epoch": 9.785216178521617, "grad_norm": 1.048722267150879, "learning_rate": 2.3116829861801764e-05, "loss": 0.0128, "step": 21048 }, { "epoch": 9.786145978614599, "grad_norm": 0.7701202630996704, "learning_rate": 2.303852260680392e-05, "loss": 0.0149, "step": 21050 }, { "epoch": 9.787075778707578, "grad_norm": 0.20296135544776917, "learning_rate": 2.2960234710796063e-05, "loss": 0.0079, "step": 21052 }, { "epoch": 9.788005578800558, "grad_norm": 0.8897780179977417, "learning_rate": 2.288196694644813e-05, "loss": 0.0121, "step": 21054 }, { "epoch": 9.788935378893537, "grad_norm": 0.6611125469207764, "learning_rate": 2.280372008623135e-05, "loss": 0.0204, "step": 21056 }, { "epoch": 9.789865178986517, "grad_norm": 1.1970634460449219, "learning_rate": 2.2725494902410638e-05, "loss": 0.0227, "step": 21058 }, { "epoch": 9.790794979079498, "grad_norm": 0.6438745260238647, "learning_rate": 2.2647292167037168e-05, "loss": 0.0086, "step": 21060 }, { "epoch": 9.791724779172478, "grad_norm": 0.9091984629631042, "learning_rate": 2.2569112651940006e-05, "loss": 0.0279, "step": 21062 }, { "epoch": 9.792654579265458, "grad_norm": 0.4828213155269623, "learning_rate": 2.249095712871958e-05, "loss": 0.0142, "step": 21064 }, { "epoch": 9.793584379358437, "grad_norm": 1.2774615287780762, "learning_rate": 2.2412826368739528e-05, "loss": 0.0274, "step": 21066 }, { "epoch": 9.794514179451419, "grad_norm": 0.4891436994075775, "learning_rate": 2.2334721143118555e-05, "loss": 0.014, "step": 21068 }, { "epoch": 9.795443979544398, "grad_norm": 1.138724684715271, "learning_rate": 2.2256642222723883e-05, "loss": 0.019, "step": 21070 }, { "epoch": 9.796373779637378, "grad_norm": 1.236178994178772, "learning_rate": 2.2178590378162932e-05, "loss": 0.0187, "step": 21072 }, { "epoch": 9.797303579730357, "grad_norm": 1.9555003643035889, "learning_rate": 2.210056637977608e-05, "loss": 0.0298, "step": 21074 }, { "epoch": 9.798233379823339, "grad_norm": 0.6787943840026855, "learning_rate": 2.202257099762833e-05, "loss": 0.0214, "step": 21076 }, { "epoch": 9.799163179916318, "grad_norm": 0.9665157198905945, "learning_rate": 2.19446050015028e-05, "loss": 0.0129, "step": 21078 }, { "epoch": 9.800092980009298, "grad_norm": 1.1195839643478394, "learning_rate": 2.1866669160892567e-05, "loss": 0.0163, "step": 21080 }, { "epoch": 9.801022780102278, "grad_norm": 0.79791659116745, "learning_rate": 2.1788764244992383e-05, "loss": 0.031, "step": 21082 }, { "epoch": 9.801952580195259, "grad_norm": 0.7657071948051453, "learning_rate": 2.171089102269285e-05, "loss": 0.032, "step": 21084 }, { "epoch": 9.802882380288239, "grad_norm": 0.764632523059845, "learning_rate": 2.163305026257124e-05, "loss": 0.0121, "step": 21086 }, { "epoch": 9.803812180381218, "grad_norm": 0.7318683862686157, "learning_rate": 2.155524273288406e-05, "loss": 0.0222, "step": 21088 }, { "epoch": 9.804741980474198, "grad_norm": 2.1022627353668213, "learning_rate": 2.1477469201560408e-05, "loss": 0.0853, "step": 21090 }, { "epoch": 9.805671780567177, "grad_norm": 1.6846503019332886, "learning_rate": 2.139973043619363e-05, "loss": 0.0364, "step": 21092 }, { "epoch": 9.806601580660159, "grad_norm": 0.9351772665977478, "learning_rate": 2.1322027204034137e-05, "loss": 0.0159, "step": 21094 }, { "epoch": 9.807531380753138, "grad_norm": 0.8814836740493774, "learning_rate": 2.1244360271981104e-05, "loss": 0.0116, "step": 21096 }, { "epoch": 9.808461180846118, "grad_norm": 1.2835909128189087, "learning_rate": 2.1166730406575978e-05, "loss": 0.0269, "step": 21098 }, { "epoch": 9.809390980939098, "grad_norm": 0.5557108521461487, "learning_rate": 2.108913837399436e-05, "loss": 0.0138, "step": 21100 }, { "epoch": 9.810320781032079, "grad_norm": 1.8327653408050537, "learning_rate": 2.1011584940037934e-05, "loss": 0.0187, "step": 21102 }, { "epoch": 9.811250581125059, "grad_norm": 0.31121209263801575, "learning_rate": 2.0934070870127963e-05, "loss": 0.0174, "step": 21104 }, { "epoch": 9.812180381218038, "grad_norm": 0.42354437708854675, "learning_rate": 2.085659692929702e-05, "loss": 0.0146, "step": 21106 }, { "epoch": 9.813110181311018, "grad_norm": 0.9022510647773743, "learning_rate": 2.07791638821818e-05, "loss": 0.0149, "step": 21108 }, { "epoch": 9.814039981403997, "grad_norm": 0.7031683325767517, "learning_rate": 2.0701772493014696e-05, "loss": 0.0094, "step": 21110 }, { "epoch": 9.814969781496979, "grad_norm": 1.4712706804275513, "learning_rate": 2.0624423525617996e-05, "loss": 0.0234, "step": 21112 }, { "epoch": 9.815899581589958, "grad_norm": 0.7404355406761169, "learning_rate": 2.054711774339478e-05, "loss": 0.023, "step": 21114 }, { "epoch": 9.816829381682938, "grad_norm": 0.7482485771179199, "learning_rate": 2.0469855909321564e-05, "loss": 0.019, "step": 21116 }, { "epoch": 9.817759181775918, "grad_norm": 0.6501482725143433, "learning_rate": 2.0392638785941627e-05, "loss": 0.0144, "step": 21118 }, { "epoch": 9.818688981868899, "grad_norm": 2.164371967315674, "learning_rate": 2.0315467135356978e-05, "loss": 0.0291, "step": 21120 }, { "epoch": 9.819618781961879, "grad_norm": 0.7803456783294678, "learning_rate": 2.0238341719220308e-05, "loss": 0.0336, "step": 21122 }, { "epoch": 9.820548582054858, "grad_norm": 0.9960726499557495, "learning_rate": 2.016126329872851e-05, "loss": 0.0157, "step": 21124 }, { "epoch": 9.821478382147838, "grad_norm": 0.7922543287277222, "learning_rate": 2.0084232634614476e-05, "loss": 0.0204, "step": 21126 }, { "epoch": 9.822408182240817, "grad_norm": 0.3833554685115814, "learning_rate": 2.000725048713993e-05, "loss": 0.0195, "step": 21128 }, { "epoch": 9.823337982333799, "grad_norm": 0.30819275975227356, "learning_rate": 1.993031761608726e-05, "loss": 0.0102, "step": 21130 }, { "epoch": 9.824267782426778, "grad_norm": 0.8450417518615723, "learning_rate": 1.9853434780753e-05, "loss": 0.018, "step": 21132 }, { "epoch": 9.825197582519758, "grad_norm": 0.37118637561798096, "learning_rate": 1.9776602739939877e-05, "loss": 0.008, "step": 21134 }, { "epoch": 9.826127382612738, "grad_norm": 1.1623915433883667, "learning_rate": 1.9699822251948592e-05, "loss": 0.0164, "step": 21136 }, { "epoch": 9.827057182705719, "grad_norm": 1.738770842552185, "learning_rate": 1.9623094074572085e-05, "loss": 0.0445, "step": 21138 }, { "epoch": 9.827986982798699, "grad_norm": 1.3166661262512207, "learning_rate": 1.954641896508632e-05, "loss": 0.0241, "step": 21140 }, { "epoch": 9.828916782891678, "grad_norm": 1.6902695894241333, "learning_rate": 1.946979768024384e-05, "loss": 0.0331, "step": 21142 }, { "epoch": 9.829846582984658, "grad_norm": 0.9820387959480286, "learning_rate": 1.939323097626545e-05, "loss": 0.0232, "step": 21144 }, { "epoch": 9.83077638307764, "grad_norm": 1.716986060142517, "learning_rate": 1.931671960883378e-05, "loss": 0.0226, "step": 21146 }, { "epoch": 9.831706183170619, "grad_norm": 1.6406487226486206, "learning_rate": 1.9240264333085313e-05, "loss": 0.0159, "step": 21148 }, { "epoch": 9.832635983263598, "grad_norm": 0.8316081166267395, "learning_rate": 1.9163865903602404e-05, "loss": 0.0192, "step": 21150 }, { "epoch": 9.833565783356578, "grad_norm": 1.097002387046814, "learning_rate": 1.9087525074406882e-05, "loss": 0.0188, "step": 21152 }, { "epoch": 9.834495583449558, "grad_norm": 1.3871139287948608, "learning_rate": 1.9011242598952088e-05, "loss": 0.0266, "step": 21154 }, { "epoch": 9.835425383542539, "grad_norm": 0.6021047234535217, "learning_rate": 1.893501923011491e-05, "loss": 0.0092, "step": 21156 }, { "epoch": 9.836355183635519, "grad_norm": 0.3519536554813385, "learning_rate": 1.88588557201894e-05, "loss": 0.0098, "step": 21158 }, { "epoch": 9.837284983728498, "grad_norm": 0.9766266942024231, "learning_rate": 1.8782752820878655e-05, "loss": 0.0289, "step": 21160 }, { "epoch": 9.838214783821478, "grad_norm": 0.3449132740497589, "learning_rate": 1.8706711283287728e-05, "loss": 0.0074, "step": 21162 }, { "epoch": 9.83914458391446, "grad_norm": 0.8526092171669006, "learning_rate": 1.863073185791539e-05, "loss": 0.0294, "step": 21164 }, { "epoch": 9.840074384007439, "grad_norm": 1.9162706136703491, "learning_rate": 1.8554815294648412e-05, "loss": 0.0296, "step": 21166 }, { "epoch": 9.841004184100418, "grad_norm": 0.6562959551811218, "learning_rate": 1.8478962342752617e-05, "loss": 0.012, "step": 21168 }, { "epoch": 9.841933984193398, "grad_norm": 0.4830157458782196, "learning_rate": 1.8403173750865678e-05, "loss": 0.0127, "step": 21170 }, { "epoch": 9.84286378428638, "grad_norm": 1.195530652999878, "learning_rate": 1.8327450266990572e-05, "loss": 0.0248, "step": 21172 }, { "epoch": 9.843793584379359, "grad_norm": 0.7517695426940918, "learning_rate": 1.825179263848769e-05, "loss": 0.0151, "step": 21174 }, { "epoch": 9.844723384472339, "grad_norm": 0.8628256916999817, "learning_rate": 1.817620161206693e-05, "loss": 0.0117, "step": 21176 }, { "epoch": 9.845653184565318, "grad_norm": 1.158907413482666, "learning_rate": 1.810067793378138e-05, "loss": 0.0229, "step": 21178 }, { "epoch": 9.846582984658298, "grad_norm": 1.0594547986984253, "learning_rate": 1.8025222349019253e-05, "loss": 0.0165, "step": 21180 }, { "epoch": 9.84751278475128, "grad_norm": 1.4142855405807495, "learning_rate": 1.794983560249688e-05, "loss": 0.0402, "step": 21182 }, { "epoch": 9.848442584844259, "grad_norm": 0.33009228110313416, "learning_rate": 1.7874518438250675e-05, "loss": 0.0109, "step": 21184 }, { "epoch": 9.849372384937238, "grad_norm": 0.6096293330192566, "learning_rate": 1.779927159963081e-05, "loss": 0.0127, "step": 21186 }, { "epoch": 9.850302185030218, "grad_norm": 1.3879303932189941, "learning_rate": 1.7724095829293372e-05, "loss": 0.0148, "step": 21188 }, { "epoch": 9.8512319851232, "grad_norm": 0.442330926656723, "learning_rate": 1.764899186919253e-05, "loss": 0.0086, "step": 21190 }, { "epoch": 9.852161785216179, "grad_norm": 1.0396596193313599, "learning_rate": 1.7573960460574052e-05, "loss": 0.03, "step": 21192 }, { "epoch": 9.853091585309159, "grad_norm": 0.4308894872665405, "learning_rate": 1.749900234396798e-05, "loss": 0.0142, "step": 21194 }, { "epoch": 9.854021385402138, "grad_norm": 0.48039552569389343, "learning_rate": 1.7424118259180668e-05, "loss": 0.0193, "step": 21196 }, { "epoch": 9.854951185495118, "grad_norm": 0.903914749622345, "learning_rate": 1.7349308945287463e-05, "loss": 0.0227, "step": 21198 }, { "epoch": 9.855880985588099, "grad_norm": 0.49028822779655457, "learning_rate": 1.727457514062626e-05, "loss": 0.0118, "step": 21200 }, { "epoch": 9.856810785681079, "grad_norm": 0.9401408433914185, "learning_rate": 1.7199917582789707e-05, "loss": 0.0243, "step": 21202 }, { "epoch": 9.857740585774058, "grad_norm": 1.2338136434555054, "learning_rate": 1.712533700861742e-05, "loss": 0.0252, "step": 21204 }, { "epoch": 9.858670385867038, "grad_norm": 0.855456531047821, "learning_rate": 1.705083415418973e-05, "loss": 0.0473, "step": 21206 }, { "epoch": 9.85960018596002, "grad_norm": 0.7976994514465332, "learning_rate": 1.6976409754819896e-05, "loss": 0.0321, "step": 21208 }, { "epoch": 9.860529986052999, "grad_norm": 0.660814642906189, "learning_rate": 1.690206454504636e-05, "loss": 0.0244, "step": 21210 }, { "epoch": 9.861459786145979, "grad_norm": 0.5001892447471619, "learning_rate": 1.6827799258626493e-05, "loss": 0.0168, "step": 21212 }, { "epoch": 9.862389586238958, "grad_norm": 0.5525653958320618, "learning_rate": 1.6753614628528693e-05, "loss": 0.0094, "step": 21214 }, { "epoch": 9.863319386331938, "grad_norm": 0.5247082710266113, "learning_rate": 1.667951138692548e-05, "loss": 0.0103, "step": 21216 }, { "epoch": 9.864249186424919, "grad_norm": 0.9914645552635193, "learning_rate": 1.6605490265185424e-05, "loss": 0.015, "step": 21218 }, { "epoch": 9.865178986517899, "grad_norm": 0.2955265939235687, "learning_rate": 1.653155199386762e-05, "loss": 0.0181, "step": 21220 }, { "epoch": 9.866108786610878, "grad_norm": 1.1493695974349976, "learning_rate": 1.645769730271295e-05, "loss": 0.0277, "step": 21222 }, { "epoch": 9.867038586703858, "grad_norm": 0.3204687237739563, "learning_rate": 1.638392692063707e-05, "loss": 0.0137, "step": 21224 }, { "epoch": 9.86796838679684, "grad_norm": 1.8571516275405884, "learning_rate": 1.631024157572403e-05, "loss": 0.0299, "step": 21226 }, { "epoch": 9.868898186889819, "grad_norm": 0.41888442635536194, "learning_rate": 1.6236641995218446e-05, "loss": 0.007, "step": 21228 }, { "epoch": 9.869827986982799, "grad_norm": 0.5397762656211853, "learning_rate": 1.6163128905518622e-05, "loss": 0.0598, "step": 21230 }, { "epoch": 9.870757787075778, "grad_norm": 0.6807245016098022, "learning_rate": 1.6089703032168744e-05, "loss": 0.0104, "step": 21232 }, { "epoch": 9.87168758716876, "grad_norm": 1.0204553604125977, "learning_rate": 1.6016365099852705e-05, "loss": 0.0131, "step": 21234 }, { "epoch": 9.872617387261739, "grad_norm": 1.305677890777588, "learning_rate": 1.5943115832386458e-05, "loss": 0.0373, "step": 21236 }, { "epoch": 9.873547187354719, "grad_norm": 0.5652944445610046, "learning_rate": 1.586995595271037e-05, "loss": 0.0096, "step": 21238 }, { "epoch": 9.874476987447698, "grad_norm": 0.2560262978076935, "learning_rate": 1.579688618288308e-05, "loss": 0.0232, "step": 21240 }, { "epoch": 9.87540678754068, "grad_norm": 1.1853536367416382, "learning_rate": 1.572390724407388e-05, "loss": 0.0157, "step": 21242 }, { "epoch": 9.87633658763366, "grad_norm": 0.6543559432029724, "learning_rate": 1.565101985655511e-05, "loss": 0.0107, "step": 21244 }, { "epoch": 9.877266387726639, "grad_norm": 0.5997948050498962, "learning_rate": 1.5578224739695853e-05, "loss": 0.0241, "step": 21246 }, { "epoch": 9.878196187819619, "grad_norm": 1.5778865814208984, "learning_rate": 1.5505522611954856e-05, "loss": 0.0172, "step": 21248 }, { "epoch": 9.879125987912598, "grad_norm": 1.0293285846710205, "learning_rate": 1.5432914190872767e-05, "loss": 0.0142, "step": 21250 }, { "epoch": 9.88005578800558, "grad_norm": 0.3231503665447235, "learning_rate": 1.536040019306506e-05, "loss": 0.0113, "step": 21252 }, { "epoch": 9.880985588098559, "grad_norm": 1.4508057832717896, "learning_rate": 1.5287981334215784e-05, "loss": 0.0127, "step": 21254 }, { "epoch": 9.881915388191539, "grad_norm": 1.3046531677246094, "learning_rate": 1.5215658329070002e-05, "loss": 0.0163, "step": 21256 }, { "epoch": 9.882845188284518, "grad_norm": 0.6350612044334412, "learning_rate": 1.5143431891426248e-05, "loss": 0.0112, "step": 21258 }, { "epoch": 9.8837749883775, "grad_norm": 0.9349697232246399, "learning_rate": 1.5071302734130474e-05, "loss": 0.016, "step": 21260 }, { "epoch": 9.88470478847048, "grad_norm": 0.8442132472991943, "learning_rate": 1.4999271569068497e-05, "loss": 0.0172, "step": 21262 }, { "epoch": 9.885634588563459, "grad_norm": 0.18410788476467133, "learning_rate": 1.4927339107158516e-05, "loss": 0.0066, "step": 21264 }, { "epoch": 9.886564388656438, "grad_norm": 1.3518264293670654, "learning_rate": 1.4855506058345046e-05, "loss": 0.0177, "step": 21266 }, { "epoch": 9.887494188749418, "grad_norm": 0.5789269208908081, "learning_rate": 1.4783773131591283e-05, "loss": 0.0095, "step": 21268 }, { "epoch": 9.8884239888424, "grad_norm": 0.7802212238311768, "learning_rate": 1.4712141034872416e-05, "loss": 0.0088, "step": 21270 }, { "epoch": 9.889353788935379, "grad_norm": 0.604471743106842, "learning_rate": 1.4640610475167996e-05, "loss": 0.0137, "step": 21272 }, { "epoch": 9.890283589028359, "grad_norm": 0.5086084604263306, "learning_rate": 1.4569182158455768e-05, "loss": 0.0128, "step": 21274 }, { "epoch": 9.891213389121338, "grad_norm": 1.3591594696044922, "learning_rate": 1.4497856789704868e-05, "loss": 0.0182, "step": 21276 }, { "epoch": 9.89214318921432, "grad_norm": 0.35414233803749084, "learning_rate": 1.4426635072867408e-05, "loss": 0.0077, "step": 21278 }, { "epoch": 9.8930729893073, "grad_norm": 0.3595898449420929, "learning_rate": 1.4355517710873138e-05, "loss": 0.0073, "step": 21280 }, { "epoch": 9.894002789400279, "grad_norm": 0.9452372789382935, "learning_rate": 1.428450540562171e-05, "loss": 0.0157, "step": 21282 }, { "epoch": 9.894932589493258, "grad_norm": 1.0617755651474, "learning_rate": 1.4213598857976068e-05, "loss": 0.0185, "step": 21284 }, { "epoch": 9.895862389586238, "grad_norm": 0.5060895681381226, "learning_rate": 1.4142798767754894e-05, "loss": 0.0111, "step": 21286 }, { "epoch": 9.89679218967922, "grad_norm": 1.1655324697494507, "learning_rate": 1.4072105833726652e-05, "loss": 0.0327, "step": 21288 }, { "epoch": 9.897721989772199, "grad_norm": 1.4186631441116333, "learning_rate": 1.400152075360222e-05, "loss": 0.0219, "step": 21290 }, { "epoch": 9.898651789865179, "grad_norm": 1.238330602645874, "learning_rate": 1.3931044224027524e-05, "loss": 0.0224, "step": 21292 }, { "epoch": 9.899581589958158, "grad_norm": 0.52821284532547, "learning_rate": 1.386067694057762e-05, "loss": 0.0118, "step": 21294 }, { "epoch": 9.90051139005114, "grad_norm": 0.3006243109703064, "learning_rate": 1.3790419597749343e-05, "loss": 0.0239, "step": 21296 }, { "epoch": 9.90144119014412, "grad_norm": 0.4743809401988983, "learning_rate": 1.3720272888953988e-05, "loss": 0.0287, "step": 21298 }, { "epoch": 9.902370990237099, "grad_norm": 0.7003999352455139, "learning_rate": 1.365023750651141e-05, "loss": 0.0105, "step": 21300 }, { "epoch": 9.903300790330078, "grad_norm": 0.4949309825897217, "learning_rate": 1.3580314141642397e-05, "loss": 0.0256, "step": 21302 }, { "epoch": 9.90423059042306, "grad_norm": 0.8394972085952759, "learning_rate": 1.351050348446281e-05, "loss": 0.0105, "step": 21304 }, { "epoch": 9.90516039051604, "grad_norm": 1.0735681056976318, "learning_rate": 1.3440806223975176e-05, "loss": 0.0127, "step": 21306 }, { "epoch": 9.906090190609019, "grad_norm": 0.7730695009231567, "learning_rate": 1.337122304806348e-05, "loss": 0.024, "step": 21308 }, { "epoch": 9.907019990701999, "grad_norm": 0.3558708131313324, "learning_rate": 1.3301754643485724e-05, "loss": 0.0116, "step": 21310 }, { "epoch": 9.907949790794978, "grad_norm": 0.7878754734992981, "learning_rate": 1.3232401695866706e-05, "loss": 0.0123, "step": 21312 }, { "epoch": 9.90887959088796, "grad_norm": 0.19865736365318298, "learning_rate": 1.3163164889692184e-05, "loss": 0.0054, "step": 21314 }, { "epoch": 9.90980939098094, "grad_norm": 0.5098492503166199, "learning_rate": 1.3094044908301474e-05, "loss": 0.0092, "step": 21316 }, { "epoch": 9.910739191073919, "grad_norm": 0.25896233320236206, "learning_rate": 1.3025042433881048e-05, "loss": 0.0154, "step": 21318 }, { "epoch": 9.911668991166898, "grad_norm": 0.2821354568004608, "learning_rate": 1.2956158147457155e-05, "loss": 0.0066, "step": 21320 }, { "epoch": 9.91259879125988, "grad_norm": 0.569591224193573, "learning_rate": 1.2887392728890052e-05, "loss": 0.0089, "step": 21322 }, { "epoch": 9.91352859135286, "grad_norm": 0.6455589532852173, "learning_rate": 1.2818746856866814e-05, "loss": 0.0093, "step": 21324 }, { "epoch": 9.914458391445839, "grad_norm": 0.6467027068138123, "learning_rate": 1.2750221208894174e-05, "loss": 0.0139, "step": 21326 }, { "epoch": 9.915388191538819, "grad_norm": 0.6991382241249084, "learning_rate": 1.268181646129261e-05, "loss": 0.0312, "step": 21328 }, { "epoch": 9.9163179916318, "grad_norm": 1.2078638076782227, "learning_rate": 1.2613533289189832e-05, "loss": 0.0172, "step": 21330 }, { "epoch": 9.91724779172478, "grad_norm": 0.7173454761505127, "learning_rate": 1.2545372366512718e-05, "loss": 0.0122, "step": 21332 }, { "epoch": 9.91817759181776, "grad_norm": 1.5630736351013184, "learning_rate": 1.2477334365982206e-05, "loss": 0.0332, "step": 21334 }, { "epoch": 9.919107391910739, "grad_norm": 0.46987032890319824, "learning_rate": 1.24094199591059e-05, "loss": 0.0106, "step": 21336 }, { "epoch": 9.920037192003718, "grad_norm": 1.515573501586914, "learning_rate": 1.2341629816171727e-05, "loss": 0.0128, "step": 21338 }, { "epoch": 9.9209669920967, "grad_norm": 0.12940743565559387, "learning_rate": 1.2273964606240723e-05, "loss": 0.006, "step": 21340 }, { "epoch": 9.92189679218968, "grad_norm": 1.5048452615737915, "learning_rate": 1.2206424997141349e-05, "loss": 0.0174, "step": 21342 }, { "epoch": 9.922826592282659, "grad_norm": 2.6828103065490723, "learning_rate": 1.2139011655462428e-05, "loss": 0.0302, "step": 21344 }, { "epoch": 9.923756392375639, "grad_norm": 1.1702700853347778, "learning_rate": 1.2071725246546124e-05, "loss": 0.0232, "step": 21346 }, { "epoch": 9.92468619246862, "grad_norm": 0.6794484257698059, "learning_rate": 1.2004566434482285e-05, "loss": 0.019, "step": 21348 }, { "epoch": 9.9256159925616, "grad_norm": 0.4254205822944641, "learning_rate": 1.1937535882101417e-05, "loss": 0.0114, "step": 21350 }, { "epoch": 9.92654579265458, "grad_norm": 1.1053928136825562, "learning_rate": 1.1870634250967711e-05, "loss": 0.0246, "step": 21352 }, { "epoch": 9.927475592747559, "grad_norm": 0.2443574070930481, "learning_rate": 1.1803862201373411e-05, "loss": 0.0166, "step": 21354 }, { "epoch": 9.928405392840538, "grad_norm": 0.8445461988449097, "learning_rate": 1.1737220392331536e-05, "loss": 0.0141, "step": 21356 }, { "epoch": 9.92933519293352, "grad_norm": 0.7419790029525757, "learning_rate": 1.167070948157029e-05, "loss": 0.0114, "step": 21358 }, { "epoch": 9.9302649930265, "grad_norm": 0.37863728404045105, "learning_rate": 1.1604330125525055e-05, "loss": 0.0087, "step": 21360 }, { "epoch": 9.931194793119479, "grad_norm": 0.8040772676467896, "learning_rate": 1.1538082979333432e-05, "loss": 0.0119, "step": 21362 }, { "epoch": 9.932124593212459, "grad_norm": 0.3608481287956238, "learning_rate": 1.1471968696828156e-05, "loss": 0.0096, "step": 21364 }, { "epoch": 9.93305439330544, "grad_norm": 0.8708871006965637, "learning_rate": 1.1405987930530208e-05, "loss": 0.0171, "step": 21366 }, { "epoch": 9.93398419339842, "grad_norm": 0.70891273021698, "learning_rate": 1.134014133164326e-05, "loss": 0.0104, "step": 21368 }, { "epoch": 9.9349139934914, "grad_norm": 0.5964924097061157, "learning_rate": 1.1274429550046661e-05, "loss": 0.0144, "step": 21370 }, { "epoch": 9.935843793584379, "grad_norm": 0.3371500074863434, "learning_rate": 1.1208853234289315e-05, "loss": 0.0074, "step": 21372 }, { "epoch": 9.936773593677358, "grad_norm": 0.2075449675321579, "learning_rate": 1.1143413031582685e-05, "loss": 0.0087, "step": 21374 }, { "epoch": 9.93770339377034, "grad_norm": 1.068024754524231, "learning_rate": 1.1078109587795305e-05, "loss": 0.0131, "step": 21376 }, { "epoch": 9.93863319386332, "grad_norm": 0.24172353744506836, "learning_rate": 1.1012943547445946e-05, "loss": 0.0109, "step": 21378 }, { "epoch": 9.939562993956299, "grad_norm": 0.5106493234634399, "learning_rate": 1.0947915553696822e-05, "loss": 0.0142, "step": 21380 }, { "epoch": 9.940492794049279, "grad_norm": 1.3645174503326416, "learning_rate": 1.0883026248348127e-05, "loss": 0.0196, "step": 21382 }, { "epoch": 9.94142259414226, "grad_norm": 1.051592469215393, "learning_rate": 1.0818276271831108e-05, "loss": 0.0086, "step": 21384 }, { "epoch": 9.94235239423524, "grad_norm": 1.2543137073516846, "learning_rate": 1.0753666263201896e-05, "loss": 0.0281, "step": 21386 }, { "epoch": 9.94328219432822, "grad_norm": 1.0424449443817139, "learning_rate": 1.0689196860135185e-05, "loss": 0.0128, "step": 21388 }, { "epoch": 9.944211994421199, "grad_norm": 0.5747120380401611, "learning_rate": 1.062486869891796e-05, "loss": 0.014, "step": 21390 }, { "epoch": 9.94514179451418, "grad_norm": 1.1732151508331299, "learning_rate": 1.0560682414443348e-05, "loss": 0.0127, "step": 21392 }, { "epoch": 9.94607159460716, "grad_norm": 2.086686372756958, "learning_rate": 1.0496638640203772e-05, "loss": 0.0234, "step": 21394 }, { "epoch": 9.94700139470014, "grad_norm": 0.3637048006057739, "learning_rate": 1.0432738008285571e-05, "loss": 0.0092, "step": 21396 }, { "epoch": 9.947931194793119, "grad_norm": 0.6946988105773926, "learning_rate": 1.036898114936233e-05, "loss": 0.0178, "step": 21398 }, { "epoch": 9.9488609948861, "grad_norm": 0.8359127640724182, "learning_rate": 1.0305368692688225e-05, "loss": 0.0122, "step": 21400 }, { "epoch": 9.94979079497908, "grad_norm": 0.7976959943771362, "learning_rate": 1.0241901266092658e-05, "loss": 0.0091, "step": 21402 }, { "epoch": 9.95072059507206, "grad_norm": 0.2963663637638092, "learning_rate": 1.0178579495973507e-05, "loss": 0.0069, "step": 21404 }, { "epoch": 9.95165039516504, "grad_norm": 0.8207062482833862, "learning_rate": 1.0115404007291228e-05, "loss": 0.0131, "step": 21406 }, { "epoch": 9.952580195258019, "grad_norm": 0.8815699815750122, "learning_rate": 1.0052375423562095e-05, "loss": 0.0169, "step": 21408 }, { "epoch": 9.953509995351, "grad_norm": 0.8991329073905945, "learning_rate": 9.989494366852792e-06, "loss": 0.0117, "step": 21410 }, { "epoch": 9.95443979544398, "grad_norm": 0.1434587836265564, "learning_rate": 9.926761457774385e-06, "loss": 0.0066, "step": 21412 }, { "epoch": 9.95536959553696, "grad_norm": 0.24034561216831207, "learning_rate": 9.864177315474936e-06, "loss": 0.0055, "step": 21414 }, { "epoch": 9.956299395629939, "grad_norm": 1.503424048423767, "learning_rate": 9.801742557634804e-06, "loss": 0.0159, "step": 21416 }, { "epoch": 9.95722919572292, "grad_norm": 0.8753485083580017, "learning_rate": 9.739457800459987e-06, "loss": 0.0184, "step": 21418 }, { "epoch": 9.9581589958159, "grad_norm": 1.2181833982467651, "learning_rate": 9.67732365867561e-06, "loss": 0.0412, "step": 21420 }, { "epoch": 9.95908879590888, "grad_norm": 1.0637651681900024, "learning_rate": 9.615340745520702e-06, "loss": 0.0099, "step": 21422 }, { "epoch": 9.96001859600186, "grad_norm": 0.6067829728126526, "learning_rate": 9.553509672741599e-06, "loss": 0.0136, "step": 21424 }, { "epoch": 9.960948396094839, "grad_norm": 0.42348384857177734, "learning_rate": 9.491831050586166e-06, "loss": 0.0165, "step": 21426 }, { "epoch": 9.96187819618782, "grad_norm": 0.47278350591659546, "learning_rate": 9.430305487797225e-06, "loss": 0.012, "step": 21428 }, { "epoch": 9.9628079962808, "grad_norm": 0.266225665807724, "learning_rate": 9.368933591607377e-06, "loss": 0.0095, "step": 21430 }, { "epoch": 9.96373779637378, "grad_norm": 1.0793190002441406, "learning_rate": 9.307715967732601e-06, "loss": 0.0152, "step": 21432 }, { "epoch": 9.964667596466759, "grad_norm": 1.3987677097320557, "learning_rate": 9.246653220365856e-06, "loss": 0.0326, "step": 21434 }, { "epoch": 9.96559739655974, "grad_norm": 0.36641186475753784, "learning_rate": 9.185745952171939e-06, "loss": 0.006, "step": 21436 }, { "epoch": 9.96652719665272, "grad_norm": 0.26265060901641846, "learning_rate": 9.124994764281005e-06, "loss": 0.015, "step": 21438 }, { "epoch": 9.9674569967457, "grad_norm": 0.4352189898490906, "learning_rate": 9.064400256282748e-06, "loss": 0.028, "step": 21440 }, { "epoch": 9.96838679683868, "grad_norm": 0.9554153680801392, "learning_rate": 9.003963026220499e-06, "loss": 0.0132, "step": 21442 }, { "epoch": 9.969316596931659, "grad_norm": 0.26225024461746216, "learning_rate": 8.94368367058532e-06, "loss": 0.0058, "step": 21444 }, { "epoch": 9.97024639702464, "grad_norm": 1.6146883964538574, "learning_rate": 8.883562784310251e-06, "loss": 0.019, "step": 21446 }, { "epoch": 9.97117619711762, "grad_norm": 0.8449541330337524, "learning_rate": 8.823600960763898e-06, "loss": 0.0189, "step": 21448 }, { "epoch": 9.9721059972106, "grad_norm": 0.8544593453407288, "learning_rate": 8.763798791745384e-06, "loss": 0.0168, "step": 21450 }, { "epoch": 9.973035797303579, "grad_norm": 0.5186305046081543, "learning_rate": 8.70415686747811e-06, "loss": 0.0079, "step": 21452 }, { "epoch": 9.97396559739656, "grad_norm": 0.672985851764679, "learning_rate": 8.644675776603523e-06, "loss": 0.0233, "step": 21454 }, { "epoch": 9.97489539748954, "grad_norm": 0.25308340787887573, "learning_rate": 8.585356106176112e-06, "loss": 0.015, "step": 21456 }, { "epoch": 9.97582519758252, "grad_norm": 0.7427087426185608, "learning_rate": 8.526198441657068e-06, "loss": 0.0113, "step": 21458 }, { "epoch": 9.9767549976755, "grad_norm": 0.8502529263496399, "learning_rate": 8.467203366908798e-06, "loss": 0.023, "step": 21460 }, { "epoch": 9.97768479776848, "grad_norm": 0.5045360922813416, "learning_rate": 8.408371464188603e-06, "loss": 0.009, "step": 21462 }, { "epoch": 9.97861459786146, "grad_norm": 0.738111674785614, "learning_rate": 8.349703314143743e-06, "loss": 0.0102, "step": 21464 }, { "epoch": 9.97954439795444, "grad_norm": 1.758612871170044, "learning_rate": 8.291199495805178e-06, "loss": 0.0274, "step": 21466 }, { "epoch": 9.98047419804742, "grad_norm": 0.9569330215454102, "learning_rate": 8.232860586581976e-06, "loss": 0.0086, "step": 21468 }, { "epoch": 9.981403998140399, "grad_norm": 0.2515107989311218, "learning_rate": 8.174687162255613e-06, "loss": 0.0055, "step": 21470 }, { "epoch": 9.98233379823338, "grad_norm": 1.138981819152832, "learning_rate": 8.116679796974437e-06, "loss": 0.0178, "step": 21472 }, { "epoch": 9.98326359832636, "grad_norm": 1.0740631818771362, "learning_rate": 8.058839063247464e-06, "loss": 0.0259, "step": 21474 }, { "epoch": 9.98419339841934, "grad_norm": 0.6939243078231812, "learning_rate": 8.001165531939511e-06, "loss": 0.0085, "step": 21476 }, { "epoch": 9.98512319851232, "grad_norm": 0.21445024013519287, "learning_rate": 7.943659772265054e-06, "loss": 0.0223, "step": 21478 }, { "epoch": 9.9860529986053, "grad_norm": 0.6178061962127686, "learning_rate": 7.886322351782847e-06, "loss": 0.0128, "step": 21480 }, { "epoch": 9.98698279869828, "grad_norm": 0.7846469283103943, "learning_rate": 7.829153836389832e-06, "loss": 0.0098, "step": 21482 }, { "epoch": 9.98791259879126, "grad_norm": 1.7662298679351807, "learning_rate": 7.772154790316296e-06, "loss": 0.0238, "step": 21484 }, { "epoch": 9.98884239888424, "grad_norm": 0.2560727894306183, "learning_rate": 7.715325776119949e-06, "loss": 0.007, "step": 21486 }, { "epoch": 9.98977219897722, "grad_norm": 0.6322963833808899, "learning_rate": 7.658667354679954e-06, "loss": 0.0168, "step": 21488 }, { "epoch": 9.9907019990702, "grad_norm": 0.5269032716751099, "learning_rate": 7.602180085192192e-06, "loss": 0.0111, "step": 21490 }, { "epoch": 9.99163179916318, "grad_norm": 0.35745200514793396, "learning_rate": 7.545864525163079e-06, "loss": 0.0195, "step": 21492 }, { "epoch": 9.99256159925616, "grad_norm": 0.42703142762184143, "learning_rate": 7.489721230404837e-06, "loss": 0.009, "step": 21494 }, { "epoch": 9.99349139934914, "grad_norm": 0.3475998640060425, "learning_rate": 7.433750755028737e-06, "loss": 0.0055, "step": 21496 }, { "epoch": 9.99442119944212, "grad_norm": 0.25793686509132385, "learning_rate": 7.377953651440903e-06, "loss": 0.0185, "step": 21498 }, { "epoch": 9.9953509995351, "grad_norm": 0.9724370837211609, "learning_rate": 7.322330470336347e-06, "loss": 0.0245, "step": 21500 }, { "epoch": 9.99628079962808, "grad_norm": 0.32585445046424866, "learning_rate": 7.2668817606931615e-06, "loss": 0.0057, "step": 21502 }, { "epoch": 9.99721059972106, "grad_norm": 0.3106752336025238, "learning_rate": 7.211608069767847e-06, "loss": 0.0073, "step": 21504 }, { "epoch": 9.99814039981404, "grad_norm": 0.7314226031303406, "learning_rate": 7.156509943089545e-06, "loss": 0.0121, "step": 21506 }, { "epoch": 9.99907019990702, "grad_norm": 0.07096082717180252, "learning_rate": 7.101587924454269e-06, "loss": 0.0085, "step": 21508 }, { "epoch": 10.0, "grad_norm": 0.20905353128910065, "learning_rate": 7.046842555920302e-06, "loss": 0.0095, "step": 21510 }, { "epoch": 10.00092980009298, "grad_norm": 0.4728732109069824, "learning_rate": 6.99227437780232e-06, "loss": 0.0053, "step": 21512 }, { "epoch": 10.00185960018596, "grad_norm": 0.5766304731369019, "learning_rate": 6.937883928666343e-06, "loss": 0.0059, "step": 21514 }, { "epoch": 10.00278940027894, "grad_norm": 0.29690268635749817, "learning_rate": 6.883671745323895e-06, "loss": 0.0047, "step": 21516 }, { "epoch": 10.00371920037192, "grad_norm": 0.20313331484794617, "learning_rate": 6.829638362827463e-06, "loss": 0.0047, "step": 21518 }, { "epoch": 10.0046490004649, "grad_norm": 0.10831072926521301, "learning_rate": 6.775784314464718e-06, "loss": 0.0044, "step": 21520 }, { "epoch": 10.00557880055788, "grad_norm": 0.9340137243270874, "learning_rate": 6.722110131753377e-06, "loss": 0.0122, "step": 21522 }, { "epoch": 10.00650860065086, "grad_norm": 0.3259643018245697, "learning_rate": 6.6686163444359534e-06, "loss": 0.0054, "step": 21524 }, { "epoch": 10.00743840074384, "grad_norm": 0.8241928219795227, "learning_rate": 6.615303480474529e-06, "loss": 0.0051, "step": 21526 }, { "epoch": 10.00836820083682, "grad_norm": 0.6469022035598755, "learning_rate": 6.5621720660456734e-06, "loss": 0.0161, "step": 21528 }, { "epoch": 10.0092980009298, "grad_norm": 0.18935364484786987, "learning_rate": 6.509222625534752e-06, "loss": 0.0051, "step": 21530 }, { "epoch": 10.010227801022781, "grad_norm": 0.2647024989128113, "learning_rate": 6.4564556815314865e-06, "loss": 0.0047, "step": 21532 }, { "epoch": 10.01115760111576, "grad_norm": 0.198951855301857, "learning_rate": 6.403871754824433e-06, "loss": 0.0064, "step": 21534 }, { "epoch": 10.01208740120874, "grad_norm": 1.1978093385696411, "learning_rate": 6.3514713643954805e-06, "loss": 0.0064, "step": 21536 }, { "epoch": 10.01301720130172, "grad_norm": 0.09569618850946426, "learning_rate": 6.299255027415446e-06, "loss": 0.0053, "step": 21538 }, { "epoch": 10.0139470013947, "grad_norm": 0.12395673245191574, "learning_rate": 6.247223259238611e-06, "loss": 0.0032, "step": 21540 }, { "epoch": 10.01487680148768, "grad_norm": 0.4169042706489563, "learning_rate": 6.1953765733972886e-06, "loss": 0.0064, "step": 21542 }, { "epoch": 10.01580660158066, "grad_norm": 0.20942693948745728, "learning_rate": 6.143715481597451e-06, "loss": 0.0036, "step": 21544 }, { "epoch": 10.01673640167364, "grad_norm": 0.685283899307251, "learning_rate": 6.092240493713222e-06, "loss": 0.0135, "step": 21546 }, { "epoch": 10.01766620176662, "grad_norm": 0.28225186467170715, "learning_rate": 6.040952117781948e-06, "loss": 0.0199, "step": 21548 }, { "epoch": 10.018596001859601, "grad_norm": 0.5005629658699036, "learning_rate": 5.989850859999194e-06, "loss": 0.006, "step": 21550 }, { "epoch": 10.01952580195258, "grad_norm": 0.4231517016887665, "learning_rate": 5.938937224713748e-06, "loss": 0.013, "step": 21552 }, { "epoch": 10.02045560204556, "grad_norm": 0.35766300559043884, "learning_rate": 5.888211714422742e-06, "loss": 0.0072, "step": 21554 }, { "epoch": 10.02138540213854, "grad_norm": 0.3096015453338623, "learning_rate": 5.83767482976626e-06, "loss": 0.0047, "step": 21556 }, { "epoch": 10.02231520223152, "grad_norm": 0.5908424258232117, "learning_rate": 5.787327069523067e-06, "loss": 0.0109, "step": 21558 }, { "epoch": 10.0232450023245, "grad_norm": 0.06917202472686768, "learning_rate": 5.7371689306053385e-06, "loss": 0.0063, "step": 21560 }, { "epoch": 10.02417480241748, "grad_norm": 0.16493457555770874, "learning_rate": 5.687200908053433e-06, "loss": 0.0038, "step": 21562 }, { "epoch": 10.02510460251046, "grad_norm": 1.554207444190979, "learning_rate": 5.637423495031674e-06, "loss": 0.012, "step": 21564 }, { "epoch": 10.02603440260344, "grad_norm": 0.33994466066360474, "learning_rate": 5.58783718282303e-06, "loss": 0.0059, "step": 21566 }, { "epoch": 10.026964202696421, "grad_norm": 0.12197005748748779, "learning_rate": 5.538442460824494e-06, "loss": 0.0036, "step": 21568 }, { "epoch": 10.0278940027894, "grad_norm": 0.4146116077899933, "learning_rate": 5.489239816541813e-06, "loss": 0.0046, "step": 21570 }, { "epoch": 10.02882380288238, "grad_norm": 0.2051737904548645, "learning_rate": 5.440229735585327e-06, "loss": 0.0041, "step": 21572 }, { "epoch": 10.02975360297536, "grad_norm": 0.1694311499595642, "learning_rate": 5.391412701664862e-06, "loss": 0.0051, "step": 21574 }, { "epoch": 10.030683403068341, "grad_norm": 0.48061197996139526, "learning_rate": 5.34278919658451e-06, "loss": 0.0075, "step": 21576 }, { "epoch": 10.03161320316132, "grad_norm": 0.914577305316925, "learning_rate": 5.294359700238957e-06, "loss": 0.0113, "step": 21578 }, { "epoch": 10.0325430032543, "grad_norm": 0.35158199071884155, "learning_rate": 5.246124690607673e-06, "loss": 0.0071, "step": 21580 }, { "epoch": 10.03347280334728, "grad_norm": 0.2854892611503601, "learning_rate": 5.198084643750842e-06, "loss": 0.0047, "step": 21582 }, { "epoch": 10.03440260344026, "grad_norm": 0.5021907091140747, "learning_rate": 5.1502400338041105e-06, "loss": 0.0067, "step": 21584 }, { "epoch": 10.035332403533241, "grad_norm": 0.2811710238456726, "learning_rate": 5.1025913329745745e-06, "loss": 0.0058, "step": 21586 }, { "epoch": 10.03626220362622, "grad_norm": 0.33736103773117065, "learning_rate": 5.055139011535778e-06, "loss": 0.0076, "step": 21588 }, { "epoch": 10.0371920037192, "grad_norm": 0.4657565951347351, "learning_rate": 5.007883537822766e-06, "loss": 0.0083, "step": 21590 }, { "epoch": 10.03812180381218, "grad_norm": 0.08875282853841782, "learning_rate": 4.960825378228089e-06, "loss": 0.0042, "step": 21592 }, { "epoch": 10.039051603905161, "grad_norm": 0.3790278732776642, "learning_rate": 4.913964997196899e-06, "loss": 0.0096, "step": 21594 }, { "epoch": 10.03998140399814, "grad_norm": 0.4659610092639923, "learning_rate": 4.867302857222015e-06, "loss": 0.0081, "step": 21596 }, { "epoch": 10.04091120409112, "grad_norm": 0.536570131778717, "learning_rate": 4.820839418840035e-06, "loss": 0.0061, "step": 21598 }, { "epoch": 10.0418410041841, "grad_norm": 0.09461947530508041, "learning_rate": 4.774575140626332e-06, "loss": 0.0031, "step": 21600 }, { "epoch": 10.04277080427708, "grad_norm": 0.27885985374450684, "learning_rate": 4.728510479190763e-06, "loss": 0.0059, "step": 21602 }, { "epoch": 10.043700604370061, "grad_norm": 0.9094914793968201, "learning_rate": 4.6826458891726225e-06, "loss": 0.0163, "step": 21604 }, { "epoch": 10.04463040446304, "grad_norm": 0.9651092886924744, "learning_rate": 4.636981823237215e-06, "loss": 0.0058, "step": 21606 }, { "epoch": 10.04556020455602, "grad_norm": 0.2129446566104889, "learning_rate": 4.591518732070431e-06, "loss": 0.0039, "step": 21608 }, { "epoch": 10.046490004649, "grad_norm": 0.31402677297592163, "learning_rate": 4.54625706437442e-06, "loss": 0.0033, "step": 21610 }, { "epoch": 10.047419804741981, "grad_norm": 0.2010621577501297, "learning_rate": 4.5011972668636765e-06, "loss": 0.0056, "step": 21612 }, { "epoch": 10.04834960483496, "grad_norm": 0.28880277276039124, "learning_rate": 4.456339784260207e-06, "loss": 0.0066, "step": 21614 }, { "epoch": 10.04927940492794, "grad_norm": 0.534664511680603, "learning_rate": 4.4116850592893555e-06, "loss": 0.0067, "step": 21616 }, { "epoch": 10.05020920502092, "grad_norm": 0.30877506732940674, "learning_rate": 4.367233532675031e-06, "loss": 0.0035, "step": 21618 }, { "epoch": 10.051139005113901, "grad_norm": 0.6136394143104553, "learning_rate": 4.322985643135952e-06, "loss": 0.0049, "step": 21620 }, { "epoch": 10.052068805206881, "grad_norm": 0.8154939413070679, "learning_rate": 4.278941827381025e-06, "loss": 0.0078, "step": 21622 }, { "epoch": 10.05299860529986, "grad_norm": 0.2896881401538849, "learning_rate": 4.235102520104736e-06, "loss": 0.0062, "step": 21624 }, { "epoch": 10.05392840539284, "grad_norm": 0.19389109313488007, "learning_rate": 4.191468153983443e-06, "loss": 0.0081, "step": 21626 }, { "epoch": 10.05485820548582, "grad_norm": 0.14330320060253143, "learning_rate": 4.148039159670824e-06, "loss": 0.0026, "step": 21628 }, { "epoch": 10.055788005578801, "grad_norm": 0.3819088637828827, "learning_rate": 4.104815965793229e-06, "loss": 0.0107, "step": 21630 }, { "epoch": 10.05671780567178, "grad_norm": 1.5924203395843506, "learning_rate": 4.061798998946419e-06, "loss": 0.0126, "step": 21632 }, { "epoch": 10.05764760576476, "grad_norm": 0.3394891619682312, "learning_rate": 4.0189886836904e-06, "loss": 0.0132, "step": 21634 }, { "epoch": 10.05857740585774, "grad_norm": 0.1902756541967392, "learning_rate": 3.976385442545791e-06, "loss": 0.0059, "step": 21636 }, { "epoch": 10.059507205950721, "grad_norm": 0.17289596796035767, "learning_rate": 3.933989695989191e-06, "loss": 0.0053, "step": 21638 }, { "epoch": 10.060437006043701, "grad_norm": 0.21129043400287628, "learning_rate": 3.891801862449599e-06, "loss": 0.0041, "step": 21640 }, { "epoch": 10.06136680613668, "grad_norm": 0.4531809091567993, "learning_rate": 3.849822358303996e-06, "loss": 0.0057, "step": 21642 }, { "epoch": 10.06229660622966, "grad_norm": 0.2763444185256958, "learning_rate": 3.8080515978729476e-06, "loss": 0.0047, "step": 21644 }, { "epoch": 10.06322640632264, "grad_norm": 0.7534215450286865, "learning_rate": 3.766489993417094e-06, "loss": 0.007, "step": 21646 }, { "epoch": 10.064156206415621, "grad_norm": 0.3528742492198944, "learning_rate": 3.725137955132782e-06, "loss": 0.0061, "step": 21648 }, { "epoch": 10.0650860065086, "grad_norm": 0.407747358083725, "learning_rate": 3.683995891147751e-06, "loss": 0.0113, "step": 21650 }, { "epoch": 10.06601580660158, "grad_norm": 0.1301284283399582, "learning_rate": 3.6430642075176576e-06, "loss": 0.0032, "step": 21652 }, { "epoch": 10.06694560669456, "grad_norm": 0.9726051092147827, "learning_rate": 3.6023433082216924e-06, "loss": 0.0077, "step": 21654 }, { "epoch": 10.067875406787541, "grad_norm": 0.3793378472328186, "learning_rate": 3.5618335951587813e-06, "loss": 0.0076, "step": 21656 }, { "epoch": 10.068805206880521, "grad_norm": 0.18170960247516632, "learning_rate": 3.5215354681431663e-06, "loss": 0.0051, "step": 21658 }, { "epoch": 10.0697350069735, "grad_norm": 0.6151535511016846, "learning_rate": 3.4814493249013646e-06, "loss": 0.0043, "step": 21660 }, { "epoch": 10.07066480706648, "grad_norm": 0.3010707199573517, "learning_rate": 3.4415755610674285e-06, "loss": 0.0071, "step": 21662 }, { "epoch": 10.071594607159462, "grad_norm": 0.14260295033454895, "learning_rate": 3.401914570179124e-06, "loss": 0.0063, "step": 21664 }, { "epoch": 10.072524407252441, "grad_norm": 0.14169102907180786, "learning_rate": 3.3624667436745107e-06, "loss": 0.005, "step": 21666 }, { "epoch": 10.07345420734542, "grad_norm": 0.25038355588912964, "learning_rate": 3.3232324708877156e-06, "loss": 0.0047, "step": 21668 }, { "epoch": 10.0743840074384, "grad_norm": 0.0722779855132103, "learning_rate": 3.2842121390452527e-06, "loss": 0.0026, "step": 21670 }, { "epoch": 10.07531380753138, "grad_norm": 0.24693520367145538, "learning_rate": 3.2454061332618738e-06, "loss": 0.0061, "step": 21672 }, { "epoch": 10.076243607624361, "grad_norm": 0.48316702246665955, "learning_rate": 3.2068148365372747e-06, "loss": 0.007, "step": 21674 }, { "epoch": 10.077173407717341, "grad_norm": 0.15267318487167358, "learning_rate": 3.16843862975206e-06, "loss": 0.0041, "step": 21676 }, { "epoch": 10.07810320781032, "grad_norm": 0.20557242631912231, "learning_rate": 3.1302778916637247e-06, "loss": 0.0037, "step": 21678 }, { "epoch": 10.0790330079033, "grad_norm": 0.032266877591609955, "learning_rate": 3.0923329989034357e-06, "loss": 0.003, "step": 21680 }, { "epoch": 10.079962807996282, "grad_norm": 0.07016509026288986, "learning_rate": 3.0546043259720494e-06, "loss": 0.0037, "step": 21682 }, { "epoch": 10.080892608089261, "grad_norm": 0.22929653525352478, "learning_rate": 3.017092245236164e-06, "loss": 0.0058, "step": 21684 }, { "epoch": 10.08182240818224, "grad_norm": 0.3157115876674652, "learning_rate": 2.9797971269248767e-06, "loss": 0.0071, "step": 21686 }, { "epoch": 10.08275220827522, "grad_norm": 0.08062831312417984, "learning_rate": 2.942719339126118e-06, "loss": 0.0022, "step": 21688 }, { "epoch": 10.0836820083682, "grad_norm": 0.17584270238876343, "learning_rate": 2.9058592477826797e-06, "loss": 0.0057, "step": 21690 }, { "epoch": 10.084611808461181, "grad_norm": 0.6966472864151001, "learning_rate": 2.8692172166886155e-06, "loss": 0.0075, "step": 21692 }, { "epoch": 10.085541608554161, "grad_norm": 0.39501938223838806, "learning_rate": 2.8327936074860644e-06, "loss": 0.0079, "step": 21694 }, { "epoch": 10.08647140864714, "grad_norm": 0.2459646612405777, "learning_rate": 2.7965887796614274e-06, "loss": 0.004, "step": 21696 }, { "epoch": 10.08740120874012, "grad_norm": 0.6496378183364868, "learning_rate": 2.760603090541575e-06, "loss": 0.0175, "step": 21698 }, { "epoch": 10.088331008833102, "grad_norm": 0.3142193853855133, "learning_rate": 2.7248368952908083e-06, "loss": 0.0044, "step": 21700 }, { "epoch": 10.089260808926081, "grad_norm": 1.595138669013977, "learning_rate": 2.689290546907039e-06, "loss": 0.0135, "step": 21702 }, { "epoch": 10.09019060901906, "grad_norm": 0.2315404713153839, "learning_rate": 2.6539643962184557e-06, "loss": 0.004, "step": 21704 }, { "epoch": 10.09112040911204, "grad_norm": 0.28348350524902344, "learning_rate": 2.6188587918797383e-06, "loss": 0.005, "step": 21706 }, { "epoch": 10.092050209205022, "grad_norm": 0.14303071796894073, "learning_rate": 2.5839740803691168e-06, "loss": 0.006, "step": 21708 }, { "epoch": 10.092980009298001, "grad_norm": 1.2057807445526123, "learning_rate": 2.5493106059846836e-06, "loss": 0.0103, "step": 21710 }, { "epoch": 10.093909809390981, "grad_norm": 0.37672486901283264, "learning_rate": 2.5148687108407035e-06, "loss": 0.0047, "step": 21712 }, { "epoch": 10.09483960948396, "grad_norm": 0.1394534856081009, "learning_rate": 2.4806487348650095e-06, "loss": 0.0036, "step": 21714 }, { "epoch": 10.09576940957694, "grad_norm": 0.23942790925502777, "learning_rate": 2.4466510157949507e-06, "loss": 0.0056, "step": 21716 }, { "epoch": 10.096699209669922, "grad_norm": 0.30981072783470154, "learning_rate": 2.412875889174134e-06, "loss": 0.0054, "step": 21718 }, { "epoch": 10.097629009762901, "grad_norm": 0.19021637737751007, "learning_rate": 2.379323688349499e-06, "loss": 0.0044, "step": 21720 }, { "epoch": 10.09855880985588, "grad_norm": 0.46153828501701355, "learning_rate": 2.3459947444677245e-06, "loss": 0.0045, "step": 21722 }, { "epoch": 10.09948860994886, "grad_norm": 0.13807640969753265, "learning_rate": 2.3128893864721054e-06, "loss": 0.0041, "step": 21724 }, { "epoch": 10.100418410041842, "grad_norm": 0.18626263737678528, "learning_rate": 2.28000794109901e-06, "loss": 0.007, "step": 21726 }, { "epoch": 10.101348210134821, "grad_norm": 0.5365481376647949, "learning_rate": 2.2473507328751026e-06, "loss": 0.01, "step": 21728 }, { "epoch": 10.102278010227801, "grad_norm": 0.1004176065325737, "learning_rate": 2.21491808411392e-06, "loss": 0.0043, "step": 21730 }, { "epoch": 10.10320781032078, "grad_norm": 0.17166611552238464, "learning_rate": 2.182710314912467e-06, "loss": 0.0036, "step": 21732 }, { "epoch": 10.104137610413762, "grad_norm": 0.8816288709640503, "learning_rate": 2.1507277431484895e-06, "loss": 0.0045, "step": 21734 }, { "epoch": 10.105067410506742, "grad_norm": 0.17564009130001068, "learning_rate": 2.118970684477137e-06, "loss": 0.0072, "step": 21736 }, { "epoch": 10.105997210599721, "grad_norm": 0.1284118890762329, "learning_rate": 2.0874394523276113e-06, "loss": 0.0033, "step": 21738 }, { "epoch": 10.1069270106927, "grad_norm": 0.3142309784889221, "learning_rate": 2.0561343579004444e-06, "loss": 0.0052, "step": 21740 }, { "epoch": 10.10785681078568, "grad_norm": 0.37538835406303406, "learning_rate": 2.025055710164423e-06, "loss": 0.0168, "step": 21742 }, { "epoch": 10.108786610878662, "grad_norm": 0.33656278252601624, "learning_rate": 1.994203815853255e-06, "loss": 0.0046, "step": 21744 }, { "epoch": 10.109716410971641, "grad_norm": 0.0907326266169548, "learning_rate": 1.963578979462536e-06, "loss": 0.0048, "step": 21746 }, { "epoch": 10.110646211064621, "grad_norm": 0.9130950570106506, "learning_rate": 1.933181503247112e-06, "loss": 0.0174, "step": 21748 }, { "epoch": 10.1115760111576, "grad_norm": 0.08127091079950333, "learning_rate": 1.9030116872178657e-06, "loss": 0.0031, "step": 21750 }, { "epoch": 10.112505811250582, "grad_norm": 0.14529365301132202, "learning_rate": 1.8730698291385696e-06, "loss": 0.0063, "step": 21752 }, { "epoch": 10.113435611343562, "grad_norm": 0.5950282216072083, "learning_rate": 1.8433562245233387e-06, "loss": 0.0058, "step": 21754 }, { "epoch": 10.114365411436541, "grad_norm": 2.283590078353882, "learning_rate": 1.8138711666334584e-06, "loss": 0.014, "step": 21756 }, { "epoch": 10.11529521152952, "grad_norm": 0.07280270010232925, "learning_rate": 1.7846149464746091e-06, "loss": 0.0036, "step": 21758 }, { "epoch": 10.1162250116225, "grad_norm": 0.6047022938728333, "learning_rate": 1.7555878527937422e-06, "loss": 0.0051, "step": 21760 }, { "epoch": 10.117154811715482, "grad_norm": 1.2563892602920532, "learning_rate": 1.726790172076618e-06, "loss": 0.01, "step": 21762 }, { "epoch": 10.118084611808461, "grad_norm": 0.7356751561164856, "learning_rate": 1.6982221885447884e-06, "loss": 0.0087, "step": 21764 }, { "epoch": 10.119014411901441, "grad_norm": 0.09514646232128143, "learning_rate": 1.6698841841525823e-06, "loss": 0.0037, "step": 21766 }, { "epoch": 10.11994421199442, "grad_norm": 0.4383307099342346, "learning_rate": 1.641776438584667e-06, "loss": 0.0054, "step": 21768 }, { "epoch": 10.120874012087402, "grad_norm": 0.4057369530200958, "learning_rate": 1.6138992292533357e-06, "loss": 0.004, "step": 21770 }, { "epoch": 10.121803812180381, "grad_norm": 0.8866657614707947, "learning_rate": 1.5862528312951963e-06, "loss": 0.0185, "step": 21772 }, { "epoch": 10.122733612273361, "grad_norm": 0.15503865480422974, "learning_rate": 1.5588375175690984e-06, "loss": 0.0035, "step": 21774 }, { "epoch": 10.12366341236634, "grad_norm": 0.5140541791915894, "learning_rate": 1.531653558653124e-06, "loss": 0.0075, "step": 21776 }, { "epoch": 10.124593212459322, "grad_norm": 0.32880493998527527, "learning_rate": 1.5047012228420318e-06, "loss": 0.0376, "step": 21778 }, { "epoch": 10.125523012552302, "grad_norm": 0.18810461461544037, "learning_rate": 1.4779807761443728e-06, "loss": 0.0069, "step": 21780 }, { "epoch": 10.126452812645281, "grad_norm": 0.31820744276046753, "learning_rate": 1.451492482280232e-06, "loss": 0.0047, "step": 21782 }, { "epoch": 10.127382612738261, "grad_norm": 0.27595123648643494, "learning_rate": 1.4252366026784322e-06, "loss": 0.0055, "step": 21784 }, { "epoch": 10.12831241283124, "grad_norm": 0.0946207121014595, "learning_rate": 1.3992133964737872e-06, "loss": 0.0045, "step": 21786 }, { "epoch": 10.129242212924222, "grad_norm": 1.2617411613464355, "learning_rate": 1.3734231205048948e-06, "loss": 0.015, "step": 21788 }, { "epoch": 10.130172013017201, "grad_norm": 0.0837922915816307, "learning_rate": 1.3478660293113716e-06, "loss": 0.0067, "step": 21790 }, { "epoch": 10.131101813110181, "grad_norm": 0.14552851021289825, "learning_rate": 1.3225423751314425e-06, "loss": 0.008, "step": 21792 }, { "epoch": 10.13203161320316, "grad_norm": 0.06583212316036224, "learning_rate": 1.2974524078991783e-06, "loss": 0.0036, "step": 21794 }, { "epoch": 10.132961413296142, "grad_norm": 0.3902694582939148, "learning_rate": 1.2725963752426027e-06, "loss": 0.0058, "step": 21796 }, { "epoch": 10.133891213389122, "grad_norm": 0.6320227980613708, "learning_rate": 1.2479745224807169e-06, "loss": 0.0085, "step": 21798 }, { "epoch": 10.134821013482101, "grad_norm": 0.1134895458817482, "learning_rate": 1.223587092621157e-06, "loss": 0.0073, "step": 21800 }, { "epoch": 10.135750813575081, "grad_norm": 0.4987502098083496, "learning_rate": 1.1994343263580685e-06, "loss": 0.0057, "step": 21802 }, { "epoch": 10.13668061366806, "grad_norm": 0.10787331312894821, "learning_rate": 1.175516462069557e-06, "loss": 0.0084, "step": 21804 }, { "epoch": 10.137610413761042, "grad_norm": 0.17494763433933258, "learning_rate": 1.1518337358151756e-06, "loss": 0.0041, "step": 21806 }, { "epoch": 10.138540213854021, "grad_norm": 0.36025941371917725, "learning_rate": 1.1283863813339297e-06, "loss": 0.0059, "step": 21808 }, { "epoch": 10.139470013947001, "grad_norm": 0.6799572706222534, "learning_rate": 1.1051746300417394e-06, "loss": 0.0201, "step": 21810 }, { "epoch": 10.14039981403998, "grad_norm": 0.47223514318466187, "learning_rate": 1.0821987110292704e-06, "loss": 0.0053, "step": 21812 }, { "epoch": 10.141329614132962, "grad_norm": 0.49094530940055847, "learning_rate": 1.0594588510594644e-06, "loss": 0.0068, "step": 21814 }, { "epoch": 10.142259414225942, "grad_norm": 0.356006920337677, "learning_rate": 1.0369552745656102e-06, "loss": 0.0063, "step": 21816 }, { "epoch": 10.143189214318921, "grad_norm": 0.734500527381897, "learning_rate": 1.0146882036489758e-06, "loss": 0.0072, "step": 21818 }, { "epoch": 10.144119014411901, "grad_norm": 0.12181918323040009, "learning_rate": 9.9265785807646e-07, "loss": 0.0045, "step": 21820 }, { "epoch": 10.145048814504882, "grad_norm": 0.11829199641942978, "learning_rate": 9.708644552786783e-07, "loss": 0.0068, "step": 21822 }, { "epoch": 10.145978614597862, "grad_norm": 0.3621716797351837, "learning_rate": 9.493082103478661e-07, "loss": 0.0054, "step": 21824 }, { "epoch": 10.146908414690841, "grad_norm": 1.130730152130127, "learning_rate": 9.279893360353124e-07, "loss": 0.0073, "step": 21826 }, { "epoch": 10.147838214783821, "grad_norm": 0.24073483049869537, "learning_rate": 9.069080427497493e-07, "loss": 0.0049, "step": 21828 }, { "epoch": 10.1487680148768, "grad_norm": 0.11307564377784729, "learning_rate": 8.86064538555029e-07, "loss": 0.0045, "step": 21830 }, { "epoch": 10.149697814969782, "grad_norm": 0.3525427281856537, "learning_rate": 8.654590291681701e-07, "loss": 0.0052, "step": 21832 }, { "epoch": 10.150627615062762, "grad_norm": 0.2044956088066101, "learning_rate": 8.450917179571392e-07, "loss": 0.005, "step": 21834 }, { "epoch": 10.151557415155741, "grad_norm": 0.06934583932161331, "learning_rate": 8.249628059391199e-07, "loss": 0.003, "step": 21836 }, { "epoch": 10.15248721524872, "grad_norm": 0.24910524487495422, "learning_rate": 8.050724917783946e-07, "loss": 0.0045, "step": 21838 }, { "epoch": 10.153417015341702, "grad_norm": 0.12431599944829941, "learning_rate": 7.854209717842429e-07, "loss": 0.0026, "step": 21840 }, { "epoch": 10.154346815434682, "grad_norm": 0.13882040977478027, "learning_rate": 7.660084399092801e-07, "loss": 0.0053, "step": 21842 }, { "epoch": 10.155276615527661, "grad_norm": 0.20792615413665771, "learning_rate": 7.468350877473554e-07, "loss": 0.0059, "step": 21844 }, { "epoch": 10.156206415620641, "grad_norm": 0.09850143641233444, "learning_rate": 7.279011045317616e-07, "loss": 0.0041, "step": 21846 }, { "epoch": 10.15713621571362, "grad_norm": 0.221795454621315, "learning_rate": 7.092066771331759e-07, "loss": 0.0039, "step": 21848 }, { "epoch": 10.158066015806602, "grad_norm": 0.25995948910713196, "learning_rate": 6.907519900580586e-07, "loss": 0.0034, "step": 21850 }, { "epoch": 10.158995815899582, "grad_norm": 0.09836166352033615, "learning_rate": 6.725372254468429e-07, "loss": 0.0101, "step": 21852 }, { "epoch": 10.159925615992561, "grad_norm": 0.2991373538970947, "learning_rate": 6.545625630717758e-07, "loss": 0.0047, "step": 21854 }, { "epoch": 10.16085541608554, "grad_norm": 0.0992470383644104, "learning_rate": 6.36828180335561e-07, "loss": 0.0105, "step": 21856 }, { "epoch": 10.161785216178522, "grad_norm": 0.39076805114746094, "learning_rate": 6.193342522694354e-07, "loss": 0.0085, "step": 21858 }, { "epoch": 10.162715016271502, "grad_norm": 0.3088371157646179, "learning_rate": 6.020809515313255e-07, "loss": 0.0036, "step": 21860 }, { "epoch": 10.163644816364481, "grad_norm": 0.20349270105361938, "learning_rate": 5.850684484043886e-07, "loss": 0.0047, "step": 21862 }, { "epoch": 10.164574616457461, "grad_norm": 0.5619430541992188, "learning_rate": 5.682969107951576e-07, "loss": 0.0059, "step": 21864 }, { "epoch": 10.165504416550442, "grad_norm": 0.8535001277923584, "learning_rate": 5.517665042319766e-07, "loss": 0.0192, "step": 21866 }, { "epoch": 10.166434216643422, "grad_norm": 0.3132452070713043, "learning_rate": 5.354773918632124e-07, "loss": 0.0059, "step": 21868 }, { "epoch": 10.167364016736402, "grad_norm": 0.40000104904174805, "learning_rate": 5.19429734455862e-07, "loss": 0.008, "step": 21870 }, { "epoch": 10.168293816829381, "grad_norm": 0.2747734487056732, "learning_rate": 5.036236903938621e-07, "loss": 0.0032, "step": 21872 }, { "epoch": 10.16922361692236, "grad_norm": 0.1730402112007141, "learning_rate": 4.880594156764148e-07, "loss": 0.0072, "step": 21874 }, { "epoch": 10.170153417015342, "grad_norm": 0.610512375831604, "learning_rate": 4.7273706391666474e-07, "loss": 0.0062, "step": 21876 }, { "epoch": 10.171083217108322, "grad_norm": 0.14724695682525635, "learning_rate": 4.5765678634000244e-07, "loss": 0.0049, "step": 21878 }, { "epoch": 10.172013017201301, "grad_norm": 0.3644307553768158, "learning_rate": 4.42818731782785e-07, "loss": 0.0047, "step": 21880 }, { "epoch": 10.172942817294281, "grad_norm": 0.1839289367198944, "learning_rate": 4.2822304669051263e-07, "loss": 0.0042, "step": 21882 }, { "epoch": 10.173872617387262, "grad_norm": 0.17940270900726318, "learning_rate": 4.1386987511674603e-07, "loss": 0.0145, "step": 21884 }, { "epoch": 10.174802417480242, "grad_norm": 0.2697952091693878, "learning_rate": 3.99759358721519e-07, "loss": 0.0072, "step": 21886 }, { "epoch": 10.175732217573222, "grad_norm": 0.8033081293106079, "learning_rate": 3.8589163676986976e-07, "loss": 0.0088, "step": 21888 }, { "epoch": 10.176662017666201, "grad_norm": 0.5656349062919617, "learning_rate": 3.722668461306508e-07, "loss": 0.0128, "step": 21890 }, { "epoch": 10.177591817759183, "grad_norm": 0.2048841267824173, "learning_rate": 3.5888512127507124e-07, "loss": 0.0107, "step": 21892 }, { "epoch": 10.178521617852162, "grad_norm": 0.4101751446723938, "learning_rate": 3.457465942752927e-07, "loss": 0.0167, "step": 21894 }, { "epoch": 10.179451417945142, "grad_norm": 0.09821304678916931, "learning_rate": 3.328513948033049e-07, "loss": 0.0053, "step": 21896 }, { "epoch": 10.180381218038121, "grad_norm": 0.18621477484703064, "learning_rate": 3.201996501295215e-07, "loss": 0.0038, "step": 21898 }, { "epoch": 10.181311018131101, "grad_norm": 1.0426170825958252, "learning_rate": 3.0779148512158106e-07, "loss": 0.0078, "step": 21900 }, { "epoch": 10.182240818224082, "grad_norm": 0.5503432750701904, "learning_rate": 2.9562702224300333e-07, "loss": 0.0045, "step": 21902 }, { "epoch": 10.183170618317062, "grad_norm": 0.14507848024368286, "learning_rate": 2.8370638155213217e-07, "loss": 0.0027, "step": 21904 }, { "epoch": 10.184100418410042, "grad_norm": 0.12788140773773193, "learning_rate": 2.7202968070095845e-07, "loss": 0.0127, "step": 21906 }, { "epoch": 10.185030218503021, "grad_norm": 0.2636089324951172, "learning_rate": 2.605970349337242e-07, "loss": 0.0038, "step": 21908 }, { "epoch": 10.185960018596003, "grad_norm": 0.6482944488525391, "learning_rate": 2.4940855708605353e-07, "loss": 0.0077, "step": 21910 }, { "epoch": 10.186889818688982, "grad_norm": 0.9950451850891113, "learning_rate": 2.384643575837317e-07, "loss": 0.0072, "step": 21912 }, { "epoch": 10.187819618781962, "grad_norm": 0.5926790237426758, "learning_rate": 2.277645444415391e-07, "loss": 0.0078, "step": 21914 }, { "epoch": 10.188749418874941, "grad_norm": 0.2334466576576233, "learning_rate": 2.1730922326233552e-07, "loss": 0.0033, "step": 21916 }, { "epoch": 10.189679218967921, "grad_norm": 0.3079580068588257, "learning_rate": 2.0709849723592493e-07, "loss": 0.0061, "step": 21918 }, { "epoch": 10.190609019060902, "grad_norm": 0.5790102481842041, "learning_rate": 1.971324671380672e-07, "loss": 0.005, "step": 21920 }, { "epoch": 10.191538819153882, "grad_norm": 1.0519185066223145, "learning_rate": 1.8741123132941538e-07, "loss": 0.0073, "step": 21922 }, { "epoch": 10.192468619246862, "grad_norm": 0.10670884698629379, "learning_rate": 1.779348857546605e-07, "loss": 0.0055, "step": 21924 }, { "epoch": 10.193398419339841, "grad_norm": 0.9976654052734375, "learning_rate": 1.687035239415326e-07, "loss": 0.0121, "step": 21926 }, { "epoch": 10.194328219432823, "grad_norm": 0.16121473908424377, "learning_rate": 1.5971723699980414e-07, "loss": 0.0039, "step": 21928 }, { "epoch": 10.195258019525802, "grad_norm": 1.0120497941970825, "learning_rate": 1.5097611362051855e-07, "loss": 0.0066, "step": 21930 }, { "epoch": 10.196187819618782, "grad_norm": 0.4513673782348633, "learning_rate": 1.4248024007501314e-07, "loss": 0.0049, "step": 21932 }, { "epoch": 10.197117619711761, "grad_norm": 0.3342040479183197, "learning_rate": 1.342297002141919e-07, "loss": 0.0043, "step": 21934 }, { "epoch": 10.198047419804743, "grad_norm": 0.09586300700902939, "learning_rate": 1.2622457546749297e-07, "loss": 0.0022, "step": 21936 }, { "epoch": 10.198977219897722, "grad_norm": 0.7039956450462341, "learning_rate": 1.1846494484228372e-07, "loss": 0.0065, "step": 21938 }, { "epoch": 10.199907019990702, "grad_norm": 0.19572828710079193, "learning_rate": 1.109508849230057e-07, "loss": 0.0041, "step": 21940 }, { "epoch": 10.200836820083682, "grad_norm": 0.14604254066944122, "learning_rate": 1.036824698703615e-07, "loss": 0.0041, "step": 21942 }, { "epoch": 10.201766620176661, "grad_norm": 0.6731509566307068, "learning_rate": 9.665977142068743e-08, "loss": 0.0089, "step": 21944 }, { "epoch": 10.202696420269643, "grad_norm": 0.1512695550918579, "learning_rate": 8.988285888520136e-08, "loss": 0.0038, "step": 21946 }, { "epoch": 10.203626220362622, "grad_norm": 0.1935105174779892, "learning_rate": 8.335179914925888e-08, "loss": 0.0074, "step": 21948 }, { "epoch": 10.204556020455602, "grad_norm": 0.5899786353111267, "learning_rate": 7.706665667180373e-08, "loss": 0.0092, "step": 21950 }, { "epoch": 10.205485820548581, "grad_norm": 0.14643338322639465, "learning_rate": 7.102749348465169e-08, "loss": 0.0046, "step": 21952 }, { "epoch": 10.206415620641563, "grad_norm": 0.4669265151023865, "learning_rate": 6.523436919191886e-08, "loss": 0.0053, "step": 21954 }, { "epoch": 10.207345420734542, "grad_norm": 0.7384536266326904, "learning_rate": 5.96873409693777e-08, "loss": 0.0102, "step": 21956 }, { "epoch": 10.208275220827522, "grad_norm": 1.1666070222854614, "learning_rate": 5.4386463563968506e-08, "loss": 0.0097, "step": 21958 }, { "epoch": 10.209205020920502, "grad_norm": 0.16488860547542572, "learning_rate": 4.9331789293213825e-08, "loss": 0.0029, "step": 21960 }, { "epoch": 10.210134821013481, "grad_norm": 0.3456726372241974, "learning_rate": 4.4523368044704936e-08, "loss": 0.0043, "step": 21962 }, { "epoch": 10.211064621106463, "grad_norm": 0.3109781742095947, "learning_rate": 3.996124727562169e-08, "loss": 0.004, "step": 21964 }, { "epoch": 10.211994421199442, "grad_norm": 0.7786000370979309, "learning_rate": 3.564547201225234e-08, "loss": 0.0097, "step": 21966 }, { "epoch": 10.212924221292422, "grad_norm": 0.4410637319087982, "learning_rate": 3.157608484956611e-08, "loss": 0.0077, "step": 21968 }, { "epoch": 10.213854021385401, "grad_norm": 0.06326178461313248, "learning_rate": 2.7753125950749654e-08, "loss": 0.0096, "step": 21970 }, { "epoch": 10.214783821478383, "grad_norm": 1.0535235404968262, "learning_rate": 2.4176633046879575e-08, "loss": 0.0087, "step": 21972 }, { "epoch": 10.215713621571362, "grad_norm": 0.1542460322380066, "learning_rate": 2.084664143650052e-08, "loss": 0.0049, "step": 21974 }, { "epoch": 10.216643421664342, "grad_norm": 0.07165570557117462, "learning_rate": 1.7763183985269893e-08, "loss": 0.0033, "step": 21976 }, { "epoch": 10.217573221757322, "grad_norm": 0.25932031869888306, "learning_rate": 1.492629112567757e-08, "loss": 0.0029, "step": 21978 }, { "epoch": 10.218503021850303, "grad_norm": 0.20223002135753632, "learning_rate": 1.233599085671556e-08, "loss": 0.0042, "step": 21980 }, { "epoch": 10.219432821943283, "grad_norm": 0.5216938853263855, "learning_rate": 9.992308743589368e-09, "loss": 0.006, "step": 21982 }, { "epoch": 10.220362622036262, "grad_norm": 0.15699328482151031, "learning_rate": 7.895267917501507e-09, "loss": 0.0073, "step": 21984 }, { "epoch": 10.221292422129242, "grad_norm": 0.4163954257965088, "learning_rate": 6.044889075396134e-09, "loss": 0.0046, "step": 21986 }, { "epoch": 10.222222222222221, "grad_norm": 0.7502469420433044, "learning_rate": 4.441190479775871e-09, "loss": 0.0105, "step": 21988 }, { "epoch": 10.223152022315203, "grad_norm": 1.0040243864059448, "learning_rate": 3.084187958485308e-09, "loss": 0.0151, "step": 21990 }, { "epoch": 10.224081822408182, "grad_norm": 0.16170673072338104, "learning_rate": 1.9738949045972072e-09, "loss": 0.0036, "step": 21992 }, { "epoch": 10.225011622501162, "grad_norm": 0.41334256529808044, "learning_rate": 1.1103222762542945e-09, "loss": 0.0109, "step": 21994 }, { "epoch": 10.225941422594142, "grad_norm": 0.1598186492919922, "learning_rate": 4.934785965721168e-10, "loss": 0.0042, "step": 21996 }, { "epoch": 10.226871222687123, "grad_norm": 0.40364551544189453, "learning_rate": 1.2336995354467197e-10, "loss": 0.006, "step": 21998 }, { "epoch": 10.227801022780103, "grad_norm": 0.07193301618099213, "learning_rate": 0.0, "loss": 0.0048, "step": 22000 }, { "epoch": 10.227801022780103, "eval_cer": 0.11396363254151776, "eval_loss": 0.1785571575164795, "eval_runtime": 398.9599, "eval_samples_per_second": 31.818, "eval_steps_per_second": 0.995, "step": 22000 }, { "epoch": 10.228730822873082, "grad_norm": 0.04655643180012703, "learning_rate": 1.2336995354467197e-10, "loss": 0.0036, "step": 22002 }, { "epoch": 10.229660622966062, "grad_norm": 0.35294437408447266, "learning_rate": 4.934785965721167e-10, "loss": 0.0049, "step": 22004 }, { "epoch": 10.230590423059041, "grad_norm": 0.6237164735794067, "learning_rate": 1.1103222762542941e-09, "loss": 0.0106, "step": 22006 }, { "epoch": 10.231520223152023, "grad_norm": 0.16315260529518127, "learning_rate": 1.973894904594431e-09, "loss": 0.0039, "step": 22008 }, { "epoch": 10.232450023245002, "grad_norm": 1.1137884855270386, "learning_rate": 3.084187958482532e-09, "loss": 0.0044, "step": 22010 }, { "epoch": 10.233379823337982, "grad_norm": 0.26731088757514954, "learning_rate": 4.441190479773094e-09, "loss": 0.0041, "step": 22012 }, { "epoch": 10.234309623430962, "grad_norm": 0.40303653478622437, "learning_rate": 6.044889075398907e-09, "loss": 0.0047, "step": 22014 }, { "epoch": 10.235239423523943, "grad_norm": 0.20134121179580688, "learning_rate": 7.895267917501502e-09, "loss": 0.0043, "step": 22016 }, { "epoch": 10.236169223616923, "grad_norm": 0.2298729121685028, "learning_rate": 9.992308743589363e-09, "loss": 0.0028, "step": 22018 }, { "epoch": 10.237099023709902, "grad_norm": 0.15270698070526123, "learning_rate": 1.2335990856712776e-08, "loss": 0.0061, "step": 22020 }, { "epoch": 10.238028823802882, "grad_norm": 0.3266947567462921, "learning_rate": 1.492629112567756e-08, "loss": 0.005, "step": 22022 }, { "epoch": 10.238958623895863, "grad_norm": 0.4036889374256134, "learning_rate": 1.7763183985269883e-08, "loss": 0.0064, "step": 22024 }, { "epoch": 10.239888423988843, "grad_norm": 0.12401308119297028, "learning_rate": 2.0846641436497726e-08, "loss": 0.0029, "step": 22026 }, { "epoch": 10.240818224081822, "grad_norm": 0.35722774267196655, "learning_rate": 2.4176633046876786e-08, "loss": 0.0055, "step": 22028 }, { "epoch": 10.241748024174802, "grad_norm": 0.05097987502813339, "learning_rate": 2.7753125950746865e-08, "loss": 0.0021, "step": 22030 }, { "epoch": 10.242677824267782, "grad_norm": 0.18062755465507507, "learning_rate": 3.157608484956054e-08, "loss": 0.0035, "step": 22032 }, { "epoch": 10.243607624360763, "grad_norm": 0.5415834784507751, "learning_rate": 3.564547201224677e-08, "loss": 0.0072, "step": 22034 }, { "epoch": 10.244537424453743, "grad_norm": 0.7861312031745911, "learning_rate": 3.996124727561612e-08, "loss": 0.0084, "step": 22036 }, { "epoch": 10.245467224546722, "grad_norm": 0.3617849349975586, "learning_rate": 4.452336804469937e-08, "loss": 0.0041, "step": 22038 }, { "epoch": 10.246397024639702, "grad_norm": 0.2689051926136017, "learning_rate": 4.933178929320826e-08, "loss": 0.0082, "step": 22040 }, { "epoch": 10.247326824732683, "grad_norm": 1.0411419868469238, "learning_rate": 5.438646356396295e-08, "loss": 0.0078, "step": 22042 }, { "epoch": 10.248256624825663, "grad_norm": 0.13585197925567627, "learning_rate": 5.968734096937213e-08, "loss": 0.0041, "step": 22044 }, { "epoch": 10.249186424918642, "grad_norm": 0.08091311156749725, "learning_rate": 6.523436919191329e-08, "loss": 0.0021, "step": 22046 }, { "epoch": 10.250116225011622, "grad_norm": 0.2865025997161865, "learning_rate": 7.10274934846461e-08, "loss": 0.0047, "step": 22048 }, { "epoch": 10.251046025104603, "grad_norm": 0.28806039690971375, "learning_rate": 7.706665667179814e-08, "loss": 0.0048, "step": 22050 }, { "epoch": 10.251975825197583, "grad_norm": 0.19852755963802338, "learning_rate": 8.335179914925328e-08, "loss": 0.0039, "step": 22052 }, { "epoch": 10.252905625290563, "grad_norm": 0.2591821849346161, "learning_rate": 8.988285888519575e-08, "loss": 0.0059, "step": 22054 }, { "epoch": 10.253835425383542, "grad_norm": 0.11007722467184067, "learning_rate": 9.665977142068181e-08, "loss": 0.0052, "step": 22056 }, { "epoch": 10.254765225476522, "grad_norm": 0.6230772733688354, "learning_rate": 1.0368246987035311e-07, "loss": 0.0088, "step": 22058 }, { "epoch": 10.255695025569503, "grad_norm": 0.35638731718063354, "learning_rate": 1.1095088492300009e-07, "loss": 0.0047, "step": 22060 }, { "epoch": 10.256624825662483, "grad_norm": 0.45889076590538025, "learning_rate": 1.1846494484227808e-07, "loss": 0.0064, "step": 22062 }, { "epoch": 10.257554625755462, "grad_norm": 0.17683276534080505, "learning_rate": 1.2622457546748455e-07, "loss": 0.0035, "step": 22064 }, { "epoch": 10.258484425848442, "grad_norm": 0.22404222190380096, "learning_rate": 1.3422970021418343e-07, "loss": 0.0065, "step": 22066 }, { "epoch": 10.259414225941423, "grad_norm": 0.3519251346588135, "learning_rate": 1.4248024007500743e-07, "loss": 0.0061, "step": 22068 }, { "epoch": 10.260344026034403, "grad_norm": 0.16193091869354248, "learning_rate": 1.5097611362051005e-07, "loss": 0.0046, "step": 22070 }, { "epoch": 10.261273826127383, "grad_norm": 0.8864076137542725, "learning_rate": 1.5971723699979559e-07, "loss": 0.0182, "step": 22072 }, { "epoch": 10.262203626220362, "grad_norm": 0.09416232258081436, "learning_rate": 1.6870352394152677e-07, "loss": 0.0033, "step": 22074 }, { "epoch": 10.263133426313342, "grad_norm": 0.1992320865392685, "learning_rate": 1.7793488575465465e-07, "loss": 0.0038, "step": 22076 }, { "epoch": 10.264063226406323, "grad_norm": 0.08809993416070938, "learning_rate": 1.8741123132940675e-07, "loss": 0.0029, "step": 22078 }, { "epoch": 10.264993026499303, "grad_norm": 0.6377986073493958, "learning_rate": 1.9713246713805856e-07, "loss": 0.0084, "step": 22080 }, { "epoch": 10.265922826592282, "grad_norm": 0.21913288533687592, "learning_rate": 2.0709849723591625e-07, "loss": 0.0039, "step": 22082 }, { "epoch": 10.266852626685262, "grad_norm": 0.302278608083725, "learning_rate": 2.1730922326232686e-07, "loss": 0.0053, "step": 22084 }, { "epoch": 10.267782426778243, "grad_norm": 0.27235451340675354, "learning_rate": 2.277645444415304e-07, "loss": 0.0059, "step": 22086 }, { "epoch": 10.268712226871223, "grad_norm": 0.3277228772640228, "learning_rate": 2.38464357583698e-07, "loss": 0.0069, "step": 22088 }, { "epoch": 10.269642026964203, "grad_norm": 1.4382621049880981, "learning_rate": 2.494085570860448e-07, "loss": 0.0142, "step": 22090 }, { "epoch": 10.270571827057182, "grad_norm": 0.10070066154003143, "learning_rate": 2.6059703493371267e-07, "loss": 0.0033, "step": 22092 }, { "epoch": 10.271501627150164, "grad_norm": 0.345355361700058, "learning_rate": 2.720296807009497e-07, "loss": 0.0054, "step": 22094 }, { "epoch": 10.272431427243143, "grad_norm": 0.21849723160266876, "learning_rate": 2.8370638155214837e-07, "loss": 0.0041, "step": 22096 }, { "epoch": 10.273361227336123, "grad_norm": 0.25237470865249634, "learning_rate": 2.9562702224299454e-07, "loss": 0.0058, "step": 22098 }, { "epoch": 10.274291027429102, "grad_norm": 0.09571298211812973, "learning_rate": 3.077914851215695e-07, "loss": 0.0034, "step": 22100 }, { "epoch": 10.275220827522082, "grad_norm": 0.5080965161323547, "learning_rate": 3.2019965012951e-07, "loss": 0.0044, "step": 22102 }, { "epoch": 10.276150627615063, "grad_norm": 0.17151962220668793, "learning_rate": 3.328513948032933e-07, "loss": 0.0035, "step": 22104 }, { "epoch": 10.277080427708043, "grad_norm": 0.16548261046409607, "learning_rate": 3.457465942752811e-07, "loss": 0.0048, "step": 22106 }, { "epoch": 10.278010227801023, "grad_norm": 0.19418303668498993, "learning_rate": 3.5888512127505965e-07, "loss": 0.0102, "step": 22108 }, { "epoch": 10.278940027894002, "grad_norm": 0.12673978507518768, "learning_rate": 3.722668461306392e-07, "loss": 0.0038, "step": 22110 }, { "epoch": 10.279869827986984, "grad_norm": 0.13004200160503387, "learning_rate": 3.858916367698581e-07, "loss": 0.0274, "step": 22112 }, { "epoch": 10.280799628079963, "grad_norm": 1.5831536054611206, "learning_rate": 3.9975935872150455e-07, "loss": 0.0167, "step": 22114 }, { "epoch": 10.281729428172943, "grad_norm": 0.9442195296287537, "learning_rate": 4.138698751167317e-07, "loss": 0.0077, "step": 22116 }, { "epoch": 10.282659228265922, "grad_norm": 0.4537271559238434, "learning_rate": 4.2822304669050104e-07, "loss": 0.0085, "step": 22118 }, { "epoch": 10.283589028358902, "grad_norm": 0.26508477330207825, "learning_rate": 4.4281873178277056e-07, "loss": 0.0044, "step": 22120 }, { "epoch": 10.284518828451883, "grad_norm": 0.2483198344707489, "learning_rate": 4.576567863399907e-07, "loss": 0.0053, "step": 22122 }, { "epoch": 10.285448628544863, "grad_norm": 1.0605634450912476, "learning_rate": 4.727370639166502e-07, "loss": 0.0129, "step": 22124 }, { "epoch": 10.286378428637843, "grad_norm": 0.2970576882362366, "learning_rate": 4.880594156764003e-07, "loss": 0.0055, "step": 22126 }, { "epoch": 10.287308228730822, "grad_norm": 0.6455211639404297, "learning_rate": 5.036236903938476e-07, "loss": 0.0128, "step": 22128 }, { "epoch": 10.288238028823804, "grad_norm": 0.7477376461029053, "learning_rate": 5.194297344558476e-07, "loss": 0.007, "step": 22130 }, { "epoch": 10.289167828916783, "grad_norm": 0.9030004143714905, "learning_rate": 5.35477391863198e-07, "loss": 0.0057, "step": 22132 }, { "epoch": 10.290097629009763, "grad_norm": 0.10209526866674423, "learning_rate": 5.517665042319621e-07, "loss": 0.0036, "step": 22134 }, { "epoch": 10.291027429102742, "grad_norm": 0.8761920928955078, "learning_rate": 5.682969107951431e-07, "loss": 0.0072, "step": 22136 }, { "epoch": 10.291957229195724, "grad_norm": 0.2680213749408722, "learning_rate": 5.850684484043712e-07, "loss": 0.0075, "step": 22138 }, { "epoch": 10.292887029288703, "grad_norm": 0.8221049904823303, "learning_rate": 6.020809515313109e-07, "loss": 0.0087, "step": 22140 }, { "epoch": 10.293816829381683, "grad_norm": 0.8018304705619812, "learning_rate": 6.193342522694181e-07, "loss": 0.0058, "step": 22142 }, { "epoch": 10.294746629474663, "grad_norm": 1.015587568283081, "learning_rate": 6.368281803355437e-07, "loss": 0.0146, "step": 22144 }, { "epoch": 10.295676429567642, "grad_norm": 0.8749423027038574, "learning_rate": 6.545625630717583e-07, "loss": 0.0049, "step": 22146 }, { "epoch": 10.296606229660624, "grad_norm": 0.18436777591705322, "learning_rate": 6.725372254468252e-07, "loss": 0.0035, "step": 22148 }, { "epoch": 10.297536029753603, "grad_norm": 0.6563136577606201, "learning_rate": 6.907519900580439e-07, "loss": 0.0104, "step": 22150 }, { "epoch": 10.298465829846583, "grad_norm": 0.21880723536014557, "learning_rate": 7.092066771331611e-07, "loss": 0.0042, "step": 22152 }, { "epoch": 10.299395629939562, "grad_norm": 0.3024226129055023, "learning_rate": 7.279011045317439e-07, "loss": 0.0088, "step": 22154 }, { "epoch": 10.300325430032544, "grad_norm": 0.3214035928249359, "learning_rate": 7.468350877473376e-07, "loss": 0.0034, "step": 22156 }, { "epoch": 10.301255230125523, "grad_norm": 0.4078124463558197, "learning_rate": 7.660084399092623e-07, "loss": 0.0057, "step": 22158 }, { "epoch": 10.302185030218503, "grad_norm": 0.2751961946487427, "learning_rate": 7.85420971784225e-07, "loss": 0.0043, "step": 22160 }, { "epoch": 10.303114830311483, "grad_norm": 0.2767084240913391, "learning_rate": 8.050724917783766e-07, "loss": 0.0084, "step": 22162 }, { "epoch": 10.304044630404462, "grad_norm": 0.28357410430908203, "learning_rate": 8.249628059391019e-07, "loss": 0.0046, "step": 22164 }, { "epoch": 10.304974430497444, "grad_norm": 0.08129514008760452, "learning_rate": 8.450917179571212e-07, "loss": 0.0043, "step": 22166 }, { "epoch": 10.305904230590423, "grad_norm": 0.13450315594673157, "learning_rate": 8.65459029168152e-07, "loss": 0.0035, "step": 22168 }, { "epoch": 10.306834030683403, "grad_norm": 0.3095935881137848, "learning_rate": 8.860645385550109e-07, "loss": 0.0042, "step": 22170 }, { "epoch": 10.307763830776382, "grad_norm": 0.14763376116752625, "learning_rate": 9.069080427497312e-07, "loss": 0.0036, "step": 22172 }, { "epoch": 10.308693630869364, "grad_norm": 0.16394370794296265, "learning_rate": 9.279893360352943e-07, "loss": 0.0031, "step": 22174 }, { "epoch": 10.309623430962343, "grad_norm": 0.2806544601917267, "learning_rate": 9.49308210347798e-07, "loss": 0.0093, "step": 22176 }, { "epoch": 10.310553231055323, "grad_norm": 0.16337627172470093, "learning_rate": 9.708644552786571e-07, "loss": 0.0038, "step": 22178 }, { "epoch": 10.311483031148303, "grad_norm": 0.16867488622665405, "learning_rate": 9.926578580764386e-07, "loss": 0.004, "step": 22180 }, { "epoch": 10.312412831241284, "grad_norm": 0.12247390300035477, "learning_rate": 1.0146882036489544e-06, "loss": 0.0026, "step": 22182 }, { "epoch": 10.313342631334264, "grad_norm": 0.11991692334413528, "learning_rate": 1.0369552745655886e-06, "loss": 0.0037, "step": 22184 }, { "epoch": 10.314272431427243, "grad_norm": 0.07163534313440323, "learning_rate": 1.059458851059443e-06, "loss": 0.0032, "step": 22186 }, { "epoch": 10.315202231520223, "grad_norm": 0.14031000435352325, "learning_rate": 1.0821987110292488e-06, "loss": 0.0036, "step": 22188 }, { "epoch": 10.316132031613202, "grad_norm": 1.260844349861145, "learning_rate": 1.1051746300417178e-06, "loss": 0.0059, "step": 22190 }, { "epoch": 10.317061831706184, "grad_norm": 1.6238799095153809, "learning_rate": 1.128386381333908e-06, "loss": 0.0112, "step": 22192 }, { "epoch": 10.317991631799163, "grad_norm": 0.20295795798301697, "learning_rate": 1.1518337358151538e-06, "loss": 0.0031, "step": 22194 }, { "epoch": 10.318921431892143, "grad_norm": 0.45309898257255554, "learning_rate": 1.1755164620695355e-06, "loss": 0.004, "step": 22196 }, { "epoch": 10.319851231985123, "grad_norm": 0.5110301971435547, "learning_rate": 1.1994343263580469e-06, "loss": 0.012, "step": 22198 }, { "epoch": 10.320781032078104, "grad_norm": 0.3358820080757141, "learning_rate": 1.2235870926211354e-06, "loss": 0.0036, "step": 22200 }, { "epoch": 10.321710832171084, "grad_norm": 0.24725037813186646, "learning_rate": 1.2479745224806925e-06, "loss": 0.0061, "step": 22202 }, { "epoch": 10.322640632264063, "grad_norm": 0.6092311143875122, "learning_rate": 1.2725963752425786e-06, "loss": 0.0035, "step": 22204 }, { "epoch": 10.323570432357043, "grad_norm": 0.33239227533340454, "learning_rate": 1.2974524078992124e-06, "loss": 0.0041, "step": 22206 }, { "epoch": 10.324500232450024, "grad_norm": 0.7325551509857178, "learning_rate": 1.3225423751314183e-06, "loss": 0.0136, "step": 22208 }, { "epoch": 10.325430032543004, "grad_norm": 0.18121661245822906, "learning_rate": 1.3478660293113472e-06, "loss": 0.0037, "step": 22210 }, { "epoch": 10.326359832635983, "grad_norm": 0.09328634291887283, "learning_rate": 1.3734231205048732e-06, "loss": 0.0027, "step": 22212 }, { "epoch": 10.327289632728963, "grad_norm": 0.4087941348552704, "learning_rate": 1.3992133964737629e-06, "loss": 0.0056, "step": 22214 }, { "epoch": 10.328219432821943, "grad_norm": 0.27765563130378723, "learning_rate": 1.4252366026784077e-06, "loss": 0.0048, "step": 22216 }, { "epoch": 10.329149232914924, "grad_norm": 0.3345828652381897, "learning_rate": 1.4514924822802072e-06, "loss": 0.0067, "step": 22218 }, { "epoch": 10.330079033007904, "grad_norm": 0.13958239555358887, "learning_rate": 1.4779807761443478e-06, "loss": 0.0039, "step": 22220 }, { "epoch": 10.331008833100883, "grad_norm": 0.11824110150337219, "learning_rate": 1.5047012228420068e-06, "loss": 0.004, "step": 22222 }, { "epoch": 10.331938633193863, "grad_norm": 0.39801540970802307, "learning_rate": 1.5316535586530992e-06, "loss": 0.0059, "step": 22224 }, { "epoch": 10.332868433286844, "grad_norm": 1.6283162832260132, "learning_rate": 1.5588375175690738e-06, "loss": 0.0135, "step": 22226 }, { "epoch": 10.333798233379824, "grad_norm": 0.09475524723529816, "learning_rate": 1.586252831295169e-06, "loss": 0.0037, "step": 22228 }, { "epoch": 10.334728033472803, "grad_norm": 0.10408955812454224, "learning_rate": 1.6138992292533081e-06, "loss": 0.0054, "step": 22230 }, { "epoch": 10.335657833565783, "grad_norm": 0.16229216754436493, "learning_rate": 1.641776438584642e-06, "loss": 0.0074, "step": 22232 }, { "epoch": 10.336587633658763, "grad_norm": 0.48558491468429565, "learning_rate": 1.6698841841525543e-06, "loss": 0.0121, "step": 22234 }, { "epoch": 10.337517433751744, "grad_norm": 0.14054811000823975, "learning_rate": 1.6982221885447605e-06, "loss": 0.004, "step": 22236 }, { "epoch": 10.338447233844724, "grad_norm": 0.2087661623954773, "learning_rate": 1.7267901720765903e-06, "loss": 0.0039, "step": 22238 }, { "epoch": 10.339377033937703, "grad_norm": 0.1951470971107483, "learning_rate": 1.7555878527937145e-06, "loss": 0.0053, "step": 22240 }, { "epoch": 10.340306834030683, "grad_norm": 0.14254894852638245, "learning_rate": 1.7846149464745812e-06, "loss": 0.0025, "step": 22242 }, { "epoch": 10.341236634123664, "grad_norm": 0.21333935856819153, "learning_rate": 1.8138711666334275e-06, "loss": 0.0056, "step": 22244 }, { "epoch": 10.342166434216644, "grad_norm": 0.40688571333885193, "learning_rate": 1.8433562245233107e-06, "loss": 0.0074, "step": 22246 }, { "epoch": 10.343096234309623, "grad_norm": 0.2607056796550751, "learning_rate": 1.8730698291385416e-06, "loss": 0.0048, "step": 22248 }, { "epoch": 10.344026034402603, "grad_norm": 1.0768139362335205, "learning_rate": 1.9030116872178382e-06, "loss": 0.0082, "step": 22250 }, { "epoch": 10.344955834495583, "grad_norm": 0.5123790502548218, "learning_rate": 1.9331815032470815e-06, "loss": 0.0082, "step": 22252 }, { "epoch": 10.345885634588564, "grad_norm": 0.33697807788848877, "learning_rate": 1.9635789794625056e-06, "loss": 0.0064, "step": 22254 }, { "epoch": 10.346815434681544, "grad_norm": 0.1974305659532547, "learning_rate": 1.9942038158532246e-06, "loss": 0.0062, "step": 22256 }, { "epoch": 10.347745234774523, "grad_norm": 0.12678685784339905, "learning_rate": 2.0250557101643955e-06, "loss": 0.0028, "step": 22258 }, { "epoch": 10.348675034867503, "grad_norm": 0.344281941652298, "learning_rate": 2.056134357900414e-06, "loss": 0.0058, "step": 22260 }, { "epoch": 10.349604834960484, "grad_norm": 0.2652149200439453, "learning_rate": 2.087439452327581e-06, "loss": 0.005, "step": 22262 }, { "epoch": 10.350534635053464, "grad_norm": 0.6638437509536743, "learning_rate": 2.118970684477035e-06, "loss": 0.0076, "step": 22264 }, { "epoch": 10.351464435146443, "grad_norm": 0.44496873021125793, "learning_rate": 2.15072774314846e-06, "loss": 0.006, "step": 22266 }, { "epoch": 10.352394235239423, "grad_norm": 0.5409451127052307, "learning_rate": 2.1827103149124377e-06, "loss": 0.013, "step": 22268 }, { "epoch": 10.353324035332404, "grad_norm": 0.2823585867881775, "learning_rate": 2.214918084113891e-06, "loss": 0.0047, "step": 22270 }, { "epoch": 10.354253835425384, "grad_norm": 0.24387048184871674, "learning_rate": 2.2473507328750734e-06, "loss": 0.0072, "step": 22272 }, { "epoch": 10.355183635518364, "grad_norm": 1.2233470678329468, "learning_rate": 2.280007941098981e-06, "loss": 0.0186, "step": 22274 }, { "epoch": 10.356113435611343, "grad_norm": 0.5287969708442688, "learning_rate": 2.312889386472076e-06, "loss": 0.0076, "step": 22276 }, { "epoch": 10.357043235704323, "grad_norm": 0.5173925757408142, "learning_rate": 2.345994744467695e-06, "loss": 0.0064, "step": 22278 }, { "epoch": 10.357973035797304, "grad_norm": 1.168448567390442, "learning_rate": 2.3793236883494695e-06, "loss": 0.0125, "step": 22280 }, { "epoch": 10.358902835890284, "grad_norm": 0.4479876756668091, "learning_rate": 2.4128758891741024e-06, "loss": 0.0079, "step": 22282 }, { "epoch": 10.359832635983263, "grad_norm": 0.5377499461174011, "learning_rate": 2.446651015794919e-06, "loss": 0.0056, "step": 22284 }, { "epoch": 10.360762436076243, "grad_norm": 0.8518076539039612, "learning_rate": 2.48064873486498e-06, "loss": 0.0102, "step": 22286 }, { "epoch": 10.361692236169224, "grad_norm": 0.32668066024780273, "learning_rate": 2.5148687108407484e-06, "loss": 0.0071, "step": 22288 }, { "epoch": 10.362622036262204, "grad_norm": 0.09640868753194809, "learning_rate": 2.549310605984651e-06, "loss": 0.0086, "step": 22290 }, { "epoch": 10.363551836355184, "grad_norm": 0.4408089518547058, "learning_rate": 2.583974080369084e-06, "loss": 0.0036, "step": 22292 }, { "epoch": 10.364481636448163, "grad_norm": 0.09871216863393784, "learning_rate": 2.6188587918797087e-06, "loss": 0.005, "step": 22294 }, { "epoch": 10.365411436541144, "grad_norm": 0.41543397307395935, "learning_rate": 2.6539643962184226e-06, "loss": 0.0077, "step": 22296 }, { "epoch": 10.366341236634124, "grad_norm": 0.19420653581619263, "learning_rate": 2.689290546907006e-06, "loss": 0.0032, "step": 22298 }, { "epoch": 10.367271036727104, "grad_norm": 0.4881863296031952, "learning_rate": 2.7248368952907744e-06, "loss": 0.008, "step": 22300 }, { "epoch": 10.368200836820083, "grad_norm": 0.416072815656662, "learning_rate": 2.7606030905415406e-06, "loss": 0.0061, "step": 22302 }, { "epoch": 10.369130636913063, "grad_norm": 0.0937308594584465, "learning_rate": 2.7965887796613926e-06, "loss": 0.0033, "step": 22304 }, { "epoch": 10.370060437006044, "grad_norm": 0.23529423773288727, "learning_rate": 2.83279360748603e-06, "loss": 0.0048, "step": 22306 }, { "epoch": 10.370990237099024, "grad_norm": 0.21410012245178223, "learning_rate": 2.869217216688582e-06, "loss": 0.0026, "step": 22308 }, { "epoch": 10.371920037192004, "grad_norm": 0.23622284829616547, "learning_rate": 2.9058592477826437e-06, "loss": 0.0054, "step": 22310 }, { "epoch": 10.372849837284983, "grad_norm": 0.39916878938674927, "learning_rate": 2.942719339126082e-06, "loss": 0.0031, "step": 22312 }, { "epoch": 10.373779637377964, "grad_norm": 0.3368454873561859, "learning_rate": 2.9797971269248433e-06, "loss": 0.0047, "step": 22314 }, { "epoch": 10.374709437470944, "grad_norm": 0.1844632625579834, "learning_rate": 3.0170922452361304e-06, "loss": 0.0029, "step": 22316 }, { "epoch": 10.375639237563924, "grad_norm": 0.08666078746318817, "learning_rate": 3.0546043259720134e-06, "loss": 0.0039, "step": 22318 }, { "epoch": 10.376569037656903, "grad_norm": 0.10348404198884964, "learning_rate": 3.0923329989033997e-06, "loss": 0.0048, "step": 22320 }, { "epoch": 10.377498837749883, "grad_norm": 0.27121761441230774, "learning_rate": 3.1302778916636887e-06, "loss": 0.0043, "step": 22322 }, { "epoch": 10.378428637842864, "grad_norm": 0.12736240029335022, "learning_rate": 3.1684386297520245e-06, "loss": 0.0037, "step": 22324 }, { "epoch": 10.379358437935844, "grad_norm": 0.40026581287384033, "learning_rate": 3.206814836537239e-06, "loss": 0.0043, "step": 22326 }, { "epoch": 10.380288238028823, "grad_norm": 0.7689045071601868, "learning_rate": 3.2454061332618386e-06, "loss": 0.0106, "step": 22328 }, { "epoch": 10.381218038121803, "grad_norm": 0.5262911319732666, "learning_rate": 3.2842121390452175e-06, "loss": 0.004, "step": 22330 }, { "epoch": 10.382147838214784, "grad_norm": 0.23376525938510895, "learning_rate": 3.32323247088768e-06, "loss": 0.0051, "step": 22332 }, { "epoch": 10.383077638307764, "grad_norm": 0.4548405408859253, "learning_rate": 3.3624667436744747e-06, "loss": 0.0059, "step": 22334 }, { "epoch": 10.384007438400744, "grad_norm": 0.2788776755332947, "learning_rate": 3.4019145701790874e-06, "loss": 0.0054, "step": 22336 }, { "epoch": 10.384937238493723, "grad_norm": 0.09389615058898926, "learning_rate": 3.4415755610673886e-06, "loss": 0.0054, "step": 22338 }, { "epoch": 10.385867038586705, "grad_norm": 0.5121673941612244, "learning_rate": 3.4814493249013278e-06, "loss": 0.0058, "step": 22340 }, { "epoch": 10.386796838679684, "grad_norm": 0.8281837105751038, "learning_rate": 3.5215354681431265e-06, "loss": 0.0068, "step": 22342 }, { "epoch": 10.387726638772664, "grad_norm": 0.24310389161109924, "learning_rate": 3.5618335951587436e-06, "loss": 0.0037, "step": 22344 }, { "epoch": 10.388656438865643, "grad_norm": 0.09916753321886063, "learning_rate": 3.6023433082216514e-06, "loss": 0.0041, "step": 22346 }, { "epoch": 10.389586238958623, "grad_norm": 0.12406110018491745, "learning_rate": 3.643064207517619e-06, "loss": 0.0043, "step": 22348 }, { "epoch": 10.390516039051604, "grad_norm": 0.12526759505271912, "learning_rate": 3.683995891147712e-06, "loss": 0.0062, "step": 22350 }, { "epoch": 10.391445839144584, "grad_norm": 0.21892684698104858, "learning_rate": 3.7251379551326486e-06, "loss": 0.0082, "step": 22352 }, { "epoch": 10.392375639237564, "grad_norm": 0.20185954868793488, "learning_rate": 3.7664899934170518e-06, "loss": 0.0034, "step": 22354 }, { "epoch": 10.393305439330543, "grad_norm": 0.10092956572771072, "learning_rate": 3.808051597872905e-06, "loss": 0.0043, "step": 22356 }, { "epoch": 10.394235239423525, "grad_norm": 0.16155768930912018, "learning_rate": 3.849822358303952e-06, "loss": 0.003, "step": 22358 }, { "epoch": 10.395165039516504, "grad_norm": 0.278454065322876, "learning_rate": 3.891801862449556e-06, "loss": 0.0057, "step": 22360 }, { "epoch": 10.396094839609484, "grad_norm": 1.287003755569458, "learning_rate": 3.9339896959891485e-06, "loss": 0.037, "step": 22362 }, { "epoch": 10.397024639702463, "grad_norm": 0.39855077862739563, "learning_rate": 3.976385442545749e-06, "loss": 0.0088, "step": 22364 }, { "epoch": 10.397954439795445, "grad_norm": 0.17260169982910156, "learning_rate": 4.018988683690359e-06, "loss": 0.0046, "step": 22366 }, { "epoch": 10.398884239888424, "grad_norm": 0.7027493715286255, "learning_rate": 4.061798998946379e-06, "loss": 0.0088, "step": 22368 }, { "epoch": 10.399814039981404, "grad_norm": 0.5280640721321106, "learning_rate": 4.104815965793286e-06, "loss": 0.0069, "step": 22370 }, { "epoch": 10.400743840074384, "grad_norm": 0.11456897109746933, "learning_rate": 4.148039159670784e-06, "loss": 0.0031, "step": 22372 }, { "epoch": 10.401673640167363, "grad_norm": 0.2453009933233261, "learning_rate": 4.191468153983401e-06, "loss": 0.0041, "step": 22374 }, { "epoch": 10.402603440260345, "grad_norm": 0.5274255871772766, "learning_rate": 4.2351025201046934e-06, "loss": 0.0063, "step": 22376 }, { "epoch": 10.403533240353324, "grad_norm": 1.0555106401443481, "learning_rate": 4.278941827380985e-06, "loss": 0.0064, "step": 22378 }, { "epoch": 10.404463040446304, "grad_norm": 0.1758853942155838, "learning_rate": 4.3229856431359095e-06, "loss": 0.0043, "step": 22380 }, { "epoch": 10.405392840539283, "grad_norm": 0.2030576765537262, "learning_rate": 4.367233532674987e-06, "loss": 0.009, "step": 22382 }, { "epoch": 10.406322640632265, "grad_norm": 0.30812036991119385, "learning_rate": 4.411685059289313e-06, "loss": 0.005, "step": 22384 }, { "epoch": 10.407252440725244, "grad_norm": 0.2926076054573059, "learning_rate": 4.456339784260166e-06, "loss": 0.0068, "step": 22386 }, { "epoch": 10.408182240818224, "grad_norm": 0.20118600130081177, "learning_rate": 4.501197266863635e-06, "loss": 0.0051, "step": 22388 }, { "epoch": 10.409112040911204, "grad_norm": 0.2568920850753784, "learning_rate": 4.546257064374381e-06, "loss": 0.0111, "step": 22390 }, { "epoch": 10.410041841004183, "grad_norm": 0.08526600152254105, "learning_rate": 4.591518732070389e-06, "loss": 0.0137, "step": 22392 }, { "epoch": 10.410971641097165, "grad_norm": 0.40610387921333313, "learning_rate": 4.636981823237172e-06, "loss": 0.0121, "step": 22394 }, { "epoch": 10.411901441190144, "grad_norm": 0.07381843030452728, "learning_rate": 4.68264588917258e-06, "loss": 0.0034, "step": 22396 }, { "epoch": 10.412831241283124, "grad_norm": 0.11037370562553406, "learning_rate": 4.728510479190718e-06, "loss": 0.0026, "step": 22398 }, { "epoch": 10.413761041376103, "grad_norm": 0.1550145447254181, "learning_rate": 4.774575140626289e-06, "loss": 0.0065, "step": 22400 }, { "epoch": 10.414690841469085, "grad_norm": 0.19587117433547974, "learning_rate": 4.82083941883999e-06, "loss": 0.0039, "step": 22402 }, { "epoch": 10.415620641562064, "grad_norm": 0.10042525082826614, "learning_rate": 4.867302857221973e-06, "loss": 0.003, "step": 22404 }, { "epoch": 10.416550441655044, "grad_norm": 0.7988053560256958, "learning_rate": 4.9139649971968545e-06, "loss": 0.0203, "step": 22406 }, { "epoch": 10.417480241748024, "grad_norm": 0.3184005320072174, "learning_rate": 4.960825378228049e-06, "loss": 0.005, "step": 22408 }, { "epoch": 10.418410041841003, "grad_norm": 0.5531980395317078, "learning_rate": 5.007883537822723e-06, "loss": 0.0051, "step": 22410 }, { "epoch": 10.419339841933985, "grad_norm": 0.2931046187877655, "learning_rate": 5.055139011535734e-06, "loss": 0.0046, "step": 22412 }, { "epoch": 10.420269642026964, "grad_norm": 0.10664862394332886, "learning_rate": 5.10259133297453e-06, "loss": 0.0036, "step": 22414 }, { "epoch": 10.421199442119944, "grad_norm": 1.1168211698532104, "learning_rate": 5.150240033804065e-06, "loss": 0.0087, "step": 22416 }, { "epoch": 10.422129242212923, "grad_norm": 0.5458435416221619, "learning_rate": 5.198084643750797e-06, "loss": 0.0128, "step": 22418 }, { "epoch": 10.423059042305905, "grad_norm": 0.07900171726942062, "learning_rate": 5.2461246906076284e-06, "loss": 0.0069, "step": 22420 }, { "epoch": 10.423988842398884, "grad_norm": 0.6379386186599731, "learning_rate": 5.294359700238912e-06, "loss": 0.0045, "step": 22422 }, { "epoch": 10.424918642491864, "grad_norm": 0.1064772680401802, "learning_rate": 5.3427891965844655e-06, "loss": 0.0033, "step": 22424 }, { "epoch": 10.425848442584844, "grad_norm": 0.7213382720947266, "learning_rate": 5.391412701664819e-06, "loss": 0.0174, "step": 22426 }, { "epoch": 10.426778242677825, "grad_norm": 0.41963550448417664, "learning_rate": 5.440229735585284e-06, "loss": 0.0092, "step": 22428 }, { "epoch": 10.427708042770805, "grad_norm": 0.2647441625595093, "learning_rate": 5.489239816541766e-06, "loss": 0.0041, "step": 22430 }, { "epoch": 10.428637842863784, "grad_norm": 1.1851787567138672, "learning_rate": 5.53844246082445e-06, "loss": 0.0267, "step": 22432 }, { "epoch": 10.429567642956764, "grad_norm": 0.3259734809398651, "learning_rate": 5.587837182822984e-06, "loss": 0.0062, "step": 22434 }, { "epoch": 10.430497443049743, "grad_norm": 0.0344047024846077, "learning_rate": 5.637423495031627e-06, "loss": 0.0019, "step": 22436 }, { "epoch": 10.431427243142725, "grad_norm": 0.11184666305780411, "learning_rate": 5.687200908053389e-06, "loss": 0.0059, "step": 22438 }, { "epoch": 10.432357043235704, "grad_norm": 0.5561648607254028, "learning_rate": 5.737168930605181e-06, "loss": 0.0078, "step": 22440 }, { "epoch": 10.433286843328684, "grad_norm": 0.23760418593883514, "learning_rate": 5.787327069523022e-06, "loss": 0.0034, "step": 22442 }, { "epoch": 10.434216643421664, "grad_norm": 0.15334148705005646, "learning_rate": 5.837674829766217e-06, "loss": 0.0046, "step": 22444 }, { "epoch": 10.435146443514645, "grad_norm": 0.19705082476139069, "learning_rate": 5.8882117144226954e-06, "loss": 0.0036, "step": 22446 }, { "epoch": 10.436076243607625, "grad_norm": 0.09918685257434845, "learning_rate": 5.938937224713701e-06, "loss": 0.0037, "step": 22448 }, { "epoch": 10.437006043700604, "grad_norm": 0.17184863984584808, "learning_rate": 5.9898508599991505e-06, "loss": 0.0082, "step": 22450 }, { "epoch": 10.437935843793584, "grad_norm": 0.9748515486717224, "learning_rate": 6.040952117781903e-06, "loss": 0.0092, "step": 22452 }, { "epoch": 10.438865643886565, "grad_norm": 0.8112996220588684, "learning_rate": 6.092240493713177e-06, "loss": 0.0081, "step": 22454 }, { "epoch": 10.439795443979545, "grad_norm": 0.5187569856643677, "learning_rate": 6.143715481597406e-06, "loss": 0.0043, "step": 22456 }, { "epoch": 10.440725244072524, "grad_norm": 0.14867565035820007, "learning_rate": 6.195376573397243e-06, "loss": 0.0371, "step": 22458 }, { "epoch": 10.441655044165504, "grad_norm": 0.5826921463012695, "learning_rate": 6.247223259238565e-06, "loss": 0.0133, "step": 22460 }, { "epoch": 10.442584844258484, "grad_norm": 0.1650768667459488, "learning_rate": 6.299255027415398e-06, "loss": 0.0032, "step": 22462 }, { "epoch": 10.443514644351465, "grad_norm": 0.14601534605026245, "learning_rate": 6.351471364395435e-06, "loss": 0.0035, "step": 22464 }, { "epoch": 10.444444444444445, "grad_norm": 0.32534059882164, "learning_rate": 6.403871754824389e-06, "loss": 0.0043, "step": 22466 }, { "epoch": 10.445374244537424, "grad_norm": 0.27625831961631775, "learning_rate": 6.456455681531442e-06, "loss": 0.0119, "step": 22468 }, { "epoch": 10.446304044630404, "grad_norm": 0.2917118966579437, "learning_rate": 6.5092226255347045e-06, "loss": 0.0045, "step": 22470 }, { "epoch": 10.447233844723385, "grad_norm": 0.4337752163410187, "learning_rate": 6.562172066045625e-06, "loss": 0.013, "step": 22472 }, { "epoch": 10.448163644816365, "grad_norm": 0.197195902466774, "learning_rate": 6.615303480474483e-06, "loss": 0.0086, "step": 22474 }, { "epoch": 10.449093444909344, "grad_norm": 1.4358623027801514, "learning_rate": 6.668616344435905e-06, "loss": 0.0156, "step": 22476 }, { "epoch": 10.450023245002324, "grad_norm": 0.7031444311141968, "learning_rate": 6.722110131753329e-06, "loss": 0.0068, "step": 22478 }, { "epoch": 10.450953045095304, "grad_norm": 0.290190726518631, "learning_rate": 6.775784314464793e-06, "loss": 0.006, "step": 22480 }, { "epoch": 10.451882845188285, "grad_norm": 0.21120233833789825, "learning_rate": 6.829638362827417e-06, "loss": 0.004, "step": 22482 }, { "epoch": 10.452812645281265, "grad_norm": 0.2701131999492645, "learning_rate": 6.8836717453238505e-06, "loss": 0.0047, "step": 22484 }, { "epoch": 10.453742445374244, "grad_norm": 0.2720363438129425, "learning_rate": 6.9378839286662975e-06, "loss": 0.0054, "step": 22486 }, { "epoch": 10.454672245467224, "grad_norm": 0.12361060082912445, "learning_rate": 6.992274377802274e-06, "loss": 0.0039, "step": 22488 }, { "epoch": 10.455602045560205, "grad_norm": 1.0261560678482056, "learning_rate": 7.046842555920255e-06, "loss": 0.0055, "step": 22490 }, { "epoch": 10.456531845653185, "grad_norm": 0.17213110625743866, "learning_rate": 7.101587924454223e-06, "loss": 0.0036, "step": 22492 }, { "epoch": 10.457461645746164, "grad_norm": 0.1809944361448288, "learning_rate": 7.156509943089498e-06, "loss": 0.005, "step": 22494 }, { "epoch": 10.458391445839144, "grad_norm": 0.12739291787147522, "learning_rate": 7.2116080697678e-06, "loss": 0.0035, "step": 22496 }, { "epoch": 10.459321245932125, "grad_norm": 1.000383973121643, "learning_rate": 7.266881760693115e-06, "loss": 0.0219, "step": 22498 }, { "epoch": 10.460251046025105, "grad_norm": 0.5865033864974976, "learning_rate": 7.322330470336301e-06, "loss": 0.0073, "step": 22500 }, { "epoch": 10.461180846118085, "grad_norm": 0.30863720178604126, "learning_rate": 7.377953651440856e-06, "loss": 0.0046, "step": 22502 }, { "epoch": 10.462110646211064, "grad_norm": 0.474889874458313, "learning_rate": 7.433750755028692e-06, "loss": 0.0057, "step": 22504 }, { "epoch": 10.463040446304044, "grad_norm": 0.2152971476316452, "learning_rate": 7.489721230404791e-06, "loss": 0.0057, "step": 22506 }, { "epoch": 10.463970246397025, "grad_norm": 0.4917824864387512, "learning_rate": 7.545864525163158e-06, "loss": 0.0091, "step": 22508 }, { "epoch": 10.464900046490005, "grad_norm": 0.8389261364936829, "learning_rate": 7.602180085192147e-06, "loss": 0.0068, "step": 22510 }, { "epoch": 10.465829846582984, "grad_norm": 0.38131874799728394, "learning_rate": 7.65866735467991e-06, "loss": 0.0259, "step": 22512 }, { "epoch": 10.466759646675964, "grad_norm": 0.18526369333267212, "learning_rate": 7.715325776119903e-06, "loss": 0.0041, "step": 22514 }, { "epoch": 10.467689446768945, "grad_norm": 0.3451327681541443, "learning_rate": 7.772154790316254e-06, "loss": 0.0056, "step": 22516 }, { "epoch": 10.468619246861925, "grad_norm": 0.08538637310266495, "learning_rate": 7.829153836389786e-06, "loss": 0.0031, "step": 22518 }, { "epoch": 10.469549046954905, "grad_norm": 0.36054813861846924, "learning_rate": 7.886322351782805e-06, "loss": 0.0045, "step": 22520 }, { "epoch": 10.470478847047884, "grad_norm": 0.6889427900314331, "learning_rate": 7.943659772265008e-06, "loss": 0.0162, "step": 22522 }, { "epoch": 10.471408647140866, "grad_norm": 0.5967922806739807, "learning_rate": 8.001165531939469e-06, "loss": 0.0055, "step": 22524 }, { "epoch": 10.472338447233845, "grad_norm": 0.19347800314426422, "learning_rate": 8.058839063247418e-06, "loss": 0.0049, "step": 22526 }, { "epoch": 10.473268247326825, "grad_norm": 0.15321624279022217, "learning_rate": 8.116679796974261e-06, "loss": 0.0074, "step": 22528 }, { "epoch": 10.474198047419804, "grad_norm": 0.5531384944915771, "learning_rate": 8.174687162255567e-06, "loss": 0.0066, "step": 22530 }, { "epoch": 10.475127847512784, "grad_norm": 0.40029194951057434, "learning_rate": 8.23286058658193e-06, "loss": 0.021, "step": 22532 }, { "epoch": 10.476057647605765, "grad_norm": 0.1126125156879425, "learning_rate": 8.291199495805132e-06, "loss": 0.0025, "step": 22534 }, { "epoch": 10.476987447698745, "grad_norm": 0.11905751377344131, "learning_rate": 8.349703314143696e-06, "loss": 0.0033, "step": 22536 }, { "epoch": 10.477917247791725, "grad_norm": 0.557388186454773, "learning_rate": 8.408371464188554e-06, "loss": 0.0094, "step": 22538 }, { "epoch": 10.478847047884704, "grad_norm": 1.0474581718444824, "learning_rate": 8.467203366908751e-06, "loss": 0.0067, "step": 22540 }, { "epoch": 10.479776847977686, "grad_norm": 0.13991093635559082, "learning_rate": 8.526198441657018e-06, "loss": 0.0064, "step": 22542 }, { "epoch": 10.480706648070665, "grad_norm": 0.09236449748277664, "learning_rate": 8.58535610617606e-06, "loss": 0.0023, "step": 22544 }, { "epoch": 10.481636448163645, "grad_norm": 0.07748722285032272, "learning_rate": 8.644675776603476e-06, "loss": 0.0022, "step": 22546 }, { "epoch": 10.482566248256624, "grad_norm": 0.2844489514827728, "learning_rate": 8.704156867478061e-06, "loss": 0.0063, "step": 22548 }, { "epoch": 10.483496048349604, "grad_norm": 0.5121362209320068, "learning_rate": 8.763798791745333e-06, "loss": 0.0065, "step": 22550 }, { "epoch": 10.484425848442585, "grad_norm": 0.7656883001327515, "learning_rate": 8.823600960763847e-06, "loss": 0.0076, "step": 22552 }, { "epoch": 10.485355648535565, "grad_norm": 0.2442542463541031, "learning_rate": 8.883562784310203e-06, "loss": 0.0045, "step": 22554 }, { "epoch": 10.486285448628545, "grad_norm": 0.34002411365509033, "learning_rate": 8.94368367058527e-06, "loss": 0.0063, "step": 22556 }, { "epoch": 10.487215248721524, "grad_norm": 0.33986932039260864, "learning_rate": 9.00396302622045e-06, "loss": 0.0033, "step": 22558 }, { "epoch": 10.488145048814506, "grad_norm": 0.0596858412027359, "learning_rate": 9.0644002562827e-06, "loss": 0.0022, "step": 22560 }, { "epoch": 10.489074848907485, "grad_norm": 0.3104974329471588, "learning_rate": 9.12499476428082e-06, "loss": 0.0089, "step": 22562 }, { "epoch": 10.490004649000465, "grad_norm": 0.3401077091693878, "learning_rate": 9.18574595217189e-06, "loss": 0.0045, "step": 22564 }, { "epoch": 10.490934449093444, "grad_norm": 0.04083971679210663, "learning_rate": 9.246653220365806e-06, "loss": 0.0089, "step": 22566 }, { "epoch": 10.491864249186424, "grad_norm": 0.12125978618860245, "learning_rate": 9.307715967732554e-06, "loss": 0.006, "step": 22568 }, { "epoch": 10.492794049279405, "grad_norm": 0.35303157567977905, "learning_rate": 9.368933591607327e-06, "loss": 0.0082, "step": 22570 }, { "epoch": 10.493723849372385, "grad_norm": 0.10247022658586502, "learning_rate": 9.430305487797174e-06, "loss": 0.0037, "step": 22572 }, { "epoch": 10.494653649465365, "grad_norm": 0.3270120322704315, "learning_rate": 9.491831050586115e-06, "loss": 0.004, "step": 22574 }, { "epoch": 10.495583449558344, "grad_norm": 0.09879304468631744, "learning_rate": 9.553509672741548e-06, "loss": 0.003, "step": 22576 }, { "epoch": 10.496513249651326, "grad_norm": 0.5492229461669922, "learning_rate": 9.615340745520652e-06, "loss": 0.0056, "step": 22578 }, { "epoch": 10.497443049744305, "grad_norm": 0.41931381821632385, "learning_rate": 9.677323658675557e-06, "loss": 0.0076, "step": 22580 }, { "epoch": 10.498372849837285, "grad_norm": 1.2610020637512207, "learning_rate": 9.739457800459936e-06, "loss": 0.0182, "step": 22582 }, { "epoch": 10.499302649930264, "grad_norm": 1.1422462463378906, "learning_rate": 9.801742557634755e-06, "loss": 0.008, "step": 22584 }, { "epoch": 10.500232450023246, "grad_norm": 0.11143627762794495, "learning_rate": 9.86417731547489e-06, "loss": 0.0034, "step": 22586 }, { "epoch": 10.501162250116225, "grad_norm": 0.19117452204227448, "learning_rate": 9.926761457774338e-06, "loss": 0.0056, "step": 22588 }, { "epoch": 10.502092050209205, "grad_norm": 0.1681513637304306, "learning_rate": 9.989494366852885e-06, "loss": 0.0021, "step": 22590 }, { "epoch": 10.503021850302185, "grad_norm": 0.16896633803844452, "learning_rate": 1.0052375423562044e-05, "loss": 0.0096, "step": 22592 }, { "epoch": 10.503951650395164, "grad_norm": 0.18751420080661774, "learning_rate": 1.0115404007291174e-05, "loss": 0.0039, "step": 22594 }, { "epoch": 10.504881450488146, "grad_norm": 0.434760719537735, "learning_rate": 1.0178579495973456e-05, "loss": 0.0177, "step": 22596 }, { "epoch": 10.505811250581125, "grad_norm": 0.13066914677619934, "learning_rate": 1.0241901266092606e-05, "loss": 0.0037, "step": 22598 }, { "epoch": 10.506741050674105, "grad_norm": 0.15350641310214996, "learning_rate": 1.0305368692688174e-05, "loss": 0.0042, "step": 22600 }, { "epoch": 10.507670850767084, "grad_norm": 0.2592790722846985, "learning_rate": 1.0368981149362285e-05, "loss": 0.0052, "step": 22602 }, { "epoch": 10.508600650860066, "grad_norm": 0.31360337138175964, "learning_rate": 1.0432738008285524e-05, "loss": 0.0036, "step": 22604 }, { "epoch": 10.509530450953045, "grad_norm": 0.5508562922477722, "learning_rate": 1.0496638640203723e-05, "loss": 0.0045, "step": 22606 }, { "epoch": 10.510460251046025, "grad_norm": 0.29629606008529663, "learning_rate": 1.0560682414443297e-05, "loss": 0.0111, "step": 22608 }, { "epoch": 10.511390051139005, "grad_norm": 0.06403326243162155, "learning_rate": 1.062486869891791e-05, "loss": 0.0032, "step": 22610 }, { "epoch": 10.512319851231986, "grad_norm": 0.14351579546928406, "learning_rate": 1.0689196860135134e-05, "loss": 0.006, "step": 22612 }, { "epoch": 10.513249651324966, "grad_norm": 0.2491249442100525, "learning_rate": 1.0753666263201846e-05, "loss": 0.0076, "step": 22614 }, { "epoch": 10.514179451417945, "grad_norm": 0.15846601128578186, "learning_rate": 1.0818276271831059e-05, "loss": 0.0036, "step": 22616 }, { "epoch": 10.515109251510925, "grad_norm": 0.08259318023920059, "learning_rate": 1.0883026248348076e-05, "loss": 0.0042, "step": 22618 }, { "epoch": 10.516039051603904, "grad_norm": 0.27626413106918335, "learning_rate": 1.0947915553696769e-05, "loss": 0.0047, "step": 22620 }, { "epoch": 10.516968851696886, "grad_norm": 0.11128726601600647, "learning_rate": 1.101294354744589e-05, "loss": 0.0054, "step": 22622 }, { "epoch": 10.517898651789865, "grad_norm": 0.2702813744544983, "learning_rate": 1.1078109587795249e-05, "loss": 0.0092, "step": 22624 }, { "epoch": 10.518828451882845, "grad_norm": 0.17337565124034882, "learning_rate": 1.1143413031582633e-05, "loss": 0.0046, "step": 22626 }, { "epoch": 10.519758251975825, "grad_norm": 0.3394123911857605, "learning_rate": 1.120885323428926e-05, "loss": 0.0036, "step": 22628 }, { "epoch": 10.520688052068806, "grad_norm": 0.32422611117362976, "learning_rate": 1.1274429550046607e-05, "loss": 0.0046, "step": 22630 }, { "epoch": 10.521617852161786, "grad_norm": 0.6002445816993713, "learning_rate": 1.1340141331643208e-05, "loss": 0.0161, "step": 22632 }, { "epoch": 10.522547652254765, "grad_norm": 0.5492870211601257, "learning_rate": 1.1405987930530157e-05, "loss": 0.0183, "step": 22634 }, { "epoch": 10.523477452347745, "grad_norm": 0.08584175258874893, "learning_rate": 1.1471968696828106e-05, "loss": 0.005, "step": 22636 }, { "epoch": 10.524407252440724, "grad_norm": 1.515620470046997, "learning_rate": 1.1538082979333381e-05, "loss": 0.0085, "step": 22638 }, { "epoch": 10.525337052533706, "grad_norm": 0.10233911871910095, "learning_rate": 1.1604330125525004e-05, "loss": 0.0057, "step": 22640 }, { "epoch": 10.526266852626685, "grad_norm": 0.15788708627223969, "learning_rate": 1.1670709481570238e-05, "loss": 0.004, "step": 22642 }, { "epoch": 10.527196652719665, "grad_norm": 0.09653967618942261, "learning_rate": 1.1737220392331485e-05, "loss": 0.0029, "step": 22644 }, { "epoch": 10.528126452812645, "grad_norm": 0.35341116786003113, "learning_rate": 1.1803862201373362e-05, "loss": 0.008, "step": 22646 }, { "epoch": 10.529056252905626, "grad_norm": 0.5448359251022339, "learning_rate": 1.1870634250967664e-05, "loss": 0.0074, "step": 22648 }, { "epoch": 10.529986052998606, "grad_norm": 0.7446950078010559, "learning_rate": 1.1937535882101217e-05, "loss": 0.0053, "step": 22650 }, { "epoch": 10.530915853091585, "grad_norm": 0.1811714470386505, "learning_rate": 1.2004566434482236e-05, "loss": 0.0068, "step": 22652 }, { "epoch": 10.531845653184565, "grad_norm": 0.29603147506713867, "learning_rate": 1.2071725246546076e-05, "loss": 0.004, "step": 22654 }, { "epoch": 10.532775453277544, "grad_norm": 0.557526707649231, "learning_rate": 1.2139011655462377e-05, "loss": 0.0073, "step": 22656 }, { "epoch": 10.533705253370526, "grad_norm": 0.4994624853134155, "learning_rate": 1.22064249971413e-05, "loss": 0.0054, "step": 22658 }, { "epoch": 10.534635053463505, "grad_norm": 0.15850022435188293, "learning_rate": 1.2273964606240672e-05, "loss": 0.0104, "step": 22660 }, { "epoch": 10.535564853556485, "grad_norm": 0.1108400970697403, "learning_rate": 1.2341629816171676e-05, "loss": 0.0043, "step": 22662 }, { "epoch": 10.536494653649465, "grad_norm": 0.3118402659893036, "learning_rate": 1.2409419959105847e-05, "loss": 0.0067, "step": 22664 }, { "epoch": 10.537424453742446, "grad_norm": 0.5598556399345398, "learning_rate": 1.2477334365982155e-05, "loss": 0.0069, "step": 22666 }, { "epoch": 10.538354253835426, "grad_norm": 0.12355009466409683, "learning_rate": 1.2545372366512666e-05, "loss": 0.0036, "step": 22668 }, { "epoch": 10.539284053928405, "grad_norm": 0.3084408938884735, "learning_rate": 1.2613533289189783e-05, "loss": 0.0035, "step": 22670 }, { "epoch": 10.540213854021385, "grad_norm": 0.09643596410751343, "learning_rate": 1.2681816461292718e-05, "loss": 0.0027, "step": 22672 }, { "epoch": 10.541143654114366, "grad_norm": 0.08917459100484848, "learning_rate": 1.2750221208894126e-05, "loss": 0.005, "step": 22674 }, { "epoch": 10.542073454207346, "grad_norm": 0.9377461075782776, "learning_rate": 1.2818746856866765e-05, "loss": 0.0093, "step": 22676 }, { "epoch": 10.543003254300325, "grad_norm": 0.6485529541969299, "learning_rate": 1.2887392728890002e-05, "loss": 0.0099, "step": 22678 }, { "epoch": 10.543933054393305, "grad_norm": 1.4629589319229126, "learning_rate": 1.2956158147457101e-05, "loss": 0.0295, "step": 22680 }, { "epoch": 10.544862854486286, "grad_norm": 0.12338091433048248, "learning_rate": 1.302504243388099e-05, "loss": 0.0032, "step": 22682 }, { "epoch": 10.545792654579266, "grad_norm": 0.18223831057548523, "learning_rate": 1.309404490830142e-05, "loss": 0.0041, "step": 22684 }, { "epoch": 10.546722454672246, "grad_norm": 0.48156335949897766, "learning_rate": 1.3163164889692128e-05, "loss": 0.0085, "step": 22686 }, { "epoch": 10.547652254765225, "grad_norm": 1.137645959854126, "learning_rate": 1.3232401695866653e-05, "loss": 0.0172, "step": 22688 }, { "epoch": 10.548582054858205, "grad_norm": 0.596655011177063, "learning_rate": 1.3301754643485664e-05, "loss": 0.0054, "step": 22690 }, { "epoch": 10.549511854951186, "grad_norm": 0.1905122846364975, "learning_rate": 1.337122304806342e-05, "loss": 0.004, "step": 22692 }, { "epoch": 10.550441655044166, "grad_norm": 0.17548340559005737, "learning_rate": 1.3440806223975117e-05, "loss": 0.0045, "step": 22694 }, { "epoch": 10.551371455137145, "grad_norm": 0.6011749505996704, "learning_rate": 1.351050348446275e-05, "loss": 0.0069, "step": 22696 }, { "epoch": 10.552301255230125, "grad_norm": 0.12605319917201996, "learning_rate": 1.358031414164234e-05, "loss": 0.0044, "step": 22698 }, { "epoch": 10.553231055323106, "grad_norm": 0.11375939846038818, "learning_rate": 1.3650237506511353e-05, "loss": 0.0029, "step": 22700 }, { "epoch": 10.554160855416086, "grad_norm": 0.14369754493236542, "learning_rate": 1.3720272888953932e-05, "loss": 0.0044, "step": 22702 }, { "epoch": 10.555090655509066, "grad_norm": 0.3424500524997711, "learning_rate": 1.3790419597749287e-05, "loss": 0.0081, "step": 22704 }, { "epoch": 10.556020455602045, "grad_norm": 0.5185351371765137, "learning_rate": 1.386067694057756e-05, "loss": 0.0073, "step": 22706 }, { "epoch": 10.556950255695025, "grad_norm": 0.21651050448417664, "learning_rate": 1.3931044224027465e-05, "loss": 0.0035, "step": 22708 }, { "epoch": 10.557880055788006, "grad_norm": 0.7775160074234009, "learning_rate": 1.400152075360216e-05, "loss": 0.0054, "step": 22710 }, { "epoch": 10.558809855880986, "grad_norm": 0.47531378269195557, "learning_rate": 1.4072105833726594e-05, "loss": 0.0091, "step": 22712 }, { "epoch": 10.559739655973965, "grad_norm": 0.22538062930107117, "learning_rate": 1.4142798767754837e-05, "loss": 0.0044, "step": 22714 }, { "epoch": 10.560669456066945, "grad_norm": 0.729753315448761, "learning_rate": 1.421359885797601e-05, "loss": 0.0069, "step": 22716 }, { "epoch": 10.561599256159926, "grad_norm": 0.17387817800045013, "learning_rate": 1.4284505405621655e-05, "loss": 0.0096, "step": 22718 }, { "epoch": 10.562529056252906, "grad_norm": 0.5130066275596619, "learning_rate": 1.435551771087308e-05, "loss": 0.0079, "step": 22720 }, { "epoch": 10.563458856345886, "grad_norm": 0.8799412846565247, "learning_rate": 1.442663507286735e-05, "loss": 0.0153, "step": 22722 }, { "epoch": 10.564388656438865, "grad_norm": 0.12854161858558655, "learning_rate": 1.449785678970481e-05, "loss": 0.0027, "step": 22724 }, { "epoch": 10.565318456531845, "grad_norm": 0.53229820728302, "learning_rate": 1.4569182158455707e-05, "loss": 0.0089, "step": 22726 }, { "epoch": 10.566248256624826, "grad_norm": 0.5461503863334656, "learning_rate": 1.464061047516793e-05, "loss": 0.009, "step": 22728 }, { "epoch": 10.567178056717806, "grad_norm": 0.3015953600406647, "learning_rate": 1.4712141034872351e-05, "loss": 0.0063, "step": 22730 }, { "epoch": 10.568107856810785, "grad_norm": 0.5202713012695312, "learning_rate": 1.4783773131591219e-05, "loss": 0.006, "step": 22732 }, { "epoch": 10.569037656903765, "grad_norm": 0.32928621768951416, "learning_rate": 1.4855506058344981e-05, "loss": 0.0065, "step": 22734 }, { "epoch": 10.569967456996746, "grad_norm": 0.44490697979927063, "learning_rate": 1.4927339107158448e-05, "loss": 0.0046, "step": 22736 }, { "epoch": 10.570897257089726, "grad_norm": 0.060439370572566986, "learning_rate": 1.4999271569068269e-05, "loss": 0.0026, "step": 22738 }, { "epoch": 10.571827057182706, "grad_norm": 0.4478328824043274, "learning_rate": 1.5071302734130413e-05, "loss": 0.0038, "step": 22740 }, { "epoch": 10.572756857275685, "grad_norm": 0.1894213855266571, "learning_rate": 1.5143431891426187e-05, "loss": 0.0122, "step": 22742 }, { "epoch": 10.573686657368667, "grad_norm": 0.14555898308753967, "learning_rate": 1.5215658329069938e-05, "loss": 0.0042, "step": 22744 }, { "epoch": 10.574616457461646, "grad_norm": 0.08482231199741364, "learning_rate": 1.5287981334215716e-05, "loss": 0.0037, "step": 22746 }, { "epoch": 10.575546257554626, "grad_norm": 0.3081377148628235, "learning_rate": 1.5360400193064988e-05, "loss": 0.005, "step": 22748 }, { "epoch": 10.576476057647605, "grad_norm": 0.7555133700370789, "learning_rate": 1.5432914190872696e-05, "loss": 0.006, "step": 22750 }, { "epoch": 10.577405857740585, "grad_norm": 0.39725029468536377, "learning_rate": 1.5505522611954785e-05, "loss": 0.0067, "step": 22752 }, { "epoch": 10.578335657833566, "grad_norm": 0.14415855705738068, "learning_rate": 1.5578224739695782e-05, "loss": 0.0074, "step": 22754 }, { "epoch": 10.579265457926546, "grad_norm": 0.20522409677505493, "learning_rate": 1.565101985655504e-05, "loss": 0.0045, "step": 22756 }, { "epoch": 10.580195258019526, "grad_norm": 0.15495365858078003, "learning_rate": 1.572390724407381e-05, "loss": 0.0048, "step": 22758 }, { "epoch": 10.581125058112505, "grad_norm": 0.5639813542366028, "learning_rate": 1.5796886182883012e-05, "loss": 0.0106, "step": 22760 }, { "epoch": 10.582054858205487, "grad_norm": 0.4853547215461731, "learning_rate": 1.58699559527103e-05, "loss": 0.0069, "step": 22762 }, { "epoch": 10.582984658298466, "grad_norm": 0.8916776776313782, "learning_rate": 1.594311583238639e-05, "loss": 0.0063, "step": 22764 }, { "epoch": 10.583914458391446, "grad_norm": 0.1342780739068985, "learning_rate": 1.601636509985264e-05, "loss": 0.0041, "step": 22766 }, { "epoch": 10.584844258484425, "grad_norm": 0.21337388455867767, "learning_rate": 1.608970303216868e-05, "loss": 0.004, "step": 22768 }, { "epoch": 10.585774058577407, "grad_norm": 0.6666198968887329, "learning_rate": 1.6163128905518554e-05, "loss": 0.0117, "step": 22770 }, { "epoch": 10.586703858670386, "grad_norm": 0.17873556911945343, "learning_rate": 1.6236641995218382e-05, "loss": 0.0052, "step": 22772 }, { "epoch": 10.587633658763366, "grad_norm": 0.23219525814056396, "learning_rate": 1.631024157572397e-05, "loss": 0.0041, "step": 22774 }, { "epoch": 10.588563458856346, "grad_norm": 0.8408297896385193, "learning_rate": 1.6383926920637005e-05, "loss": 0.0069, "step": 22776 }, { "epoch": 10.589493258949325, "grad_norm": 0.07044872641563416, "learning_rate": 1.6457697302712887e-05, "loss": 0.0035, "step": 22778 }, { "epoch": 10.590423059042307, "grad_norm": 0.255619615316391, "learning_rate": 1.6531551993867554e-05, "loss": 0.0071, "step": 22780 }, { "epoch": 10.591352859135286, "grad_norm": 0.2903232276439667, "learning_rate": 1.660549026518553e-05, "loss": 0.0082, "step": 22782 }, { "epoch": 10.592282659228266, "grad_norm": 0.2374744862318039, "learning_rate": 1.6679511386925418e-05, "loss": 0.007, "step": 22784 }, { "epoch": 10.593212459321245, "grad_norm": 0.13213902711868286, "learning_rate": 1.6753614628528628e-05, "loss": 0.0047, "step": 22786 }, { "epoch": 10.594142259414227, "grad_norm": 0.08866722881793976, "learning_rate": 1.6827799258626425e-05, "loss": 0.0183, "step": 22788 }, { "epoch": 10.595072059507206, "grad_norm": 0.17729850113391876, "learning_rate": 1.6902064545046288e-05, "loss": 0.0051, "step": 22790 }, { "epoch": 10.596001859600186, "grad_norm": 0.335582971572876, "learning_rate": 1.6976409754819825e-05, "loss": 0.0045, "step": 22792 }, { "epoch": 10.596931659693166, "grad_norm": 0.3580114245414734, "learning_rate": 1.7050834154189652e-05, "loss": 0.0043, "step": 22794 }, { "epoch": 10.597861459786145, "grad_norm": 0.43957701325416565, "learning_rate": 1.7125337008617342e-05, "loss": 0.0055, "step": 22796 }, { "epoch": 10.598791259879127, "grad_norm": 0.20671026408672333, "learning_rate": 1.719991758278963e-05, "loss": 0.0104, "step": 22798 }, { "epoch": 10.599721059972106, "grad_norm": 0.5289489030838013, "learning_rate": 1.7274575140626182e-05, "loss": 0.0107, "step": 22800 }, { "epoch": 10.600650860065086, "grad_norm": 0.9579562544822693, "learning_rate": 1.7349308945287382e-05, "loss": 0.0094, "step": 22802 }, { "epoch": 10.601580660158065, "grad_norm": 0.31517085433006287, "learning_rate": 1.742411825918059e-05, "loss": 0.0084, "step": 22804 }, { "epoch": 10.602510460251047, "grad_norm": 0.1506280153989792, "learning_rate": 1.7499002343967898e-05, "loss": 0.0033, "step": 22806 }, { "epoch": 10.603440260344026, "grad_norm": 0.15492600202560425, "learning_rate": 1.7573960460573974e-05, "loss": 0.0056, "step": 22808 }, { "epoch": 10.604370060437006, "grad_norm": 0.7925997376441956, "learning_rate": 1.7648991869192456e-05, "loss": 0.0136, "step": 22810 }, { "epoch": 10.605299860529986, "grad_norm": 1.036083698272705, "learning_rate": 1.7724095829293297e-05, "loss": 0.0073, "step": 22812 }, { "epoch": 10.606229660622965, "grad_norm": 0.1985749900341034, "learning_rate": 1.779927159963073e-05, "loss": 0.0036, "step": 22814 }, { "epoch": 10.607159460715947, "grad_norm": 0.21001023054122925, "learning_rate": 1.7874518438250594e-05, "loss": 0.0069, "step": 22816 }, { "epoch": 10.608089260808926, "grad_norm": 1.8213242292404175, "learning_rate": 1.79498356024968e-05, "loss": 0.022, "step": 22818 }, { "epoch": 10.609019060901906, "grad_norm": 0.08736580610275269, "learning_rate": 1.802522234901917e-05, "loss": 0.0115, "step": 22820 }, { "epoch": 10.609948860994885, "grad_norm": 0.1729145497083664, "learning_rate": 1.8100677933781296e-05, "loss": 0.0039, "step": 22822 }, { "epoch": 10.610878661087867, "grad_norm": 1.1554335355758667, "learning_rate": 1.8176201612066845e-05, "loss": 0.0068, "step": 22824 }, { "epoch": 10.611808461180846, "grad_norm": 0.2675023078918457, "learning_rate": 1.8251792638487434e-05, "loss": 0.0065, "step": 22826 }, { "epoch": 10.612738261273826, "grad_norm": 0.43095239996910095, "learning_rate": 1.8327450266990494e-05, "loss": 0.0051, "step": 22828 }, { "epoch": 10.613668061366806, "grad_norm": 1.2364130020141602, "learning_rate": 1.8403173750865597e-05, "loss": 0.0188, "step": 22830 }, { "epoch": 10.614597861459787, "grad_norm": 0.6728449463844299, "learning_rate": 1.8478962342752536e-05, "loss": 0.013, "step": 22832 }, { "epoch": 10.615527661552767, "grad_norm": 0.46822014451026917, "learning_rate": 1.8554815294648327e-05, "loss": 0.0111, "step": 22834 }, { "epoch": 10.616457461645746, "grad_norm": 0.12614677846431732, "learning_rate": 1.863073185791531e-05, "loss": 0.0108, "step": 22836 }, { "epoch": 10.617387261738726, "grad_norm": 0.11386823654174805, "learning_rate": 1.8706711283287647e-05, "loss": 0.0037, "step": 22838 }, { "epoch": 10.618317061831707, "grad_norm": 0.24477499723434448, "learning_rate": 1.878275282087857e-05, "loss": 0.006, "step": 22840 }, { "epoch": 10.619246861924687, "grad_norm": 0.17588980495929718, "learning_rate": 1.885885572018932e-05, "loss": 0.0038, "step": 22842 }, { "epoch": 10.620176662017666, "grad_norm": 0.14097468554973602, "learning_rate": 1.893501923011483e-05, "loss": 0.0071, "step": 22844 }, { "epoch": 10.621106462110646, "grad_norm": 0.2770836651325226, "learning_rate": 1.901124259895201e-05, "loss": 0.0096, "step": 22846 }, { "epoch": 10.622036262203626, "grad_norm": 1.391757845878601, "learning_rate": 1.90875250744068e-05, "loss": 0.0115, "step": 22848 }, { "epoch": 10.622966062296607, "grad_norm": 0.7563337683677673, "learning_rate": 1.916386590360232e-05, "loss": 0.0044, "step": 22850 }, { "epoch": 10.623895862389586, "grad_norm": 0.12931643426418304, "learning_rate": 1.9240264333085225e-05, "loss": 0.0037, "step": 22852 }, { "epoch": 10.624825662482566, "grad_norm": 0.25058016180992126, "learning_rate": 1.93167196088337e-05, "loss": 0.0102, "step": 22854 }, { "epoch": 10.625755462575546, "grad_norm": 0.20004801452159882, "learning_rate": 1.939323097626537e-05, "loss": 0.0105, "step": 22856 }, { "epoch": 10.626685262668527, "grad_norm": 0.7300770282745361, "learning_rate": 1.9469797680243754e-05, "loss": 0.0113, "step": 22858 }, { "epoch": 10.627615062761507, "grad_norm": 1.0763893127441406, "learning_rate": 1.9546418965086235e-05, "loss": 0.0181, "step": 22860 }, { "epoch": 10.628544862854486, "grad_norm": 0.2725412845611572, "learning_rate": 1.9623094074572007e-05, "loss": 0.0128, "step": 22862 }, { "epoch": 10.629474662947466, "grad_norm": 0.17217792570590973, "learning_rate": 1.9699822251948684e-05, "loss": 0.01, "step": 22864 }, { "epoch": 10.630404463040446, "grad_norm": 0.2010309398174286, "learning_rate": 1.9776602739939792e-05, "loss": 0.0073, "step": 22866 }, { "epoch": 10.631334263133427, "grad_norm": 0.8487581610679626, "learning_rate": 1.985343478075292e-05, "loss": 0.0259, "step": 22868 }, { "epoch": 10.632264063226406, "grad_norm": 4.195456504821777, "learning_rate": 1.9930317616087176e-05, "loss": 0.0221, "step": 22870 }, { "epoch": 10.633193863319386, "grad_norm": 1.6786143779754639, "learning_rate": 2.0007250487139853e-05, "loss": 0.0154, "step": 22872 }, { "epoch": 10.634123663412366, "grad_norm": 0.570094108581543, "learning_rate": 2.0084232634614388e-05, "loss": 0.0193, "step": 22874 }, { "epoch": 10.635053463505347, "grad_norm": 0.7358109951019287, "learning_rate": 2.0161263298728424e-05, "loss": 0.016, "step": 22876 }, { "epoch": 10.635983263598327, "grad_norm": 0.7672385573387146, "learning_rate": 2.023834171922022e-05, "loss": 0.0107, "step": 22878 }, { "epoch": 10.636913063691306, "grad_norm": 0.6218075752258301, "learning_rate": 2.0315467135356887e-05, "loss": 0.0056, "step": 22880 }, { "epoch": 10.637842863784286, "grad_norm": 0.23817703127861023, "learning_rate": 2.0392638785941535e-05, "loss": 0.0071, "step": 22882 }, { "epoch": 10.638772663877266, "grad_norm": 0.23405565321445465, "learning_rate": 2.0469855909321476e-05, "loss": 0.0042, "step": 22884 }, { "epoch": 10.639702463970247, "grad_norm": 0.6403294205665588, "learning_rate": 2.0547117743394693e-05, "loss": 0.0125, "step": 22886 }, { "epoch": 10.640632264063226, "grad_norm": 0.6268934011459351, "learning_rate": 2.062442352561791e-05, "loss": 0.007, "step": 22888 }, { "epoch": 10.641562064156206, "grad_norm": 0.16105586290359497, "learning_rate": 2.0701772493014614e-05, "loss": 0.007, "step": 22890 }, { "epoch": 10.642491864249186, "grad_norm": 1.0119338035583496, "learning_rate": 2.0779163882181716e-05, "loss": 0.0127, "step": 22892 }, { "epoch": 10.643421664342167, "grad_norm": 0.18098635971546173, "learning_rate": 2.085659692929693e-05, "loss": 0.008, "step": 22894 }, { "epoch": 10.644351464435147, "grad_norm": 0.22407926619052887, "learning_rate": 2.0934070870127874e-05, "loss": 0.0037, "step": 22896 }, { "epoch": 10.645281264528126, "grad_norm": 0.8223317265510559, "learning_rate": 2.1011584940037846e-05, "loss": 0.0165, "step": 22898 }, { "epoch": 10.646211064621106, "grad_norm": 0.6572918891906738, "learning_rate": 2.1089138373994274e-05, "loss": 0.0143, "step": 22900 }, { "epoch": 10.647140864714087, "grad_norm": 0.4221748113632202, "learning_rate": 2.1166730406575893e-05, "loss": 0.0109, "step": 22902 }, { "epoch": 10.648070664807067, "grad_norm": 0.7956501841545105, "learning_rate": 2.124436027198102e-05, "loss": 0.0166, "step": 22904 }, { "epoch": 10.649000464900046, "grad_norm": 0.22467471659183502, "learning_rate": 2.1322027204034056e-05, "loss": 0.005, "step": 22906 }, { "epoch": 10.649930264993026, "grad_norm": 0.11025435477495193, "learning_rate": 2.1399730436193548e-05, "loss": 0.0019, "step": 22908 }, { "epoch": 10.650860065086006, "grad_norm": 1.105094075202942, "learning_rate": 2.1477469201560327e-05, "loss": 0.0132, "step": 22910 }, { "epoch": 10.651789865178987, "grad_norm": 0.1728152483701706, "learning_rate": 2.1555242732883977e-05, "loss": 0.0086, "step": 22912 }, { "epoch": 10.652719665271967, "grad_norm": 0.2071048468351364, "learning_rate": 2.1633050262570982e-05, "loss": 0.0068, "step": 22914 }, { "epoch": 10.653649465364946, "grad_norm": 0.36849918961524963, "learning_rate": 2.1710891022692773e-05, "loss": 0.0076, "step": 22916 }, { "epoch": 10.654579265457926, "grad_norm": 0.36251717805862427, "learning_rate": 2.1788764244992305e-05, "loss": 0.0124, "step": 22918 }, { "epoch": 10.655509065550907, "grad_norm": 0.8086998462677002, "learning_rate": 2.1866669160892486e-05, "loss": 0.0077, "step": 22920 }, { "epoch": 10.656438865643887, "grad_norm": 1.656042456626892, "learning_rate": 2.1944605001502718e-05, "loss": 0.0187, "step": 22922 }, { "epoch": 10.657368665736866, "grad_norm": 0.1510259360074997, "learning_rate": 2.2022570997628246e-05, "loss": 0.0039, "step": 22924 }, { "epoch": 10.658298465829846, "grad_norm": 0.8558750152587891, "learning_rate": 2.2100566379775994e-05, "loss": 0.0109, "step": 22926 }, { "epoch": 10.659228265922827, "grad_norm": 0.3504299819469452, "learning_rate": 2.2178590378162847e-05, "loss": 0.0075, "step": 22928 }, { "epoch": 10.660158066015807, "grad_norm": 0.2532398998737335, "learning_rate": 2.2256642222723798e-05, "loss": 0.0053, "step": 22930 }, { "epoch": 10.661087866108787, "grad_norm": 0.16361813247203827, "learning_rate": 2.233472114311847e-05, "loss": 0.0107, "step": 22932 }, { "epoch": 10.662017666201766, "grad_norm": 0.20479516685009003, "learning_rate": 2.241282636873945e-05, "loss": 0.005, "step": 22934 }, { "epoch": 10.662947466294746, "grad_norm": 0.9936437010765076, "learning_rate": 2.2490957128719502e-05, "loss": 0.017, "step": 22936 }, { "epoch": 10.663877266387727, "grad_norm": 0.22494655847549438, "learning_rate": 2.2569112651939924e-05, "loss": 0.0256, "step": 22938 }, { "epoch": 10.664807066480707, "grad_norm": 0.8132175207138062, "learning_rate": 2.2647292167037087e-05, "loss": 0.0106, "step": 22940 }, { "epoch": 10.665736866573686, "grad_norm": 1.3903089761734009, "learning_rate": 2.2725494902410557e-05, "loss": 0.0245, "step": 22942 }, { "epoch": 10.666666666666666, "grad_norm": 1.3674107789993286, "learning_rate": 2.2803720086231262e-05, "loss": 0.0178, "step": 22944 }, { "epoch": 10.667596466759647, "grad_norm": 0.6414633393287659, "learning_rate": 2.288196694644822e-05, "loss": 0.0109, "step": 22946 }, { "epoch": 10.668526266852627, "grad_norm": 1.0026284456253052, "learning_rate": 2.2960234710795982e-05, "loss": 0.0183, "step": 22948 }, { "epoch": 10.669456066945607, "grad_norm": 0.8474233150482178, "learning_rate": 2.3038522606803836e-05, "loss": 0.0095, "step": 22950 }, { "epoch": 10.670385867038586, "grad_norm": 0.7837043404579163, "learning_rate": 2.3116829861801683e-05, "loss": 0.0123, "step": 22952 }, { "epoch": 10.671315667131566, "grad_norm": 0.16905754804611206, "learning_rate": 2.3195155702928528e-05, "loss": 0.0067, "step": 22954 }, { "epoch": 10.672245467224547, "grad_norm": 0.3487825095653534, "learning_rate": 2.327349935713976e-05, "loss": 0.0099, "step": 22956 }, { "epoch": 10.673175267317527, "grad_norm": 1.1940475702285767, "learning_rate": 2.3351860051215498e-05, "loss": 0.0202, "step": 22958 }, { "epoch": 10.674105067410506, "grad_norm": 0.12465672940015793, "learning_rate": 2.3430237011767147e-05, "loss": 0.015, "step": 22960 }, { "epoch": 10.675034867503486, "grad_norm": 0.6752698421478271, "learning_rate": 2.3508629465245582e-05, "loss": 0.0255, "step": 22962 }, { "epoch": 10.675964667596467, "grad_norm": 1.2607554197311401, "learning_rate": 2.3587036637949276e-05, "loss": 0.0216, "step": 22964 }, { "epoch": 10.676894467689447, "grad_norm": 0.4245295524597168, "learning_rate": 2.3665457756030914e-05, "loss": 0.0103, "step": 22966 }, { "epoch": 10.677824267782427, "grad_norm": 0.9590421319007874, "learning_rate": 2.374389204550573e-05, "loss": 0.0105, "step": 22968 }, { "epoch": 10.678754067875406, "grad_norm": 1.3745394945144653, "learning_rate": 2.382233873225876e-05, "loss": 0.0183, "step": 22970 }, { "epoch": 10.679683867968386, "grad_norm": 0.8555580973625183, "learning_rate": 2.3900797042053246e-05, "loss": 0.0104, "step": 22972 }, { "epoch": 10.680613668061367, "grad_norm": 1.115627646446228, "learning_rate": 2.3979266200537337e-05, "loss": 0.0162, "step": 22974 }, { "epoch": 10.681543468154347, "grad_norm": 1.3790943622589111, "learning_rate": 2.4057745433251587e-05, "loss": 0.0141, "step": 22976 }, { "epoch": 10.682473268247326, "grad_norm": 0.5103768706321716, "learning_rate": 2.4136233965638185e-05, "loss": 0.0112, "step": 22978 }, { "epoch": 10.683403068340306, "grad_norm": 1.9008800983428955, "learning_rate": 2.4214731023046827e-05, "loss": 0.0193, "step": 22980 }, { "epoch": 10.684332868433287, "grad_norm": 0.3564934730529785, "learning_rate": 2.4293235830743063e-05, "loss": 0.0087, "step": 22982 }, { "epoch": 10.685262668526267, "grad_norm": 0.21232077479362488, "learning_rate": 2.4371747613916498e-05, "loss": 0.0051, "step": 22984 }, { "epoch": 10.686192468619247, "grad_norm": 0.8195787072181702, "learning_rate": 2.4450265597687345e-05, "loss": 0.0101, "step": 22986 }, { "epoch": 10.687122268712226, "grad_norm": 1.2497893571853638, "learning_rate": 2.452878900711481e-05, "loss": 0.0221, "step": 22988 }, { "epoch": 10.688052068805208, "grad_norm": 0.7467361688613892, "learning_rate": 2.4607317067204364e-05, "loss": 0.0105, "step": 22990 }, { "epoch": 10.688981868898187, "grad_norm": 0.9449247121810913, "learning_rate": 2.46858490029161e-05, "loss": 0.0083, "step": 22992 }, { "epoch": 10.689911668991167, "grad_norm": 0.8456018567085266, "learning_rate": 2.476438403917136e-05, "loss": 0.0178, "step": 22994 }, { "epoch": 10.690841469084146, "grad_norm": 0.5778029561042786, "learning_rate": 2.4842921400860846e-05, "loss": 0.0075, "step": 22996 }, { "epoch": 10.691771269177128, "grad_norm": 0.7022621631622314, "learning_rate": 2.492146031285287e-05, "loss": 0.0083, "step": 22998 }, { "epoch": 10.692701069270107, "grad_norm": 1.2288038730621338, "learning_rate": 2.4999999999999896e-05, "loss": 0.0097, "step": 23000 }, { "epoch": 10.692701069270107, "eval_cer": 0.1288261980497471, "eval_loss": 0.20956149697303772, "eval_runtime": 398.3378, "eval_samples_per_second": 31.867, "eval_steps_per_second": 0.997, "step": 23000 }, { "epoch": 10.693630869363087, "grad_norm": 0.8201067447662354, "learning_rate": 2.507853968714693e-05, "loss": 0.0403, "step": 23002 }, { "epoch": 10.694560669456067, "grad_norm": 1.2071304321289062, "learning_rate": 2.5157078599138953e-05, "loss": 0.0298, "step": 23004 }, { "epoch": 10.695490469549046, "grad_norm": 0.7394473552703857, "learning_rate": 2.523561596082862e-05, "loss": 0.0187, "step": 23006 }, { "epoch": 10.696420269642028, "grad_norm": 0.7067710757255554, "learning_rate": 2.5314150997083874e-05, "loss": 0.0142, "step": 23008 }, { "epoch": 10.697350069735007, "grad_norm": 1.4752377271652222, "learning_rate": 2.5392682932795435e-05, "loss": 0.0222, "step": 23010 }, { "epoch": 10.698279869827987, "grad_norm": 1.1638498306274414, "learning_rate": 2.5471210992885168e-05, "loss": 0.0188, "step": 23012 }, { "epoch": 10.699209669920966, "grad_norm": 0.23454156517982483, "learning_rate": 2.554973440231263e-05, "loss": 0.0299, "step": 23014 }, { "epoch": 10.700139470013948, "grad_norm": 1.0957752466201782, "learning_rate": 2.56282523860833e-05, "loss": 0.0098, "step": 23016 }, { "epoch": 10.701069270106927, "grad_norm": 0.4520934820175171, "learning_rate": 2.5706764169256733e-05, "loss": 0.0081, "step": 23018 }, { "epoch": 10.701999070199907, "grad_norm": 1.0784863233566284, "learning_rate": 2.5785268976953152e-05, "loss": 0.0289, "step": 23020 }, { "epoch": 10.702928870292887, "grad_norm": 0.6399142742156982, "learning_rate": 2.586376603436179e-05, "loss": 0.0109, "step": 23022 }, { "epoch": 10.703858670385866, "grad_norm": 1.7582039833068848, "learning_rate": 2.5942254566748208e-05, "loss": 0.0158, "step": 23024 }, { "epoch": 10.704788470478848, "grad_norm": 0.6165465116500854, "learning_rate": 2.6020733799462635e-05, "loss": 0.0088, "step": 23026 }, { "epoch": 10.705718270571827, "grad_norm": 0.17531093955039978, "learning_rate": 2.6099202957946553e-05, "loss": 0.0193, "step": 23028 }, { "epoch": 10.706648070664807, "grad_norm": 0.3332619369029999, "learning_rate": 2.617766126774103e-05, "loss": 0.0073, "step": 23030 }, { "epoch": 10.707577870757786, "grad_norm": 0.3554869294166565, "learning_rate": 2.6256107954494242e-05, "loss": 0.0171, "step": 23032 }, { "epoch": 10.708507670850768, "grad_norm": 0.5297207236289978, "learning_rate": 2.6334542243969058e-05, "loss": 0.0106, "step": 23034 }, { "epoch": 10.709437470943747, "grad_norm": 1.2479946613311768, "learning_rate": 2.641296336205052e-05, "loss": 0.0208, "step": 23036 }, { "epoch": 10.710367271036727, "grad_norm": 1.4682824611663818, "learning_rate": 2.649137053475421e-05, "loss": 0.0136, "step": 23038 }, { "epoch": 10.711297071129707, "grad_norm": 1.990801215171814, "learning_rate": 2.6569762988232818e-05, "loss": 0.0159, "step": 23040 }, { "epoch": 10.712226871222686, "grad_norm": 0.7166390419006348, "learning_rate": 2.6648139948784477e-05, "loss": 0.0119, "step": 23042 }, { "epoch": 10.713156671315668, "grad_norm": 1.4081639051437378, "learning_rate": 2.672650064286004e-05, "loss": 0.0106, "step": 23044 }, { "epoch": 10.714086471408647, "grad_norm": 0.9663865566253662, "learning_rate": 2.6804844297071448e-05, "loss": 0.0348, "step": 23046 }, { "epoch": 10.715016271501627, "grad_norm": 0.2907750606536865, "learning_rate": 2.688317013819829e-05, "loss": 0.0068, "step": 23048 }, { "epoch": 10.715946071594606, "grad_norm": 0.7330861687660217, "learning_rate": 2.696147739319596e-05, "loss": 0.0406, "step": 23050 }, { "epoch": 10.716875871687588, "grad_norm": 0.7794568538665771, "learning_rate": 2.7039765289203814e-05, "loss": 0.019, "step": 23052 }, { "epoch": 10.717805671780567, "grad_norm": 0.6224231123924255, "learning_rate": 2.7118033053551748e-05, "loss": 0.0307, "step": 23054 }, { "epoch": 10.718735471873547, "grad_norm": 0.7219163775444031, "learning_rate": 2.7196279913768706e-05, "loss": 0.01, "step": 23056 }, { "epoch": 10.719665271966527, "grad_norm": 0.70976322889328, "learning_rate": 2.7274505097589232e-05, "loss": 0.0127, "step": 23058 }, { "epoch": 10.720595072059508, "grad_norm": 1.3702698945999146, "learning_rate": 2.7352707832962885e-05, "loss": 0.0225, "step": 23060 }, { "epoch": 10.721524872152488, "grad_norm": 0.7564202547073364, "learning_rate": 2.7430887348060054e-05, "loss": 0.0102, "step": 23062 }, { "epoch": 10.722454672245467, "grad_norm": 0.3703429400920868, "learning_rate": 2.7509042871280307e-05, "loss": 0.0403, "step": 23064 }, { "epoch": 10.723384472338447, "grad_norm": 1.518027901649475, "learning_rate": 2.758717363126054e-05, "loss": 0.0339, "step": 23066 }, { "epoch": 10.724314272431426, "grad_norm": 0.5072012543678284, "learning_rate": 2.766527885688152e-05, "loss": 0.0074, "step": 23068 }, { "epoch": 10.725244072524408, "grad_norm": 0.4646470248699188, "learning_rate": 2.7743357777276018e-05, "loss": 0.0119, "step": 23070 }, { "epoch": 10.726173872617387, "grad_norm": 0.1276303231716156, "learning_rate": 2.7821409621836965e-05, "loss": 0.0044, "step": 23072 }, { "epoch": 10.727103672710367, "grad_norm": 0.8329805731773376, "learning_rate": 2.7899433620224e-05, "loss": 0.0178, "step": 23074 }, { "epoch": 10.728033472803347, "grad_norm": 0.4759310185909271, "learning_rate": 2.7977429002371747e-05, "loss": 0.0114, "step": 23076 }, { "epoch": 10.728963272896328, "grad_norm": 0.75608891248703, "learning_rate": 2.8055394998497108e-05, "loss": 0.0159, "step": 23078 }, { "epoch": 10.729893072989308, "grad_norm": 0.3132500946521759, "learning_rate": 2.813333083910751e-05, "loss": 0.0134, "step": 23080 }, { "epoch": 10.730822873082287, "grad_norm": 0.8404637575149536, "learning_rate": 2.8211235755007518e-05, "loss": 0.0075, "step": 23082 }, { "epoch": 10.731752673175267, "grad_norm": 0.21625874936580658, "learning_rate": 2.8289108977307043e-05, "loss": 0.0117, "step": 23084 }, { "epoch": 10.732682473268248, "grad_norm": 0.2655988931655884, "learning_rate": 2.836694973742884e-05, "loss": 0.0146, "step": 23086 }, { "epoch": 10.733612273361228, "grad_norm": 0.6322298645973206, "learning_rate": 2.8444757267116012e-05, "loss": 0.0092, "step": 23088 }, { "epoch": 10.734542073454207, "grad_norm": 1.0489224195480347, "learning_rate": 2.8522530798439493e-05, "loss": 0.0228, "step": 23090 }, { "epoch": 10.735471873547187, "grad_norm": 0.38567474484443665, "learning_rate": 2.860026956380627e-05, "loss": 0.0089, "step": 23092 }, { "epoch": 10.736401673640167, "grad_norm": 1.0016534328460693, "learning_rate": 2.8677972795965936e-05, "loss": 0.0114, "step": 23094 }, { "epoch": 10.737331473733148, "grad_norm": 0.3353368937969208, "learning_rate": 2.875563972801897e-05, "loss": 0.0147, "step": 23096 }, { "epoch": 10.738261273826128, "grad_norm": 0.5315009951591492, "learning_rate": 2.8833269593423926e-05, "loss": 0.0255, "step": 23098 }, { "epoch": 10.739191073919107, "grad_norm": 0.3518451452255249, "learning_rate": 2.8910861626005718e-05, "loss": 0.0143, "step": 23100 }, { "epoch": 10.740120874012087, "grad_norm": 0.5111885070800781, "learning_rate": 2.8988415059962146e-05, "loss": 0.0089, "step": 23102 }, { "epoch": 10.741050674105068, "grad_norm": 0.19525249302387238, "learning_rate": 2.906592912987194e-05, "loss": 0.0223, "step": 23104 }, { "epoch": 10.741980474198048, "grad_norm": 3.24288272857666, "learning_rate": 2.9143403070702878e-05, "loss": 0.0375, "step": 23106 }, { "epoch": 10.742910274291027, "grad_norm": 2.0523757934570312, "learning_rate": 2.9220836117818276e-05, "loss": 0.0185, "step": 23108 }, { "epoch": 10.743840074384007, "grad_norm": 0.6690067648887634, "learning_rate": 2.92982275069852e-05, "loss": 0.0074, "step": 23110 }, { "epoch": 10.744769874476987, "grad_norm": 1.7016727924346924, "learning_rate": 2.9375576474381908e-05, "loss": 0.014, "step": 23112 }, { "epoch": 10.745699674569968, "grad_norm": 0.26203417778015137, "learning_rate": 2.94528822566053e-05, "loss": 0.0128, "step": 23114 }, { "epoch": 10.746629474662948, "grad_norm": 1.0793721675872803, "learning_rate": 2.9530144090678513e-05, "loss": 0.0199, "step": 23116 }, { "epoch": 10.747559274755927, "grad_norm": 0.21389827132225037, "learning_rate": 2.9607361214058277e-05, "loss": 0.0046, "step": 23118 }, { "epoch": 10.748489074848907, "grad_norm": 0.7717699408531189, "learning_rate": 2.96845328646431e-05, "loss": 0.0184, "step": 23120 }, { "epoch": 10.749418874941888, "grad_norm": 0.17971758544445038, "learning_rate": 2.976165828077977e-05, "loss": 0.0092, "step": 23122 }, { "epoch": 10.750348675034868, "grad_norm": 0.9746326804161072, "learning_rate": 2.9838736701271396e-05, "loss": 0.0184, "step": 23124 }, { "epoch": 10.751278475127847, "grad_norm": 0.7459456324577332, "learning_rate": 2.991576736538542e-05, "loss": 0.0434, "step": 23126 }, { "epoch": 10.752208275220827, "grad_norm": 1.0268982648849487, "learning_rate": 2.999274951286014e-05, "loss": 0.0167, "step": 23128 }, { "epoch": 10.753138075313807, "grad_norm": 0.8809245228767395, "learning_rate": 3.006968238391282e-05, "loss": 0.0169, "step": 23130 }, { "epoch": 10.754067875406788, "grad_norm": 0.20097997784614563, "learning_rate": 3.0146565219246904e-05, "loss": 0.0061, "step": 23132 }, { "epoch": 10.754997675499768, "grad_norm": 1.3820667266845703, "learning_rate": 3.02233972600602e-05, "loss": 0.0331, "step": 23134 }, { "epoch": 10.755927475592747, "grad_norm": 1.5444587469100952, "learning_rate": 3.030017774805132e-05, "loss": 0.0148, "step": 23136 }, { "epoch": 10.756857275685727, "grad_norm": 0.6607491970062256, "learning_rate": 3.0376905925427653e-05, "loss": 0.0113, "step": 23138 }, { "epoch": 10.757787075778708, "grad_norm": 1.8618332147598267, "learning_rate": 3.0453581034913594e-05, "loss": 0.0212, "step": 23140 }, { "epoch": 10.758716875871688, "grad_norm": 1.918606162071228, "learning_rate": 3.0530202319756245e-05, "loss": 0.0442, "step": 23142 }, { "epoch": 10.759646675964667, "grad_norm": 1.3806170225143433, "learning_rate": 3.060676902373463e-05, "loss": 0.0309, "step": 23144 }, { "epoch": 10.760576476057647, "grad_norm": 0.9517885446548462, "learning_rate": 3.068328039116613e-05, "loss": 0.012, "step": 23146 }, { "epoch": 10.761506276150628, "grad_norm": 1.4554312229156494, "learning_rate": 3.075973566691477e-05, "loss": 0.0225, "step": 23148 }, { "epoch": 10.762436076243608, "grad_norm": 1.292619228363037, "learning_rate": 3.083613409639768e-05, "loss": 0.0207, "step": 23150 }, { "epoch": 10.763365876336588, "grad_norm": 1.2737162113189697, "learning_rate": 3.0912474925593025e-05, "loss": 0.0208, "step": 23152 }, { "epoch": 10.764295676429567, "grad_norm": 0.6569473743438721, "learning_rate": 3.098875740104799e-05, "loss": 0.0159, "step": 23154 }, { "epoch": 10.765225476522549, "grad_norm": 1.8764636516571045, "learning_rate": 3.1064980769885166e-05, "loss": 0.0335, "step": 23156 }, { "epoch": 10.766155276615528, "grad_norm": 1.7928638458251953, "learning_rate": 3.11411442798105e-05, "loss": 0.0209, "step": 23158 }, { "epoch": 10.767085076708508, "grad_norm": 1.0664901733398438, "learning_rate": 3.121724717912125e-05, "loss": 0.0464, "step": 23160 }, { "epoch": 10.768014876801487, "grad_norm": 0.23072189092636108, "learning_rate": 3.129328871671234e-05, "loss": 0.0117, "step": 23162 }, { "epoch": 10.768944676894467, "grad_norm": 1.3933056592941284, "learning_rate": 3.13692681420845e-05, "loss": 0.0267, "step": 23164 }, { "epoch": 10.769874476987448, "grad_norm": 1.8457368612289429, "learning_rate": 3.1445184705351486e-05, "loss": 0.0397, "step": 23166 }, { "epoch": 10.770804277080428, "grad_norm": 1.6150047779083252, "learning_rate": 3.152103765724745e-05, "loss": 0.0357, "step": 23168 }, { "epoch": 10.771734077173408, "grad_norm": 1.1908050775527954, "learning_rate": 3.1596826249134385e-05, "loss": 0.0174, "step": 23170 }, { "epoch": 10.772663877266387, "grad_norm": 1.2122341394424438, "learning_rate": 3.167254973300933e-05, "loss": 0.0122, "step": 23172 }, { "epoch": 10.773593677359369, "grad_norm": 0.6211990118026733, "learning_rate": 3.174820736151239e-05, "loss": 0.0204, "step": 23174 }, { "epoch": 10.774523477452348, "grad_norm": 1.8446499109268188, "learning_rate": 3.1823798387933154e-05, "loss": 0.0275, "step": 23176 }, { "epoch": 10.775453277545328, "grad_norm": 1.3060389757156372, "learning_rate": 3.189932206621854e-05, "loss": 0.0279, "step": 23178 }, { "epoch": 10.776383077638307, "grad_norm": 0.9895806908607483, "learning_rate": 3.197477765098066e-05, "loss": 0.0131, "step": 23180 }, { "epoch": 10.777312877731287, "grad_norm": 0.41785117983818054, "learning_rate": 3.2050164397503206e-05, "loss": 0.0116, "step": 23182 }, { "epoch": 10.778242677824268, "grad_norm": 0.5234819650650024, "learning_rate": 3.2125481561749415e-05, "loss": 0.0223, "step": 23184 }, { "epoch": 10.779172477917248, "grad_norm": 0.19119277596473694, "learning_rate": 3.2200728400369104e-05, "loss": 0.0172, "step": 23186 }, { "epoch": 10.780102278010228, "grad_norm": 0.6999291181564331, "learning_rate": 3.227590417070671e-05, "loss": 0.0342, "step": 23188 }, { "epoch": 10.781032078103207, "grad_norm": 0.8047962188720703, "learning_rate": 3.2351008130807546e-05, "loss": 0.018, "step": 23190 }, { "epoch": 10.781961878196189, "grad_norm": 1.6124354600906372, "learning_rate": 3.242603953942569e-05, "loss": 0.0283, "step": 23192 }, { "epoch": 10.782891678289168, "grad_norm": 0.25415390729904175, "learning_rate": 3.250099765603193e-05, "loss": 0.0423, "step": 23194 }, { "epoch": 10.783821478382148, "grad_norm": 3.062241792678833, "learning_rate": 3.2575881740819416e-05, "loss": 0.0382, "step": 23196 }, { "epoch": 10.784751278475127, "grad_norm": 1.5903151035308838, "learning_rate": 3.265069105471263e-05, "loss": 0.0394, "step": 23198 }, { "epoch": 10.785681078568107, "grad_norm": 1.80011785030365, "learning_rate": 3.2725424859373664e-05, "loss": 0.0353, "step": 23200 }, { "epoch": 10.786610878661088, "grad_norm": 0.8013615608215332, "learning_rate": 3.280008241721038e-05, "loss": 0.0234, "step": 23202 }, { "epoch": 10.787540678754068, "grad_norm": 1.452552318572998, "learning_rate": 3.287466299138267e-05, "loss": 0.0703, "step": 23204 }, { "epoch": 10.788470478847048, "grad_norm": 0.7948247790336609, "learning_rate": 3.2949165845810195e-05, "loss": 0.024, "step": 23206 }, { "epoch": 10.789400278940027, "grad_norm": 0.6174435019493103, "learning_rate": 3.30235902451802e-05, "loss": 0.0178, "step": 23208 }, { "epoch": 10.790330079033009, "grad_norm": 0.6563622355461121, "learning_rate": 3.309793545495373e-05, "loss": 0.0136, "step": 23210 }, { "epoch": 10.791259879125988, "grad_norm": 0.4865036904811859, "learning_rate": 3.3172200741373414e-05, "loss": 0.0272, "step": 23212 }, { "epoch": 10.792189679218968, "grad_norm": 1.9769511222839355, "learning_rate": 3.324638537147122e-05, "loss": 0.0368, "step": 23214 }, { "epoch": 10.793119479311947, "grad_norm": 0.7835564613342285, "learning_rate": 3.3320488613074595e-05, "loss": 0.0169, "step": 23216 }, { "epoch": 10.794049279404929, "grad_norm": 1.3801062107086182, "learning_rate": 3.339450973481448e-05, "loss": 0.0319, "step": 23218 }, { "epoch": 10.794979079497908, "grad_norm": 1.1283174753189087, "learning_rate": 3.346844800613212e-05, "loss": 0.022, "step": 23220 }, { "epoch": 10.795908879590888, "grad_norm": 1.5818651914596558, "learning_rate": 3.354230269728712e-05, "loss": 0.037, "step": 23222 }, { "epoch": 10.796838679683868, "grad_norm": 1.1770697832107544, "learning_rate": 3.361607307936301e-05, "loss": 0.0274, "step": 23224 }, { "epoch": 10.797768479776847, "grad_norm": 1.335713505744934, "learning_rate": 3.368975842427587e-05, "loss": 0.0346, "step": 23226 }, { "epoch": 10.798698279869829, "grad_norm": 1.3631470203399658, "learning_rate": 3.376335800478146e-05, "loss": 0.0283, "step": 23228 }, { "epoch": 10.799628079962808, "grad_norm": 1.0382424592971802, "learning_rate": 3.383687109448145e-05, "loss": 0.0228, "step": 23230 }, { "epoch": 10.800557880055788, "grad_norm": 1.1371997594833374, "learning_rate": 3.3910296967831327e-05, "loss": 0.0272, "step": 23232 }, { "epoch": 10.801487680148767, "grad_norm": 1.0662325620651245, "learning_rate": 3.3983634900147196e-05, "loss": 0.0207, "step": 23234 }, { "epoch": 10.802417480241749, "grad_norm": 1.6556472778320312, "learning_rate": 3.4056884167613616e-05, "loss": 0.0236, "step": 23236 }, { "epoch": 10.803347280334728, "grad_norm": 0.5467392206192017, "learning_rate": 3.4130044047289714e-05, "loss": 0.0299, "step": 23238 }, { "epoch": 10.804277080427708, "grad_norm": 0.6582614779472351, "learning_rate": 3.420311381711685e-05, "loss": 0.0269, "step": 23240 }, { "epoch": 10.805206880520688, "grad_norm": 1.5426807403564453, "learning_rate": 3.427609275592621e-05, "loss": 0.0324, "step": 23242 }, { "epoch": 10.806136680613669, "grad_norm": 1.4474247694015503, "learning_rate": 3.434898014344498e-05, "loss": 0.0377, "step": 23244 }, { "epoch": 10.807066480706649, "grad_norm": 0.9902897477149963, "learning_rate": 3.44217752603039e-05, "loss": 0.0345, "step": 23246 }, { "epoch": 10.807996280799628, "grad_norm": 1.7291189432144165, "learning_rate": 3.449447738804506e-05, "loss": 0.034, "step": 23248 }, { "epoch": 10.808926080892608, "grad_norm": 0.3105907142162323, "learning_rate": 3.456708580912732e-05, "loss": 0.0254, "step": 23250 }, { "epoch": 10.809855880985587, "grad_norm": 0.5531914830207825, "learning_rate": 3.4639599806935014e-05, "loss": 0.0195, "step": 23252 }, { "epoch": 10.810785681078569, "grad_norm": 1.4289253950119019, "learning_rate": 3.471201866578413e-05, "loss": 0.0359, "step": 23254 }, { "epoch": 10.811715481171548, "grad_norm": 0.8539973497390747, "learning_rate": 3.478434167093007e-05, "loss": 0.0383, "step": 23256 }, { "epoch": 10.812645281264528, "grad_norm": 1.1409344673156738, "learning_rate": 3.485656810857383e-05, "loss": 0.0232, "step": 23258 }, { "epoch": 10.813575081357508, "grad_norm": 0.8635010719299316, "learning_rate": 3.492869726586944e-05, "loss": 0.0237, "step": 23260 }, { "epoch": 10.814504881450489, "grad_norm": 1.1317944526672363, "learning_rate": 3.500072843093158e-05, "loss": 0.0267, "step": 23262 }, { "epoch": 10.815434681543469, "grad_norm": 0.9290439486503601, "learning_rate": 3.5072660892841564e-05, "loss": 0.0202, "step": 23264 }, { "epoch": 10.816364481636448, "grad_norm": 2.046081304550171, "learning_rate": 3.5144493941654865e-05, "loss": 0.0405, "step": 23266 }, { "epoch": 10.817294281729428, "grad_norm": 1.0709718465805054, "learning_rate": 3.5216226868408624e-05, "loss": 0.0384, "step": 23268 }, { "epoch": 10.818224081822407, "grad_norm": 0.6689101457595825, "learning_rate": 3.528785896512765e-05, "loss": 0.0216, "step": 23270 }, { "epoch": 10.819153881915389, "grad_norm": 1.719362497329712, "learning_rate": 3.535938952483208e-05, "loss": 0.0259, "step": 23272 }, { "epoch": 10.820083682008368, "grad_norm": 1.3826162815093994, "learning_rate": 3.543081784154398e-05, "loss": 0.036, "step": 23274 }, { "epoch": 10.821013482101348, "grad_norm": 0.6848810911178589, "learning_rate": 3.55021432102952e-05, "loss": 0.0175, "step": 23276 }, { "epoch": 10.821943282194328, "grad_norm": 1.5425214767456055, "learning_rate": 3.5573364927132654e-05, "loss": 0.0237, "step": 23278 }, { "epoch": 10.822873082287309, "grad_norm": 2.488790273666382, "learning_rate": 3.5644482289126766e-05, "loss": 0.054, "step": 23280 }, { "epoch": 10.823802882380289, "grad_norm": 1.580936074256897, "learning_rate": 3.5715494594378195e-05, "loss": 0.0401, "step": 23282 }, { "epoch": 10.824732682473268, "grad_norm": 2.3398733139038086, "learning_rate": 3.5786401142024e-05, "loss": 0.0385, "step": 23284 }, { "epoch": 10.825662482566248, "grad_norm": 1.0624809265136719, "learning_rate": 3.5857201232245174e-05, "loss": 0.0183, "step": 23286 }, { "epoch": 10.826592282659227, "grad_norm": 1.9957388639450073, "learning_rate": 3.592789416627326e-05, "loss": 0.0308, "step": 23288 }, { "epoch": 10.827522082752209, "grad_norm": 1.56805419921875, "learning_rate": 3.599847924639785e-05, "loss": 0.0385, "step": 23290 }, { "epoch": 10.828451882845188, "grad_norm": 0.8486806750297546, "learning_rate": 3.606895577597255e-05, "loss": 0.0323, "step": 23292 }, { "epoch": 10.829381682938168, "grad_norm": 2.1476285457611084, "learning_rate": 3.613932305942229e-05, "loss": 0.0501, "step": 23294 }, { "epoch": 10.830311483031148, "grad_norm": 1.185922384262085, "learning_rate": 3.620958040225072e-05, "loss": 0.0309, "step": 23296 }, { "epoch": 10.831241283124129, "grad_norm": 1.1834073066711426, "learning_rate": 3.627972711104608e-05, "loss": 0.0272, "step": 23298 }, { "epoch": 10.832171083217109, "grad_norm": 1.3238006830215454, "learning_rate": 3.634976249348851e-05, "loss": 0.0313, "step": 23300 }, { "epoch": 10.833100883310088, "grad_norm": 1.917811632156372, "learning_rate": 3.641968585835736e-05, "loss": 0.0473, "step": 23302 }, { "epoch": 10.834030683403068, "grad_norm": 0.5110381245613098, "learning_rate": 3.6489496515537265e-05, "loss": 0.016, "step": 23304 }, { "epoch": 10.83496048349605, "grad_norm": 1.9546822309494019, "learning_rate": 3.6559193776024896e-05, "loss": 0.0359, "step": 23306 }, { "epoch": 10.835890283589029, "grad_norm": 0.4123917818069458, "learning_rate": 3.662877695193644e-05, "loss": 0.0125, "step": 23308 }, { "epoch": 10.836820083682008, "grad_norm": 1.931709885597229, "learning_rate": 3.669824535651435e-05, "loss": 0.0412, "step": 23310 }, { "epoch": 10.837749883774988, "grad_norm": 1.554097294807434, "learning_rate": 3.6767598304133364e-05, "loss": 0.0548, "step": 23312 }, { "epoch": 10.83867968386797, "grad_norm": 1.7622309923171997, "learning_rate": 3.683683511030773e-05, "loss": 0.0284, "step": 23314 }, { "epoch": 10.839609483960949, "grad_norm": 2.9314286708831787, "learning_rate": 3.6905955091698444e-05, "loss": 0.0808, "step": 23316 }, { "epoch": 10.840539284053929, "grad_norm": 0.4834071695804596, "learning_rate": 3.6974957566119024e-05, "loss": 0.0271, "step": 23318 }, { "epoch": 10.841469084146908, "grad_norm": 1.0810297727584839, "learning_rate": 3.7043841852542915e-05, "loss": 0.0206, "step": 23320 }, { "epoch": 10.842398884239888, "grad_norm": 1.2899446487426758, "learning_rate": 3.711260727110986e-05, "loss": 0.0168, "step": 23322 }, { "epoch": 10.84332868433287, "grad_norm": 1.7048248052597046, "learning_rate": 3.718125314313325e-05, "loss": 0.017, "step": 23324 }, { "epoch": 10.844258484425849, "grad_norm": 2.187329053878784, "learning_rate": 3.72497787911059e-05, "loss": 0.0528, "step": 23326 }, { "epoch": 10.845188284518828, "grad_norm": 0.5082609057426453, "learning_rate": 3.7318183538707145e-05, "loss": 0.0106, "step": 23328 }, { "epoch": 10.846118084611808, "grad_norm": 1.0169188976287842, "learning_rate": 3.738646671081008e-05, "loss": 0.025, "step": 23330 }, { "epoch": 10.84704788470479, "grad_norm": 1.8351515531539917, "learning_rate": 3.745462763348735e-05, "loss": 0.0284, "step": 23332 }, { "epoch": 10.847977684797769, "grad_norm": 2.3529560565948486, "learning_rate": 3.75226656340177e-05, "loss": 0.0538, "step": 23334 }, { "epoch": 10.848907484890749, "grad_norm": 1.448096752166748, "learning_rate": 3.7590580040894e-05, "loss": 0.031, "step": 23336 }, { "epoch": 10.849837284983728, "grad_norm": 1.9564043283462524, "learning_rate": 3.765837018382833e-05, "loss": 0.0405, "step": 23338 }, { "epoch": 10.850767085076708, "grad_norm": 1.294082522392273, "learning_rate": 3.7726035393759325e-05, "loss": 0.0507, "step": 23340 }, { "epoch": 10.851696885169689, "grad_norm": 1.5151736736297607, "learning_rate": 3.779357500285854e-05, "loss": 0.0141, "step": 23342 }, { "epoch": 10.852626685262669, "grad_norm": 0.5534736514091492, "learning_rate": 3.786098834453761e-05, "loss": 0.0228, "step": 23344 }, { "epoch": 10.853556485355648, "grad_norm": 0.8571767210960388, "learning_rate": 3.792827475345392e-05, "loss": 0.0213, "step": 23346 }, { "epoch": 10.854486285448628, "grad_norm": 2.2224838733673096, "learning_rate": 3.79954335655176e-05, "loss": 0.0289, "step": 23348 }, { "epoch": 10.85541608554161, "grad_norm": 1.1046444177627563, "learning_rate": 3.806246411789862e-05, "loss": 0.0344, "step": 23350 }, { "epoch": 10.856345885634589, "grad_norm": 1.1536287069320679, "learning_rate": 3.812936574903233e-05, "loss": 0.0278, "step": 23352 }, { "epoch": 10.857275685727569, "grad_norm": 1.663267731666565, "learning_rate": 3.819613779862648e-05, "loss": 0.0388, "step": 23354 }, { "epoch": 10.858205485820548, "grad_norm": 0.5055015087127686, "learning_rate": 3.826277960766821e-05, "loss": 0.0342, "step": 23356 }, { "epoch": 10.859135285913528, "grad_norm": 1.352064609527588, "learning_rate": 3.832929051842976e-05, "loss": 0.0424, "step": 23358 }, { "epoch": 10.860065086006509, "grad_norm": 1.791191816329956, "learning_rate": 3.839566987447499e-05, "loss": 0.0535, "step": 23360 }, { "epoch": 10.860994886099489, "grad_norm": 1.636345386505127, "learning_rate": 3.846191702066647e-05, "loss": 0.034, "step": 23362 }, { "epoch": 10.861924686192468, "grad_norm": 1.055253267288208, "learning_rate": 3.8528031303171895e-05, "loss": 0.0359, "step": 23364 }, { "epoch": 10.862854486285448, "grad_norm": 1.4091213941574097, "learning_rate": 3.8594012069469844e-05, "loss": 0.0297, "step": 23366 }, { "epoch": 10.86378428637843, "grad_norm": 1.362685203552246, "learning_rate": 3.8659858668356643e-05, "loss": 0.0244, "step": 23368 }, { "epoch": 10.864714086471409, "grad_norm": 1.7550090551376343, "learning_rate": 3.8725570449953245e-05, "loss": 0.0244, "step": 23370 }, { "epoch": 10.865643886564389, "grad_norm": 1.2632044553756714, "learning_rate": 3.879114676571074e-05, "loss": 0.03, "step": 23372 }, { "epoch": 10.866573686657368, "grad_norm": 1.8108406066894531, "learning_rate": 3.885658696841736e-05, "loss": 0.049, "step": 23374 }, { "epoch": 10.86750348675035, "grad_norm": 1.7182923555374146, "learning_rate": 3.8921890412204596e-05, "loss": 0.0302, "step": 23376 }, { "epoch": 10.868433286843329, "grad_norm": 0.8934547305107117, "learning_rate": 3.89870564525541e-05, "loss": 0.0336, "step": 23378 }, { "epoch": 10.869363086936309, "grad_norm": 1.4449901580810547, "learning_rate": 3.905208444630322e-05, "loss": 0.0391, "step": 23380 }, { "epoch": 10.870292887029288, "grad_norm": 1.030200481414795, "learning_rate": 3.911697375165176e-05, "loss": 0.0309, "step": 23382 }, { "epoch": 10.871222687122268, "grad_norm": 1.2544442415237427, "learning_rate": 3.918172372816879e-05, "loss": 0.0232, "step": 23384 }, { "epoch": 10.87215248721525, "grad_norm": 1.0929487943649292, "learning_rate": 3.924633373679815e-05, "loss": 0.0168, "step": 23386 }, { "epoch": 10.873082287308229, "grad_norm": 1.514245867729187, "learning_rate": 3.9310803139864714e-05, "loss": 0.0438, "step": 23388 }, { "epoch": 10.874012087401209, "grad_norm": 0.6735894680023193, "learning_rate": 3.937513130108194e-05, "loss": 0.0471, "step": 23390 }, { "epoch": 10.874941887494188, "grad_norm": 0.6439765095710754, "learning_rate": 3.9439317585556696e-05, "loss": 0.0126, "step": 23392 }, { "epoch": 10.87587168758717, "grad_norm": 0.7982621788978577, "learning_rate": 3.9503361359796284e-05, "loss": 0.0244, "step": 23394 }, { "epoch": 10.876801487680149, "grad_norm": 1.33121919631958, "learning_rate": 3.956726199171433e-05, "loss": 0.0391, "step": 23396 }, { "epoch": 10.877731287773129, "grad_norm": 1.0987457036972046, "learning_rate": 3.963101885063771e-05, "loss": 0.0443, "step": 23398 }, { "epoch": 10.878661087866108, "grad_norm": 1.4028242826461792, "learning_rate": 3.969463130731182e-05, "loss": 0.0516, "step": 23400 }, { "epoch": 10.87959088795909, "grad_norm": 1.2402616739273071, "learning_rate": 3.9758098733907255e-05, "loss": 0.0392, "step": 23402 }, { "epoch": 10.88052068805207, "grad_norm": 1.6204966306686401, "learning_rate": 3.9821420504026406e-05, "loss": 0.0216, "step": 23404 }, { "epoch": 10.881450488145049, "grad_norm": 2.2070281505584717, "learning_rate": 3.988459599270883e-05, "loss": 0.0353, "step": 23406 }, { "epoch": 10.882380288238028, "grad_norm": 1.0093938112258911, "learning_rate": 3.994762457643797e-05, "loss": 0.0202, "step": 23408 }, { "epoch": 10.883310088331008, "grad_norm": 1.7724214792251587, "learning_rate": 4.0010505633146984e-05, "loss": 0.0388, "step": 23410 }, { "epoch": 10.88423988842399, "grad_norm": 1.5218234062194824, "learning_rate": 4.007323854222553e-05, "loss": 0.0359, "step": 23412 }, { "epoch": 10.885169688516969, "grad_norm": 0.6843513250350952, "learning_rate": 4.0135822684525126e-05, "loss": 0.0282, "step": 23414 }, { "epoch": 10.886099488609949, "grad_norm": 1.1722639799118042, "learning_rate": 4.019825744236511e-05, "loss": 0.0231, "step": 23416 }, { "epoch": 10.887029288702928, "grad_norm": 0.7695192694664001, "learning_rate": 4.026054219954008e-05, "loss": 0.0278, "step": 23418 }, { "epoch": 10.88795908879591, "grad_norm": 1.0178159475326538, "learning_rate": 4.032267634132445e-05, "loss": 0.0264, "step": 23420 }, { "epoch": 10.88888888888889, "grad_norm": 1.4241514205932617, "learning_rate": 4.038465925447922e-05, "loss": 0.0336, "step": 23422 }, { "epoch": 10.889818688981869, "grad_norm": 0.8402019143104553, "learning_rate": 4.044649032725832e-05, "loss": 0.0228, "step": 23424 }, { "epoch": 10.890748489074848, "grad_norm": 1.456985592842102, "learning_rate": 4.050816894941389e-05, "loss": 0.0422, "step": 23426 }, { "epoch": 10.891678289167828, "grad_norm": 0.5489038825035095, "learning_rate": 4.056969451220284e-05, "loss": 0.0392, "step": 23428 }, { "epoch": 10.89260808926081, "grad_norm": 1.1031174659729004, "learning_rate": 4.063106640839255e-05, "loss": 0.0174, "step": 23430 }, { "epoch": 10.893537889353789, "grad_norm": 2.2636172771453857, "learning_rate": 4.069228403226746e-05, "loss": 0.0362, "step": 23432 }, { "epoch": 10.894467689446769, "grad_norm": 1.210607886314392, "learning_rate": 4.0753346779634204e-05, "loss": 0.032, "step": 23434 }, { "epoch": 10.895397489539748, "grad_norm": 1.6393545866012573, "learning_rate": 4.081425404782799e-05, "loss": 0.0359, "step": 23436 }, { "epoch": 10.89632728963273, "grad_norm": 2.956106662750244, "learning_rate": 4.0875005235718926e-05, "loss": 0.0262, "step": 23438 }, { "epoch": 10.89725708972571, "grad_norm": 1.3666620254516602, "learning_rate": 4.093559974371732e-05, "loss": 0.0263, "step": 23440 }, { "epoch": 10.898186889818689, "grad_norm": 1.7064107656478882, "learning_rate": 4.099603697377943e-05, "loss": 0.0443, "step": 23442 }, { "epoch": 10.899116689911668, "grad_norm": 1.611221194267273, "learning_rate": 4.10563163294146e-05, "loss": 0.0314, "step": 23444 }, { "epoch": 10.900046490004648, "grad_norm": 0.916812539100647, "learning_rate": 4.1116437215689804e-05, "loss": 0.0175, "step": 23446 }, { "epoch": 10.90097629009763, "grad_norm": 1.1909326314926147, "learning_rate": 4.117639903923616e-05, "loss": 0.0228, "step": 23448 }, { "epoch": 10.901906090190609, "grad_norm": 1.7645548582077026, "learning_rate": 4.123620120825453e-05, "loss": 0.0644, "step": 23450 }, { "epoch": 10.902835890283589, "grad_norm": 1.470897912979126, "learning_rate": 4.129584313252194e-05, "loss": 0.0306, "step": 23452 }, { "epoch": 10.903765690376568, "grad_norm": 1.5500479936599731, "learning_rate": 4.135532422339652e-05, "loss": 0.0191, "step": 23454 }, { "epoch": 10.90469549046955, "grad_norm": 3.041100263595581, "learning_rate": 4.1414643893823805e-05, "loss": 0.0414, "step": 23456 }, { "epoch": 10.90562529056253, "grad_norm": 1.0267373323440552, "learning_rate": 4.147380155834286e-05, "loss": 0.0331, "step": 23458 }, { "epoch": 10.906555090655509, "grad_norm": 1.5694067478179932, "learning_rate": 4.1532796633091275e-05, "loss": 0.0396, "step": 23460 }, { "epoch": 10.907484890748488, "grad_norm": 0.7059234380722046, "learning_rate": 4.159162853581147e-05, "loss": 0.025, "step": 23462 }, { "epoch": 10.90841469084147, "grad_norm": 1.3437014818191528, "learning_rate": 4.1650296685856203e-05, "loss": 0.026, "step": 23464 }, { "epoch": 10.90934449093445, "grad_norm": 2.680060863494873, "learning_rate": 4.1708800504194766e-05, "loss": 0.0377, "step": 23466 }, { "epoch": 10.910274291027429, "grad_norm": 1.4547228813171387, "learning_rate": 4.17671394134181e-05, "loss": 0.0497, "step": 23468 }, { "epoch": 10.911204091120409, "grad_norm": 1.0481646060943604, "learning_rate": 4.182531283774434e-05, "loss": 0.0356, "step": 23470 }, { "epoch": 10.91213389121339, "grad_norm": 1.9735994338989258, "learning_rate": 4.188332020302564e-05, "loss": 0.0327, "step": 23472 }, { "epoch": 10.91306369130637, "grad_norm": 1.767194151878357, "learning_rate": 4.1941160936752617e-05, "loss": 0.0351, "step": 23474 }, { "epoch": 10.91399349139935, "grad_norm": 1.9023675918579102, "learning_rate": 4.199883446806043e-05, "loss": 0.0334, "step": 23476 }, { "epoch": 10.914923291492329, "grad_norm": 1.657288670539856, "learning_rate": 4.2056340227734886e-05, "loss": 0.0457, "step": 23478 }, { "epoch": 10.915853091585308, "grad_norm": 0.8713844418525696, "learning_rate": 4.211367764821721e-05, "loss": 0.0752, "step": 23480 }, { "epoch": 10.91678289167829, "grad_norm": 1.8025460243225098, "learning_rate": 4.217084616361024e-05, "loss": 0.0695, "step": 23482 }, { "epoch": 10.91771269177127, "grad_norm": 2.0694057941436768, "learning_rate": 4.2227845209683635e-05, "loss": 0.0817, "step": 23484 }, { "epoch": 10.918642491864249, "grad_norm": 1.2702507972717285, "learning_rate": 4.228467422388012e-05, "loss": 0.0307, "step": 23486 }, { "epoch": 10.919572291957229, "grad_norm": 1.1832977533340454, "learning_rate": 4.234133264532011e-05, "loss": 0.0363, "step": 23488 }, { "epoch": 10.92050209205021, "grad_norm": 0.9272278547286987, "learning_rate": 4.239781991480775e-05, "loss": 0.0356, "step": 23490 }, { "epoch": 10.92143189214319, "grad_norm": 1.889477252960205, "learning_rate": 4.2454135474836736e-05, "loss": 0.0458, "step": 23492 }, { "epoch": 10.92236169223617, "grad_norm": 1.8795039653778076, "learning_rate": 4.2510278769595106e-05, "loss": 0.0342, "step": 23494 }, { "epoch": 10.923291492329149, "grad_norm": 1.138075351715088, "learning_rate": 4.2566249244971334e-05, "loss": 0.0189, "step": 23496 }, { "epoch": 10.924221292422128, "grad_norm": 1.6864882707595825, "learning_rate": 4.2622046348559044e-05, "loss": 0.0281, "step": 23498 }, { "epoch": 10.92515109251511, "grad_norm": 1.4906028509140015, "learning_rate": 4.267766952966372e-05, "loss": 0.0338, "step": 23500 }, { "epoch": 10.92608089260809, "grad_norm": 1.7707207202911377, "learning_rate": 4.273311823930691e-05, "loss": 0.0313, "step": 23502 }, { "epoch": 10.927010692701069, "grad_norm": 2.4620182514190674, "learning_rate": 4.2788391930232095e-05, "loss": 0.0341, "step": 23504 }, { "epoch": 10.927940492794049, "grad_norm": 1.862316608428955, "learning_rate": 4.284349005691053e-05, "loss": 0.0334, "step": 23506 }, { "epoch": 10.92887029288703, "grad_norm": 1.9238710403442383, "learning_rate": 4.28984120755458e-05, "loss": 0.0466, "step": 23508 }, { "epoch": 10.92980009298001, "grad_norm": 0.8853424191474915, "learning_rate": 4.2953157444079645e-05, "loss": 0.0575, "step": 23510 }, { "epoch": 10.93072989307299, "grad_norm": 2.364262819290161, "learning_rate": 4.300772562219762e-05, "loss": 0.0546, "step": 23512 }, { "epoch": 10.931659693165969, "grad_norm": 1.4798102378845215, "learning_rate": 4.306211607133371e-05, "loss": 0.042, "step": 23514 }, { "epoch": 10.932589493258948, "grad_norm": 1.6423226594924927, "learning_rate": 4.3116328254676165e-05, "loss": 0.0364, "step": 23516 }, { "epoch": 10.93351929335193, "grad_norm": 3.1643924713134766, "learning_rate": 4.317036163717248e-05, "loss": 0.0799, "step": 23518 }, { "epoch": 10.93444909344491, "grad_norm": 1.8551913499832153, "learning_rate": 4.3224215685535226e-05, "loss": 0.0522, "step": 23520 }, { "epoch": 10.935378893537889, "grad_norm": 1.013846755027771, "learning_rate": 4.327788986824669e-05, "loss": 0.0374, "step": 23522 }, { "epoch": 10.936308693630869, "grad_norm": 0.7612302899360657, "learning_rate": 4.3331383655563985e-05, "loss": 0.0202, "step": 23524 }, { "epoch": 10.93723849372385, "grad_norm": 1.3101125955581665, "learning_rate": 4.338469651952541e-05, "loss": 0.0469, "step": 23526 }, { "epoch": 10.93816829381683, "grad_norm": 1.2966210842132568, "learning_rate": 4.343782793395439e-05, "loss": 0.0148, "step": 23528 }, { "epoch": 10.93909809390981, "grad_norm": 0.5112072229385376, "learning_rate": 4.3490777374465186e-05, "loss": 0.0182, "step": 23530 }, { "epoch": 10.940027894002789, "grad_norm": 1.7041641473770142, "learning_rate": 4.3543544318468445e-05, "loss": 0.0459, "step": 23532 }, { "epoch": 10.94095769409577, "grad_norm": 1.7154302597045898, "learning_rate": 4.359612824517562e-05, "loss": 0.0313, "step": 23534 }, { "epoch": 10.94188749418875, "grad_norm": 1.2110742330551147, "learning_rate": 4.3648528635604576e-05, "loss": 0.0361, "step": 23536 }, { "epoch": 10.94281729428173, "grad_norm": 1.898024320602417, "learning_rate": 4.3700744972584494e-05, "loss": 0.0584, "step": 23538 }, { "epoch": 10.943747094374709, "grad_norm": 0.679100513458252, "learning_rate": 4.3752776740761456e-05, "loss": 0.0325, "step": 23540 }, { "epoch": 10.944676894467689, "grad_norm": 1.3908038139343262, "learning_rate": 4.380462342660277e-05, "loss": 0.0254, "step": 23542 }, { "epoch": 10.94560669456067, "grad_norm": 1.6989015340805054, "learning_rate": 4.3856284518402485e-05, "loss": 0.0394, "step": 23544 }, { "epoch": 10.94653649465365, "grad_norm": 1.2531459331512451, "learning_rate": 4.390775950628671e-05, "loss": 0.0518, "step": 23546 }, { "epoch": 10.94746629474663, "grad_norm": 2.002805471420288, "learning_rate": 4.395904788221799e-05, "loss": 0.0761, "step": 23548 }, { "epoch": 10.948396094839609, "grad_norm": 1.0534508228302002, "learning_rate": 4.401014914000085e-05, "loss": 0.0163, "step": 23550 }, { "epoch": 10.94932589493259, "grad_norm": 2.2351112365722656, "learning_rate": 4.4061062775286194e-05, "loss": 0.0535, "step": 23552 }, { "epoch": 10.95025569502557, "grad_norm": 1.8306169509887695, "learning_rate": 4.411178828557732e-05, "loss": 0.0701, "step": 23554 }, { "epoch": 10.95118549511855, "grad_norm": 2.3150806427001953, "learning_rate": 4.4162325170233806e-05, "loss": 0.066, "step": 23556 }, { "epoch": 10.952115295211529, "grad_norm": 1.101563572883606, "learning_rate": 4.4212672930476875e-05, "loss": 0.0357, "step": 23558 }, { "epoch": 10.95304509530451, "grad_norm": 1.645434021949768, "learning_rate": 4.4262831069394716e-05, "loss": 0.0396, "step": 23560 }, { "epoch": 10.95397489539749, "grad_norm": 2.3185856342315674, "learning_rate": 4.431279909194662e-05, "loss": 0.0452, "step": 23562 }, { "epoch": 10.95490469549047, "grad_norm": 0.7433179020881653, "learning_rate": 4.436257650496827e-05, "loss": 0.0237, "step": 23564 }, { "epoch": 10.95583449558345, "grad_norm": 1.53296959400177, "learning_rate": 4.441216281717692e-05, "loss": 0.0391, "step": 23566 }, { "epoch": 10.956764295676429, "grad_norm": 1.6173518896102905, "learning_rate": 4.4461557539175553e-05, "loss": 0.0517, "step": 23568 }, { "epoch": 10.95769409576941, "grad_norm": 1.1761221885681152, "learning_rate": 4.451076018345824e-05, "loss": 0.0381, "step": 23570 }, { "epoch": 10.95862389586239, "grad_norm": 1.0256394147872925, "learning_rate": 4.4559770264414614e-05, "loss": 0.0148, "step": 23572 }, { "epoch": 10.95955369595537, "grad_norm": 1.1568509340286255, "learning_rate": 4.460858729833519e-05, "loss": 0.0256, "step": 23574 }, { "epoch": 10.960483496048349, "grad_norm": 1.3186537027359009, "learning_rate": 4.465721080341542e-05, "loss": 0.035, "step": 23576 }, { "epoch": 10.96141329614133, "grad_norm": 1.798945665359497, "learning_rate": 4.470564029976097e-05, "loss": 0.0398, "step": 23578 }, { "epoch": 10.96234309623431, "grad_norm": 0.7550841569900513, "learning_rate": 4.475387530939225e-05, "loss": 0.0496, "step": 23580 }, { "epoch": 10.96327289632729, "grad_norm": 1.9378528594970703, "learning_rate": 4.480191535624919e-05, "loss": 0.0467, "step": 23582 }, { "epoch": 10.96420269642027, "grad_norm": 2.4593653678894043, "learning_rate": 4.484975996619593e-05, "loss": 0.0559, "step": 23584 }, { "epoch": 10.965132496513249, "grad_norm": 2.1547389030456543, "learning_rate": 4.489740866702535e-05, "loss": 0.0388, "step": 23586 }, { "epoch": 10.96606229660623, "grad_norm": 2.589932680130005, "learning_rate": 4.4944860988464255e-05, "loss": 0.0789, "step": 23588 }, { "epoch": 10.96699209669921, "grad_norm": 5.088874816894531, "learning_rate": 4.499211646217727e-05, "loss": 0.1009, "step": 23590 }, { "epoch": 10.96792189679219, "grad_norm": 2.2762558460235596, "learning_rate": 4.5039174621771836e-05, "loss": 0.0602, "step": 23592 }, { "epoch": 10.968851696885169, "grad_norm": 2.0795514583587646, "learning_rate": 4.5086035002803134e-05, "loss": 0.0561, "step": 23594 }, { "epoch": 10.96978149697815, "grad_norm": 1.894976019859314, "learning_rate": 4.513269714277802e-05, "loss": 0.0437, "step": 23596 }, { "epoch": 10.97071129707113, "grad_norm": 1.1613444089889526, "learning_rate": 4.51791605811599e-05, "loss": 0.0317, "step": 23598 }, { "epoch": 10.97164109716411, "grad_norm": 1.2109320163726807, "learning_rate": 4.52254248593736e-05, "loss": 0.0262, "step": 23600 }, { "epoch": 10.97257089725709, "grad_norm": 1.9774848222732544, "learning_rate": 4.527148952080928e-05, "loss": 0.0395, "step": 23602 }, { "epoch": 10.973500697350069, "grad_norm": 0.7535883188247681, "learning_rate": 4.531735411082732e-05, "loss": 0.0169, "step": 23604 }, { "epoch": 10.97443049744305, "grad_norm": 1.6812115907669067, "learning_rate": 4.536301817676272e-05, "loss": 0.0385, "step": 23606 }, { "epoch": 10.97536029753603, "grad_norm": 1.7695512771606445, "learning_rate": 4.5408481267929605e-05, "loss": 0.028, "step": 23608 }, { "epoch": 10.97629009762901, "grad_norm": 1.8354201316833496, "learning_rate": 4.545374293562562e-05, "loss": 0.0612, "step": 23610 }, { "epoch": 10.977219897721989, "grad_norm": 1.5388861894607544, "learning_rate": 4.549880273313626e-05, "loss": 0.0382, "step": 23612 }, { "epoch": 10.97814969781497, "grad_norm": 2.0208001136779785, "learning_rate": 4.5543660215739745e-05, "loss": 0.0371, "step": 23614 }, { "epoch": 10.97907949790795, "grad_norm": 1.494435429573059, "learning_rate": 4.5588314940710695e-05, "loss": 0.0585, "step": 23616 }, { "epoch": 10.98000929800093, "grad_norm": 1.6792160272598267, "learning_rate": 4.563276646732501e-05, "loss": 0.0493, "step": 23618 }, { "epoch": 10.98093909809391, "grad_norm": 1.094298005104065, "learning_rate": 4.5677014356863995e-05, "loss": 0.0391, "step": 23620 }, { "epoch": 10.98186889818689, "grad_norm": 1.7217458486557007, "learning_rate": 4.5721058172619016e-05, "loss": 0.0486, "step": 23622 }, { "epoch": 10.98279869827987, "grad_norm": 1.7585022449493408, "learning_rate": 4.576489747989531e-05, "loss": 0.062, "step": 23624 }, { "epoch": 10.98372849837285, "grad_norm": 1.2742273807525635, "learning_rate": 4.58085318460165e-05, "loss": 0.0322, "step": 23626 }, { "epoch": 10.98465829846583, "grad_norm": 1.5919653177261353, "learning_rate": 4.5851960840329215e-05, "loss": 0.0464, "step": 23628 }, { "epoch": 10.98558809855881, "grad_norm": 0.6621868014335632, "learning_rate": 4.589518403420672e-05, "loss": 0.0153, "step": 23630 }, { "epoch": 10.98651789865179, "grad_norm": 0.8823869824409485, "learning_rate": 4.593820100105343e-05, "loss": 0.0179, "step": 23632 }, { "epoch": 10.98744769874477, "grad_norm": 2.839432716369629, "learning_rate": 4.598101131630954e-05, "loss": 0.0446, "step": 23634 }, { "epoch": 10.98837749883775, "grad_norm": 2.260303497314453, "learning_rate": 4.602361455745424e-05, "loss": 0.0979, "step": 23636 }, { "epoch": 10.98930729893073, "grad_norm": 1.8339574337005615, "learning_rate": 4.606601030401085e-05, "loss": 0.0505, "step": 23638 }, { "epoch": 10.99023709902371, "grad_norm": 1.5656402111053467, "learning_rate": 4.6108198137550346e-05, "loss": 0.0382, "step": 23640 }, { "epoch": 10.99116689911669, "grad_norm": 1.848215937614441, "learning_rate": 4.615017764169605e-05, "loss": 0.0416, "step": 23642 }, { "epoch": 10.99209669920967, "grad_norm": 1.141296148300171, "learning_rate": 4.619194840212709e-05, "loss": 0.024, "step": 23644 }, { "epoch": 10.99302649930265, "grad_norm": 1.4835543632507324, "learning_rate": 4.6233510006582846e-05, "loss": 0.0309, "step": 23646 }, { "epoch": 10.99395629939563, "grad_norm": 1.6837290525436401, "learning_rate": 4.627486204486725e-05, "loss": 0.0342, "step": 23648 }, { "epoch": 10.99488609948861, "grad_norm": 2.363226890563965, "learning_rate": 4.6316004108852285e-05, "loss": 0.0599, "step": 23650 }, { "epoch": 10.99581589958159, "grad_norm": 1.7355382442474365, "learning_rate": 4.635693579248229e-05, "loss": 0.0509, "step": 23652 }, { "epoch": 10.99674569967457, "grad_norm": 1.2653151750564575, "learning_rate": 4.6397656691778257e-05, "loss": 0.0215, "step": 23654 }, { "epoch": 10.99767549976755, "grad_norm": 2.4098637104034424, "learning_rate": 4.643816640484125e-05, "loss": 0.047, "step": 23656 }, { "epoch": 10.99860529986053, "grad_norm": 1.7994836568832397, "learning_rate": 4.647846453185677e-05, "loss": 0.0411, "step": 23658 }, { "epoch": 10.99953509995351, "grad_norm": 1.5284054279327393, "learning_rate": 4.651855067509857e-05, "loss": 0.0685, "step": 23660 }, { "epoch": 11.00046490004649, "grad_norm": 2.023792266845703, "learning_rate": 4.65584244389326e-05, "loss": 0.0297, "step": 23662 }, { "epoch": 11.00139470013947, "grad_norm": 2.095740556716919, "learning_rate": 4.659808542982091e-05, "loss": 0.0492, "step": 23664 }, { "epoch": 11.00232450023245, "grad_norm": 1.3659789562225342, "learning_rate": 4.663753325632542e-05, "loss": 0.0291, "step": 23666 }, { "epoch": 11.00325430032543, "grad_norm": 1.2992576360702515, "learning_rate": 4.6676767529112225e-05, "loss": 0.0288, "step": 23668 }, { "epoch": 11.00418410041841, "grad_norm": 0.5813758969306946, "learning_rate": 4.6715787860954776e-05, "loss": 0.0154, "step": 23670 }, { "epoch": 11.00511390051139, "grad_norm": 1.6265202760696411, "learning_rate": 4.675459386673816e-05, "loss": 0.0413, "step": 23672 }, { "epoch": 11.00604370060437, "grad_norm": 1.2645459175109863, "learning_rate": 4.679318516346267e-05, "loss": 0.0288, "step": 23674 }, { "epoch": 11.00697350069735, "grad_norm": 1.1757959127426147, "learning_rate": 4.6831561370247975e-05, "loss": 0.0183, "step": 23676 }, { "epoch": 11.00790330079033, "grad_norm": 1.4628958702087402, "learning_rate": 4.686972210833632e-05, "loss": 0.0319, "step": 23678 }, { "epoch": 11.00883310088331, "grad_norm": 1.0886883735656738, "learning_rate": 4.6907667001096516e-05, "loss": 0.0245, "step": 23680 }, { "epoch": 11.00976290097629, "grad_norm": 1.4324288368225098, "learning_rate": 4.6945395674027985e-05, "loss": 0.029, "step": 23682 }, { "epoch": 11.01069270106927, "grad_norm": 0.7502135634422302, "learning_rate": 4.6982907754763866e-05, "loss": 0.022, "step": 23684 }, { "epoch": 11.01162250116225, "grad_norm": 1.6510201692581177, "learning_rate": 4.702020287307499e-05, "loss": 0.0329, "step": 23686 }, { "epoch": 11.01255230125523, "grad_norm": 1.2808277606964111, "learning_rate": 4.705728066087384e-05, "loss": 0.0402, "step": 23688 }, { "epoch": 11.01348210134821, "grad_norm": 1.6017496585845947, "learning_rate": 4.709414075221737e-05, "loss": 0.0276, "step": 23690 }, { "epoch": 11.014411901441191, "grad_norm": 1.0514556169509888, "learning_rate": 4.7130782783311435e-05, "loss": 0.0333, "step": 23692 }, { "epoch": 11.01534170153417, "grad_norm": 1.899975061416626, "learning_rate": 4.71672063925139e-05, "loss": 0.0715, "step": 23694 }, { "epoch": 11.01627150162715, "grad_norm": 2.1383907794952393, "learning_rate": 4.720341122033861e-05, "loss": 0.0758, "step": 23696 }, { "epoch": 11.01720130172013, "grad_norm": 2.6594507694244385, "learning_rate": 4.723939690945847e-05, "loss": 0.0561, "step": 23698 }, { "epoch": 11.01813110181311, "grad_norm": 2.0117461681365967, "learning_rate": 4.727516310470915e-05, "loss": 0.0837, "step": 23700 }, { "epoch": 11.01906090190609, "grad_norm": 2.8614070415496826, "learning_rate": 4.7310709453092925e-05, "loss": 0.0409, "step": 23702 }, { "epoch": 11.01999070199907, "grad_norm": 2.042248249053955, "learning_rate": 4.734603560378159e-05, "loss": 0.0782, "step": 23704 }, { "epoch": 11.02092050209205, "grad_norm": 3.3000032901763916, "learning_rate": 4.738114120812031e-05, "loss": 0.0631, "step": 23706 }, { "epoch": 11.02185030218503, "grad_norm": 1.6657586097717285, "learning_rate": 4.741602591963085e-05, "loss": 0.0303, "step": 23708 }, { "epoch": 11.022780102278011, "grad_norm": 1.1081063747406006, "learning_rate": 4.7450689394015366e-05, "loss": 0.0203, "step": 23710 }, { "epoch": 11.02370990237099, "grad_norm": 1.4836934804916382, "learning_rate": 4.748513128915926e-05, "loss": 0.0262, "step": 23712 }, { "epoch": 11.02463970246397, "grad_norm": 1.6533111333847046, "learning_rate": 4.751935126513488e-05, "loss": 0.0351, "step": 23714 }, { "epoch": 11.02556950255695, "grad_norm": 1.0940696001052856, "learning_rate": 4.75533489842051e-05, "loss": 0.0491, "step": 23716 }, { "epoch": 11.02649930264993, "grad_norm": 2.012519359588623, "learning_rate": 4.758712411082591e-05, "loss": 0.0508, "step": 23718 }, { "epoch": 11.02742910274291, "grad_norm": 1.985776662826538, "learning_rate": 4.7620676311650456e-05, "loss": 0.0339, "step": 23720 }, { "epoch": 11.02835890283589, "grad_norm": 2.0392274856567383, "learning_rate": 4.7654005255532224e-05, "loss": 0.0546, "step": 23722 }, { "epoch": 11.02928870292887, "grad_norm": 1.406490683555603, "learning_rate": 4.768711061352791e-05, "loss": 0.0586, "step": 23724 }, { "epoch": 11.03021850302185, "grad_norm": 0.5522034168243408, "learning_rate": 4.7719992058901006e-05, "loss": 0.0274, "step": 23726 }, { "epoch": 11.031148303114831, "grad_norm": 2.8699285984039307, "learning_rate": 4.775264926712484e-05, "loss": 0.0832, "step": 23728 }, { "epoch": 11.03207810320781, "grad_norm": 1.5758614540100098, "learning_rate": 4.77850819158861e-05, "loss": 0.0431, "step": 23730 }, { "epoch": 11.03300790330079, "grad_norm": 1.498839020729065, "learning_rate": 4.781728968508755e-05, "loss": 0.0459, "step": 23732 }, { "epoch": 11.03393770339377, "grad_norm": 2.0551869869232178, "learning_rate": 4.784927225685146e-05, "loss": 0.0576, "step": 23734 }, { "epoch": 11.034867503486751, "grad_norm": 1.507880687713623, "learning_rate": 4.788102931552288e-05, "loss": 0.0366, "step": 23736 }, { "epoch": 11.03579730357973, "grad_norm": 1.5286065340042114, "learning_rate": 4.791256054767241e-05, "loss": 0.0387, "step": 23738 }, { "epoch": 11.03672710367271, "grad_norm": 1.2820231914520264, "learning_rate": 4.7943865642099436e-05, "loss": 0.0333, "step": 23740 }, { "epoch": 11.03765690376569, "grad_norm": 1.6870828866958618, "learning_rate": 4.797494428983553e-05, "loss": 0.0513, "step": 23742 }, { "epoch": 11.03858670385867, "grad_norm": 2.3319997787475586, "learning_rate": 4.8005796184146775e-05, "loss": 0.0815, "step": 23744 }, { "epoch": 11.039516503951651, "grad_norm": 1.9023674726486206, "learning_rate": 4.80364210205375e-05, "loss": 0.0353, "step": 23746 }, { "epoch": 11.04044630404463, "grad_norm": 1.6778894662857056, "learning_rate": 4.8066818496752854e-05, "loss": 0.043, "step": 23748 }, { "epoch": 11.04137610413761, "grad_norm": 0.8777801990509033, "learning_rate": 4.809698831278217e-05, "loss": 0.0331, "step": 23750 }, { "epoch": 11.04230590423059, "grad_norm": 1.3998304605484009, "learning_rate": 4.8126930170861464e-05, "loss": 0.034, "step": 23752 }, { "epoch": 11.043235704323571, "grad_norm": 0.857782781124115, "learning_rate": 4.815664377547664e-05, "loss": 0.0221, "step": 23754 }, { "epoch": 11.04416550441655, "grad_norm": 1.933237910270691, "learning_rate": 4.8186128833366524e-05, "loss": 0.0504, "step": 23756 }, { "epoch": 11.04509530450953, "grad_norm": 2.655552864074707, "learning_rate": 4.8215385053525453e-05, "loss": 0.0559, "step": 23758 }, { "epoch": 11.04602510460251, "grad_norm": 1.3105244636535645, "learning_rate": 4.8244412147206325e-05, "loss": 0.0387, "step": 23760 }, { "epoch": 11.046954904695491, "grad_norm": 1.6787824630737305, "learning_rate": 4.827320982792337e-05, "loss": 0.0705, "step": 23762 }, { "epoch": 11.047884704788471, "grad_norm": 1.9533305168151855, "learning_rate": 4.830177781145526e-05, "loss": 0.0398, "step": 23764 }, { "epoch": 11.04881450488145, "grad_norm": 0.9494022727012634, "learning_rate": 4.8330115815847465e-05, "loss": 0.0856, "step": 23766 }, { "epoch": 11.04974430497443, "grad_norm": 2.510859727859497, "learning_rate": 4.8358223561415243e-05, "loss": 0.0738, "step": 23768 }, { "epoch": 11.05067410506741, "grad_norm": 0.9513728022575378, "learning_rate": 4.838610077074671e-05, "loss": 0.0206, "step": 23770 }, { "epoch": 11.051603905160391, "grad_norm": 1.0710740089416504, "learning_rate": 4.841374716870485e-05, "loss": 0.0133, "step": 23772 }, { "epoch": 11.05253370525337, "grad_norm": 2.0870277881622314, "learning_rate": 4.844116248243088e-05, "loss": 0.0377, "step": 23774 }, { "epoch": 11.05346350534635, "grad_norm": 2.0260531902313232, "learning_rate": 4.846834644134685e-05, "loss": 0.0555, "step": 23776 }, { "epoch": 11.05439330543933, "grad_norm": 1.4889624118804932, "learning_rate": 4.849529877715802e-05, "loss": 0.0319, "step": 23778 }, { "epoch": 11.055323105532311, "grad_norm": 1.7823530435562134, "learning_rate": 4.8522019223855676e-05, "loss": 0.0608, "step": 23780 }, { "epoch": 11.056252905625291, "grad_norm": 0.9012619853019714, "learning_rate": 4.854850751771976e-05, "loss": 0.0243, "step": 23782 }, { "epoch": 11.05718270571827, "grad_norm": 1.0495041608810425, "learning_rate": 4.857476339732162e-05, "loss": 0.0415, "step": 23784 }, { "epoch": 11.05811250581125, "grad_norm": 1.3079607486724854, "learning_rate": 4.860078660352627e-05, "loss": 0.0453, "step": 23786 }, { "epoch": 11.05904230590423, "grad_norm": 2.1901700496673584, "learning_rate": 4.862657687949511e-05, "loss": 0.0703, "step": 23788 }, { "epoch": 11.059972105997211, "grad_norm": 1.642669677734375, "learning_rate": 4.8652133970688636e-05, "loss": 0.023, "step": 23790 }, { "epoch": 11.06090190609019, "grad_norm": 0.7740399241447449, "learning_rate": 4.867745762486863e-05, "loss": 0.0236, "step": 23792 }, { "epoch": 11.06183170618317, "grad_norm": 1.7549289464950562, "learning_rate": 4.8702547592100837e-05, "loss": 0.0789, "step": 23794 }, { "epoch": 11.06276150627615, "grad_norm": 1.4873130321502686, "learning_rate": 4.872740362475736e-05, "loss": 0.0239, "step": 23796 }, { "epoch": 11.063691306369131, "grad_norm": 1.3029391765594482, "learning_rate": 4.8752025477519356e-05, "loss": 0.0257, "step": 23798 }, { "epoch": 11.064621106462111, "grad_norm": 1.528102159500122, "learning_rate": 4.877641290737891e-05, "loss": 0.0403, "step": 23800 }, { "epoch": 11.06555090655509, "grad_norm": 1.182408094406128, "learning_rate": 4.8800565673641955e-05, "loss": 0.0209, "step": 23802 }, { "epoch": 11.06648070664807, "grad_norm": 1.5298036336898804, "learning_rate": 4.882448353793051e-05, "loss": 0.0304, "step": 23804 }, { "epoch": 11.067410506741052, "grad_norm": 1.6551282405853271, "learning_rate": 4.88481662641849e-05, "loss": 0.0489, "step": 23806 }, { "epoch": 11.068340306834031, "grad_norm": 1.7384237051010132, "learning_rate": 4.88716136186661e-05, "loss": 0.0436, "step": 23808 }, { "epoch": 11.06927010692701, "grad_norm": 0.7466382384300232, "learning_rate": 4.8894825369958296e-05, "loss": 0.0203, "step": 23810 }, { "epoch": 11.07019990701999, "grad_norm": 1.5786206722259521, "learning_rate": 4.89178012889708e-05, "loss": 0.0351, "step": 23812 }, { "epoch": 11.07112970711297, "grad_norm": 1.6926803588867188, "learning_rate": 4.8940541148940616e-05, "loss": 0.0312, "step": 23814 }, { "epoch": 11.072059507205951, "grad_norm": 1.2640304565429688, "learning_rate": 4.8963044725434414e-05, "loss": 0.0394, "step": 23816 }, { "epoch": 11.072989307298931, "grad_norm": 1.6301518678665161, "learning_rate": 4.89853117963511e-05, "loss": 0.0213, "step": 23818 }, { "epoch": 11.07391910739191, "grad_norm": 1.7638658285140991, "learning_rate": 4.900734214192362e-05, "loss": 0.0361, "step": 23820 }, { "epoch": 11.07484890748489, "grad_norm": 3.624098300933838, "learning_rate": 4.9029135544721295e-05, "loss": 0.0663, "step": 23822 }, { "epoch": 11.075778707577872, "grad_norm": 1.943520188331604, "learning_rate": 4.905069178965221e-05, "loss": 0.0547, "step": 23824 }, { "epoch": 11.076708507670851, "grad_norm": 1.9799790382385254, "learning_rate": 4.9072010663964754e-05, "loss": 0.0368, "step": 23826 }, { "epoch": 11.07763830776383, "grad_norm": 1.7268956899642944, "learning_rate": 4.909309195725026e-05, "loss": 0.0485, "step": 23828 }, { "epoch": 11.07856810785681, "grad_norm": 2.3194029331207275, "learning_rate": 4.911393546144498e-05, "loss": 0.0275, "step": 23830 }, { "epoch": 11.07949790794979, "grad_norm": 1.2089667320251465, "learning_rate": 4.9134540970831886e-05, "loss": 0.0585, "step": 23832 }, { "epoch": 11.080427708042771, "grad_norm": 0.7592040300369263, "learning_rate": 4.915490828204291e-05, "loss": 0.0436, "step": 23834 }, { "epoch": 11.081357508135751, "grad_norm": 0.7351528406143188, "learning_rate": 4.917503719406089e-05, "loss": 0.0255, "step": 23836 }, { "epoch": 11.08228730822873, "grad_norm": 1.404676079750061, "learning_rate": 4.919492750822166e-05, "loss": 0.0682, "step": 23838 }, { "epoch": 11.08321710832171, "grad_norm": 0.5008339285850525, "learning_rate": 4.921457902821581e-05, "loss": 0.0246, "step": 23840 }, { "epoch": 11.084146908414692, "grad_norm": 2.793675661087036, "learning_rate": 4.923399156009073e-05, "loss": 0.0669, "step": 23842 }, { "epoch": 11.085076708507671, "grad_norm": 1.2105101346969604, "learning_rate": 4.9253164912252654e-05, "loss": 0.0311, "step": 23844 }, { "epoch": 11.08600650860065, "grad_norm": 2.3740155696868896, "learning_rate": 4.92720988954683e-05, "loss": 0.0611, "step": 23846 }, { "epoch": 11.08693630869363, "grad_norm": 1.6864924430847168, "learning_rate": 4.929079332286688e-05, "loss": 0.0573, "step": 23848 }, { "epoch": 11.087866108786612, "grad_norm": 1.1450958251953125, "learning_rate": 4.9309248009941914e-05, "loss": 0.0231, "step": 23850 }, { "epoch": 11.088795908879591, "grad_norm": 2.3819620609283447, "learning_rate": 4.932746277455322e-05, "loss": 0.0485, "step": 23852 }, { "epoch": 11.089725708972571, "grad_norm": 1.4704006910324097, "learning_rate": 4.934543743692829e-05, "loss": 0.0287, "step": 23854 }, { "epoch": 11.09065550906555, "grad_norm": 1.3039369583129883, "learning_rate": 4.936317181966447e-05, "loss": 0.0232, "step": 23856 }, { "epoch": 11.09158530915853, "grad_norm": 1.348779320716858, "learning_rate": 4.9380665747730624e-05, "loss": 0.0201, "step": 23858 }, { "epoch": 11.092515109251512, "grad_norm": 1.4051322937011719, "learning_rate": 4.9397919048468745e-05, "loss": 0.0399, "step": 23860 }, { "epoch": 11.093444909344491, "grad_norm": 2.710296392440796, "learning_rate": 4.941493155159564e-05, "loss": 0.0658, "step": 23862 }, { "epoch": 11.09437470943747, "grad_norm": 0.5584664344787598, "learning_rate": 4.943170308920487e-05, "loss": 0.0295, "step": 23864 }, { "epoch": 11.09530450953045, "grad_norm": 1.5536335706710815, "learning_rate": 4.944823349576809e-05, "loss": 0.0713, "step": 23866 }, { "epoch": 11.096234309623432, "grad_norm": 2.4001657962799072, "learning_rate": 4.946452260813685e-05, "loss": 0.0346, "step": 23868 }, { "epoch": 11.097164109716411, "grad_norm": 1.4845813512802124, "learning_rate": 4.948057026554417e-05, "loss": 0.0288, "step": 23870 }, { "epoch": 11.098093909809391, "grad_norm": 3.48915433883667, "learning_rate": 4.949637630960622e-05, "loss": 0.0402, "step": 23872 }, { "epoch": 11.09902370990237, "grad_norm": 1.081565499305725, "learning_rate": 4.9511940584323665e-05, "loss": 0.0263, "step": 23874 }, { "epoch": 11.09995350999535, "grad_norm": 1.27531898021698, "learning_rate": 4.9527262936083376e-05, "loss": 0.0506, "step": 23876 }, { "epoch": 11.100883310088332, "grad_norm": 2.204586982727051, "learning_rate": 4.954234321366e-05, "loss": 0.033, "step": 23878 }, { "epoch": 11.101813110181311, "grad_norm": 2.2878024578094482, "learning_rate": 4.955718126821729e-05, "loss": 0.0738, "step": 23880 }, { "epoch": 11.10274291027429, "grad_norm": 1.2638479471206665, "learning_rate": 4.957177695330956e-05, "loss": 0.035, "step": 23882 }, { "epoch": 11.10367271036727, "grad_norm": 1.034497618675232, "learning_rate": 4.958613012488329e-05, "loss": 0.0373, "step": 23884 }, { "epoch": 11.104602510460252, "grad_norm": 1.9750422239303589, "learning_rate": 4.9600240641278544e-05, "loss": 0.0419, "step": 23886 }, { "epoch": 11.105532310553231, "grad_norm": 2.83280873298645, "learning_rate": 4.961410836323019e-05, "loss": 0.0451, "step": 23888 }, { "epoch": 11.106462110646211, "grad_norm": 2.495549440383911, "learning_rate": 4.962773315386938e-05, "loss": 0.0432, "step": 23890 }, { "epoch": 11.10739191073919, "grad_norm": 1.0660635232925415, "learning_rate": 4.9641114878724996e-05, "loss": 0.0395, "step": 23892 }, { "epoch": 11.108321710832172, "grad_norm": 1.3142811059951782, "learning_rate": 4.9654253405724765e-05, "loss": 0.0282, "step": 23894 }, { "epoch": 11.109251510925152, "grad_norm": 2.51883864402771, "learning_rate": 4.966714860519671e-05, "loss": 0.0677, "step": 23896 }, { "epoch": 11.110181311018131, "grad_norm": 1.4639338254928589, "learning_rate": 4.96798003498705e-05, "loss": 0.0288, "step": 23898 }, { "epoch": 11.11111111111111, "grad_norm": 1.547784447669983, "learning_rate": 4.969220851487847e-05, "loss": 0.0241, "step": 23900 }, { "epoch": 11.11204091120409, "grad_norm": 2.0725908279418945, "learning_rate": 4.970437297775705e-05, "loss": 0.0513, "step": 23902 }, { "epoch": 11.112970711297072, "grad_norm": 2.1916921138763428, "learning_rate": 4.9716293618447863e-05, "loss": 0.042, "step": 23904 }, { "epoch": 11.113900511390051, "grad_norm": 2.3868906497955322, "learning_rate": 4.972797031929907e-05, "loss": 0.0606, "step": 23906 }, { "epoch": 11.114830311483031, "grad_norm": 1.9134399890899658, "learning_rate": 4.973940296506633e-05, "loss": 0.0512, "step": 23908 }, { "epoch": 11.11576011157601, "grad_norm": 2.283670425415039, "learning_rate": 4.975059144291398e-05, "loss": 0.0772, "step": 23910 }, { "epoch": 11.116689911668992, "grad_norm": 1.5638148784637451, "learning_rate": 4.9761535642416333e-05, "loss": 0.0215, "step": 23912 }, { "epoch": 11.117619711761972, "grad_norm": 2.5647435188293457, "learning_rate": 4.977223545555853e-05, "loss": 0.0724, "step": 23914 }, { "epoch": 11.118549511854951, "grad_norm": 1.0605711936950684, "learning_rate": 4.978269077673771e-05, "loss": 0.0491, "step": 23916 }, { "epoch": 11.11947931194793, "grad_norm": 1.487044334411621, "learning_rate": 4.9792901502764116e-05, "loss": 0.0287, "step": 23918 }, { "epoch": 11.12040911204091, "grad_norm": 2.360588312149048, "learning_rate": 4.9802867532862003e-05, "loss": 0.044, "step": 23920 }, { "epoch": 11.121338912133892, "grad_norm": 1.9432023763656616, "learning_rate": 4.9812588768670655e-05, "loss": 0.0343, "step": 23922 }, { "epoch": 11.122268712226871, "grad_norm": 1.3104101419448853, "learning_rate": 4.982206511424538e-05, "loss": 0.0285, "step": 23924 }, { "epoch": 11.123198512319851, "grad_norm": 2.153488874435425, "learning_rate": 4.9831296476058524e-05, "loss": 0.0536, "step": 23926 }, { "epoch": 11.12412831241283, "grad_norm": 1.702890396118164, "learning_rate": 4.984028276300026e-05, "loss": 0.0334, "step": 23928 }, { "epoch": 11.125058112505812, "grad_norm": 1.8716737031936646, "learning_rate": 4.984902388637952e-05, "loss": 0.0374, "step": 23930 }, { "epoch": 11.125987912598791, "grad_norm": 1.7632709741592407, "learning_rate": 4.9857519759924994e-05, "loss": 0.0379, "step": 23932 }, { "epoch": 11.126917712691771, "grad_norm": 1.7034722566604614, "learning_rate": 4.986577029978586e-05, "loss": 0.0634, "step": 23934 }, { "epoch": 11.12784751278475, "grad_norm": 3.7655081748962402, "learning_rate": 4.987377542453256e-05, "loss": 0.0883, "step": 23936 }, { "epoch": 11.128777312877732, "grad_norm": 3.398811101913452, "learning_rate": 4.988153505515775e-05, "loss": 0.079, "step": 23938 }, { "epoch": 11.129707112970712, "grad_norm": 1.4579435586929321, "learning_rate": 4.988904911507704e-05, "loss": 0.0308, "step": 23940 }, { "epoch": 11.130636913063691, "grad_norm": 0.892159104347229, "learning_rate": 4.9896317530129683e-05, "loss": 0.0364, "step": 23942 }, { "epoch": 11.131566713156671, "grad_norm": 1.5764013528823853, "learning_rate": 4.990334022857934e-05, "loss": 0.0406, "step": 23944 }, { "epoch": 11.13249651324965, "grad_norm": 1.9806532859802246, "learning_rate": 4.991011714111484e-05, "loss": 0.0385, "step": 23946 }, { "epoch": 11.133426313342632, "grad_norm": 1.7919933795928955, "learning_rate": 4.991664820085078e-05, "loss": 0.0573, "step": 23948 }, { "epoch": 11.134356113435611, "grad_norm": 1.9723941087722778, "learning_rate": 4.992293334332822e-05, "loss": 0.046, "step": 23950 }, { "epoch": 11.135285913528591, "grad_norm": 0.8417448401451111, "learning_rate": 4.992897250651537e-05, "loss": 0.021, "step": 23952 }, { "epoch": 11.13621571362157, "grad_norm": 4.528562545776367, "learning_rate": 4.993476563080811e-05, "loss": 0.0694, "step": 23954 }, { "epoch": 11.137145513714552, "grad_norm": 1.423067331314087, "learning_rate": 4.994031265903066e-05, "loss": 0.0506, "step": 23956 }, { "epoch": 11.138075313807532, "grad_norm": 1.5763777494430542, "learning_rate": 4.994561353643605e-05, "loss": 0.0258, "step": 23958 }, { "epoch": 11.139005113900511, "grad_norm": 2.8143043518066406, "learning_rate": 4.99506682107068e-05, "loss": 0.0621, "step": 23960 }, { "epoch": 11.139934913993491, "grad_norm": 0.7204034328460693, "learning_rate": 4.995547663195531e-05, "loss": 0.0341, "step": 23962 }, { "epoch": 11.140864714086472, "grad_norm": 1.8516148328781128, "learning_rate": 4.996003875272439e-05, "loss": 0.0719, "step": 23964 }, { "epoch": 11.141794514179452, "grad_norm": 1.784765601158142, "learning_rate": 4.9964354527987755e-05, "loss": 0.0284, "step": 23966 }, { "epoch": 11.142724314272431, "grad_norm": 0.7580801248550415, "learning_rate": 4.996842391515045e-05, "loss": 0.0279, "step": 23968 }, { "epoch": 11.143654114365411, "grad_norm": 2.2127532958984375, "learning_rate": 4.9972246874049275e-05, "loss": 0.0687, "step": 23970 }, { "epoch": 11.14458391445839, "grad_norm": 2.015451431274414, "learning_rate": 4.9975823366953145e-05, "loss": 0.0467, "step": 23972 }, { "epoch": 11.145513714551372, "grad_norm": 0.7745525240898132, "learning_rate": 4.9979153358563537e-05, "loss": 0.0312, "step": 23974 }, { "epoch": 11.146443514644352, "grad_norm": 1.339116096496582, "learning_rate": 4.9982236816014766e-05, "loss": 0.029, "step": 23976 }, { "epoch": 11.147373314737331, "grad_norm": 1.3137989044189453, "learning_rate": 4.998507370887434e-05, "loss": 0.0332, "step": 23978 }, { "epoch": 11.148303114830311, "grad_norm": 1.3934063911437988, "learning_rate": 4.99876640091433e-05, "loss": 0.0376, "step": 23980 }, { "epoch": 11.149232914923292, "grad_norm": 1.3972861766815186, "learning_rate": 4.9990007691256426e-05, "loss": 0.0311, "step": 23982 }, { "epoch": 11.150162715016272, "grad_norm": 2.593893051147461, "learning_rate": 4.9992104732082516e-05, "loss": 0.0594, "step": 23984 }, { "epoch": 11.151092515109251, "grad_norm": 0.8381537795066833, "learning_rate": 4.999395511092462e-05, "loss": 0.0261, "step": 23986 }, { "epoch": 11.152022315202231, "grad_norm": 2.193253755569458, "learning_rate": 4.999555880952025e-05, "loss": 0.0426, "step": 23988 }, { "epoch": 11.15295211529521, "grad_norm": 1.3984804153442383, "learning_rate": 4.9996915812041545e-05, "loss": 0.0309, "step": 23990 }, { "epoch": 11.153881915388192, "grad_norm": 0.6479981541633606, "learning_rate": 4.999802610509543e-05, "loss": 0.021, "step": 23992 }, { "epoch": 11.154811715481172, "grad_norm": 1.4305211305618286, "learning_rate": 4.999888967772377e-05, "loss": 0.0406, "step": 23994 }, { "epoch": 11.155741515574151, "grad_norm": 1.4065601825714111, "learning_rate": 4.999950652140345e-05, "loss": 0.0418, "step": 23996 }, { "epoch": 11.15667131566713, "grad_norm": 1.1982147693634033, "learning_rate": 4.9999876630046474e-05, "loss": 0.0505, "step": 23998 }, { "epoch": 11.157601115760112, "grad_norm": 1.3137269020080566, "learning_rate": 5.000000000000003e-05, "loss": 0.0302, "step": 24000 }, { "epoch": 11.157601115760112, "eval_cer": 0.17960197133290695, "eval_loss": 0.32409268617630005, "eval_runtime": 411.9726, "eval_samples_per_second": 30.813, "eval_steps_per_second": 0.964, "step": 24000 }, { "epoch": 11.158530915853092, "grad_norm": 2.8886470794677734, "learning_rate": 4.999987663004649e-05, "loss": 0.0956, "step": 24002 }, { "epoch": 11.159460715946071, "grad_norm": 1.965094804763794, "learning_rate": 4.9999506521403454e-05, "loss": 0.0416, "step": 24004 }, { "epoch": 11.160390516039051, "grad_norm": 2.637758255004883, "learning_rate": 4.999888967772378e-05, "loss": 0.0942, "step": 24006 }, { "epoch": 11.161320316132032, "grad_norm": 1.7164031267166138, "learning_rate": 4.999802610509544e-05, "loss": 0.0575, "step": 24008 }, { "epoch": 11.162250116225012, "grad_norm": 1.888987421989441, "learning_rate": 4.999691581204155e-05, "loss": 0.0386, "step": 24010 }, { "epoch": 11.163179916317992, "grad_norm": 1.1736388206481934, "learning_rate": 4.999555880952026e-05, "loss": 0.0287, "step": 24012 }, { "epoch": 11.164109716410971, "grad_norm": 2.3840243816375732, "learning_rate": 4.999395511092464e-05, "loss": 0.0599, "step": 24014 }, { "epoch": 11.16503951650395, "grad_norm": 2.6957571506500244, "learning_rate": 4.9992104732082536e-05, "loss": 0.0423, "step": 24016 }, { "epoch": 11.165969316596932, "grad_norm": 1.2658684253692627, "learning_rate": 4.999000769125644e-05, "loss": 0.0484, "step": 24018 }, { "epoch": 11.166899116689912, "grad_norm": 1.2298074960708618, "learning_rate": 4.998766400914332e-05, "loss": 0.0477, "step": 24020 }, { "epoch": 11.167828916782891, "grad_norm": 1.4223592281341553, "learning_rate": 4.998507370887434e-05, "loss": 0.0476, "step": 24022 }, { "epoch": 11.168758716875871, "grad_norm": 1.4290871620178223, "learning_rate": 4.9982236816014746e-05, "loss": 0.0293, "step": 24024 }, { "epoch": 11.169688516968852, "grad_norm": 1.825223445892334, "learning_rate": 4.997915335856352e-05, "loss": 0.025, "step": 24026 }, { "epoch": 11.170618317061832, "grad_norm": 2.6006271839141846, "learning_rate": 4.997582336695314e-05, "loss": 0.0531, "step": 24028 }, { "epoch": 11.171548117154812, "grad_norm": 1.151145100593567, "learning_rate": 4.997224687404927e-05, "loss": 0.0504, "step": 24030 }, { "epoch": 11.172477917247791, "grad_norm": 1.3812227249145508, "learning_rate": 4.996842391515046e-05, "loss": 0.0341, "step": 24032 }, { "epoch": 11.17340771734077, "grad_norm": 1.359911561012268, "learning_rate": 4.996435452798777e-05, "loss": 0.0783, "step": 24034 }, { "epoch": 11.174337517433752, "grad_norm": 1.8971755504608154, "learning_rate": 4.99600387527244e-05, "loss": 0.039, "step": 24036 }, { "epoch": 11.175267317526732, "grad_norm": 1.8027451038360596, "learning_rate": 4.995547663195533e-05, "loss": 0.0281, "step": 24038 }, { "epoch": 11.176197117619711, "grad_norm": 1.7998489141464233, "learning_rate": 4.995066821070681e-05, "loss": 0.0463, "step": 24040 }, { "epoch": 11.177126917712691, "grad_norm": 1.6675055027008057, "learning_rate": 4.994561353643606e-05, "loss": 0.0294, "step": 24042 }, { "epoch": 11.178056717805672, "grad_norm": 1.1237752437591553, "learning_rate": 4.9940312659030644e-05, "loss": 0.0318, "step": 24044 }, { "epoch": 11.178986517898652, "grad_norm": 1.5263457298278809, "learning_rate": 4.993476563080811e-05, "loss": 0.031, "step": 24046 }, { "epoch": 11.179916317991632, "grad_norm": 1.7287187576293945, "learning_rate": 4.992897250651537e-05, "loss": 0.0349, "step": 24048 }, { "epoch": 11.180846118084611, "grad_norm": 1.4542843103408813, "learning_rate": 4.9922933343328217e-05, "loss": 0.0411, "step": 24050 }, { "epoch": 11.181775918177593, "grad_norm": 2.142479419708252, "learning_rate": 4.991664820085078e-05, "loss": 0.072, "step": 24052 }, { "epoch": 11.182705718270572, "grad_norm": 1.117822527885437, "learning_rate": 4.991011714111483e-05, "loss": 0.0242, "step": 24054 }, { "epoch": 11.183635518363552, "grad_norm": 1.9833413362503052, "learning_rate": 4.990334022857933e-05, "loss": 0.0571, "step": 24056 }, { "epoch": 11.184565318456531, "grad_norm": 1.7174147367477417, "learning_rate": 4.989631753012966e-05, "loss": 0.0828, "step": 24058 }, { "epoch": 11.185495118549511, "grad_norm": 1.6625171899795532, "learning_rate": 4.988904911507703e-05, "loss": 0.0484, "step": 24060 }, { "epoch": 11.186424918642492, "grad_norm": 1.9115025997161865, "learning_rate": 4.988153505515774e-05, "loss": 0.0429, "step": 24062 }, { "epoch": 11.187354718735472, "grad_norm": 1.332054615020752, "learning_rate": 4.987377542453253e-05, "loss": 0.055, "step": 24064 }, { "epoch": 11.188284518828452, "grad_norm": 2.914592981338501, "learning_rate": 4.986577029978584e-05, "loss": 0.0436, "step": 24066 }, { "epoch": 11.189214318921431, "grad_norm": 2.0248987674713135, "learning_rate": 4.9857519759925e-05, "loss": 0.059, "step": 24068 }, { "epoch": 11.190144119014413, "grad_norm": 1.513539433479309, "learning_rate": 4.984902388637951e-05, "loss": 0.028, "step": 24070 }, { "epoch": 11.191073919107392, "grad_norm": 1.6247215270996094, "learning_rate": 4.984028276300022e-05, "loss": 0.0407, "step": 24072 }, { "epoch": 11.192003719200372, "grad_norm": 1.8613051176071167, "learning_rate": 4.983129647605849e-05, "loss": 0.0504, "step": 24074 }, { "epoch": 11.192933519293351, "grad_norm": 1.5059446096420288, "learning_rate": 4.982206511424535e-05, "loss": 0.039, "step": 24076 }, { "epoch": 11.193863319386331, "grad_norm": 2.385962963104248, "learning_rate": 4.981258876867059e-05, "loss": 0.0482, "step": 24078 }, { "epoch": 11.194793119479312, "grad_norm": 1.8450846672058105, "learning_rate": 4.9802867532861956e-05, "loss": 0.0355, "step": 24080 }, { "epoch": 11.195722919572292, "grad_norm": 1.994418978691101, "learning_rate": 4.979290150276408e-05, "loss": 0.0765, "step": 24082 }, { "epoch": 11.196652719665272, "grad_norm": 2.876708984375, "learning_rate": 4.978269077673766e-05, "loss": 0.0795, "step": 24084 }, { "epoch": 11.197582519758251, "grad_norm": 2.023653507232666, "learning_rate": 4.977223545555849e-05, "loss": 0.0395, "step": 24086 }, { "epoch": 11.198512319851233, "grad_norm": 1.5025256872177124, "learning_rate": 4.976153564241629e-05, "loss": 0.0435, "step": 24088 }, { "epoch": 11.199442119944212, "grad_norm": 1.7096915245056152, "learning_rate": 4.975059144291395e-05, "loss": 0.0393, "step": 24090 }, { "epoch": 11.200371920037192, "grad_norm": 1.1487971544265747, "learning_rate": 4.97394029650663e-05, "loss": 0.0546, "step": 24092 }, { "epoch": 11.201301720130171, "grad_norm": 2.1017563343048096, "learning_rate": 4.972797031929907e-05, "loss": 0.0488, "step": 24094 }, { "epoch": 11.202231520223153, "grad_norm": 1.330673336982727, "learning_rate": 4.971629361844787e-05, "loss": 0.035, "step": 24096 }, { "epoch": 11.203161320316132, "grad_norm": 1.6780108213424683, "learning_rate": 4.9704372977757e-05, "loss": 0.0456, "step": 24098 }, { "epoch": 11.204091120409112, "grad_norm": 2.3135879039764404, "learning_rate": 4.969220851487845e-05, "loss": 0.0602, "step": 24100 }, { "epoch": 11.205020920502092, "grad_norm": 1.5072312355041504, "learning_rate": 4.967980034987049e-05, "loss": 0.0354, "step": 24102 }, { "epoch": 11.205950720595071, "grad_norm": 2.516664981842041, "learning_rate": 4.966714860519671e-05, "loss": 0.0523, "step": 24104 }, { "epoch": 11.206880520688053, "grad_norm": 1.787317156791687, "learning_rate": 4.9654253405724744e-05, "loss": 0.0576, "step": 24106 }, { "epoch": 11.207810320781032, "grad_norm": 1.5590624809265137, "learning_rate": 4.964111487872496e-05, "loss": 0.0461, "step": 24108 }, { "epoch": 11.208740120874012, "grad_norm": 2.725470781326294, "learning_rate": 4.962773315386935e-05, "loss": 0.0796, "step": 24110 }, { "epoch": 11.209669920966991, "grad_norm": 1.6147931814193726, "learning_rate": 4.9614108363230135e-05, "loss": 0.0361, "step": 24112 }, { "epoch": 11.210599721059973, "grad_norm": 1.8302899599075317, "learning_rate": 4.9600240641278517e-05, "loss": 0.0394, "step": 24114 }, { "epoch": 11.211529521152952, "grad_norm": 1.695679783821106, "learning_rate": 4.958613012488326e-05, "loss": 0.0456, "step": 24116 }, { "epoch": 11.212459321245932, "grad_norm": 1.3447788953781128, "learning_rate": 4.9571776953309486e-05, "loss": 0.0368, "step": 24118 }, { "epoch": 11.213389121338912, "grad_norm": 0.8512775897979736, "learning_rate": 4.955718126821725e-05, "loss": 0.0458, "step": 24120 }, { "epoch": 11.214318921431893, "grad_norm": 0.9660794734954834, "learning_rate": 4.9542343213659994e-05, "loss": 0.0392, "step": 24122 }, { "epoch": 11.215248721524873, "grad_norm": 1.7517869472503662, "learning_rate": 4.9527262936083376e-05, "loss": 0.0347, "step": 24124 }, { "epoch": 11.216178521617852, "grad_norm": 2.2307822704315186, "learning_rate": 4.9511940584323624e-05, "loss": 0.0831, "step": 24126 }, { "epoch": 11.217108321710832, "grad_norm": 3.7260565757751465, "learning_rate": 4.949637630960617e-05, "loss": 0.0896, "step": 24128 }, { "epoch": 11.218038121803811, "grad_norm": 1.0810632705688477, "learning_rate": 4.948057026554414e-05, "loss": 0.0306, "step": 24130 }, { "epoch": 11.218967921896793, "grad_norm": 1.7124155759811401, "learning_rate": 4.9464522608136785e-05, "loss": 0.021, "step": 24132 }, { "epoch": 11.219897721989772, "grad_norm": 1.7060765027999878, "learning_rate": 4.944823349576806e-05, "loss": 0.0427, "step": 24134 }, { "epoch": 11.220827522082752, "grad_norm": 0.7768317461013794, "learning_rate": 4.943170308920484e-05, "loss": 0.0197, "step": 24136 }, { "epoch": 11.221757322175732, "grad_norm": 1.7826651334762573, "learning_rate": 4.941493155159561e-05, "loss": 0.041, "step": 24138 }, { "epoch": 11.222687122268713, "grad_norm": 1.271782636642456, "learning_rate": 4.939791904846871e-05, "loss": 0.0355, "step": 24140 }, { "epoch": 11.223616922361693, "grad_norm": 3.4012539386749268, "learning_rate": 4.93806657477306e-05, "loss": 0.0744, "step": 24142 }, { "epoch": 11.224546722454672, "grad_norm": 1.571152925491333, "learning_rate": 4.936317181966444e-05, "loss": 0.0554, "step": 24144 }, { "epoch": 11.225476522547652, "grad_norm": 1.971073031425476, "learning_rate": 4.9345437436928215e-05, "loss": 0.0437, "step": 24146 }, { "epoch": 11.226406322640631, "grad_norm": 2.467416286468506, "learning_rate": 4.932746277455319e-05, "loss": 0.0253, "step": 24148 }, { "epoch": 11.227336122733613, "grad_norm": 2.012716770172119, "learning_rate": 4.930924800994194e-05, "loss": 0.0276, "step": 24150 }, { "epoch": 11.228265922826592, "grad_norm": 1.2271475791931152, "learning_rate": 4.929079332286686e-05, "loss": 0.0426, "step": 24152 }, { "epoch": 11.229195722919572, "grad_norm": 0.7907717227935791, "learning_rate": 4.9272098895468277e-05, "loss": 0.0385, "step": 24154 }, { "epoch": 11.230125523012552, "grad_norm": 1.2921093702316284, "learning_rate": 4.925316491225264e-05, "loss": 0.0435, "step": 24156 }, { "epoch": 11.231055323105533, "grad_norm": 2.013719081878662, "learning_rate": 4.9233991560090716e-05, "loss": 0.0498, "step": 24158 }, { "epoch": 11.231985123198513, "grad_norm": 1.7175085544586182, "learning_rate": 4.92145790282158e-05, "loss": 0.0298, "step": 24160 }, { "epoch": 11.232914923291492, "grad_norm": 0.6866182088851929, "learning_rate": 4.919492750822164e-05, "loss": 0.0158, "step": 24162 }, { "epoch": 11.233844723384472, "grad_norm": 1.1326669454574585, "learning_rate": 4.917503719406087e-05, "loss": 0.0526, "step": 24164 }, { "epoch": 11.234774523477453, "grad_norm": 2.2021408081054688, "learning_rate": 4.915490828204284e-05, "loss": 0.0626, "step": 24166 }, { "epoch": 11.235704323570433, "grad_norm": 2.140709161758423, "learning_rate": 4.9134540970831866e-05, "loss": 0.0652, "step": 24168 }, { "epoch": 11.236634123663412, "grad_norm": 0.9828194379806519, "learning_rate": 4.9113935461444955e-05, "loss": 0.0242, "step": 24170 }, { "epoch": 11.237563923756392, "grad_norm": 2.1636157035827637, "learning_rate": 4.909309195725023e-05, "loss": 0.0548, "step": 24172 }, { "epoch": 11.238493723849372, "grad_norm": 1.0749574899673462, "learning_rate": 4.9072010663964714e-05, "loss": 0.0214, "step": 24174 }, { "epoch": 11.239423523942353, "grad_norm": 2.0431272983551025, "learning_rate": 4.905069178965217e-05, "loss": 0.0325, "step": 24176 }, { "epoch": 11.240353324035333, "grad_norm": 0.7815976142883301, "learning_rate": 4.902913554472131e-05, "loss": 0.024, "step": 24178 }, { "epoch": 11.241283124128312, "grad_norm": 1.6532042026519775, "learning_rate": 4.900734214192363e-05, "loss": 0.0493, "step": 24180 }, { "epoch": 11.242212924221292, "grad_norm": 1.615424394607544, "learning_rate": 4.8985311796351066e-05, "loss": 0.0433, "step": 24182 }, { "epoch": 11.243142724314273, "grad_norm": 1.642198920249939, "learning_rate": 4.896304472543438e-05, "loss": 0.0322, "step": 24184 }, { "epoch": 11.244072524407253, "grad_norm": 0.8937569856643677, "learning_rate": 4.894054114894053e-05, "loss": 0.0245, "step": 24186 }, { "epoch": 11.245002324500232, "grad_norm": 1.433370590209961, "learning_rate": 4.891780128897077e-05, "loss": 0.0393, "step": 24188 }, { "epoch": 11.245932124593212, "grad_norm": 1.2414780855178833, "learning_rate": 4.889482536995825e-05, "loss": 0.0493, "step": 24190 }, { "epoch": 11.246861924686192, "grad_norm": 2.0745491981506348, "learning_rate": 4.887161361866605e-05, "loss": 0.0616, "step": 24192 }, { "epoch": 11.247791724779173, "grad_norm": 1.6847282648086548, "learning_rate": 4.8848166264184865e-05, "loss": 0.0759, "step": 24194 }, { "epoch": 11.248721524872153, "grad_norm": 2.1163156032562256, "learning_rate": 4.8824483537930486e-05, "loss": 0.074, "step": 24196 }, { "epoch": 11.249651324965132, "grad_norm": 1.4638444185256958, "learning_rate": 4.880056567364193e-05, "loss": 0.0255, "step": 24198 }, { "epoch": 11.250581125058112, "grad_norm": 1.533205270767212, "learning_rate": 4.8776412907378836e-05, "loss": 0.036, "step": 24200 }, { "epoch": 11.251510925151093, "grad_norm": 1.7800225019454956, "learning_rate": 4.875202547751933e-05, "loss": 0.0313, "step": 24202 }, { "epoch": 11.252440725244073, "grad_norm": 2.322877883911133, "learning_rate": 4.872740362475739e-05, "loss": 0.0623, "step": 24204 }, { "epoch": 11.253370525337052, "grad_norm": 1.7338225841522217, "learning_rate": 4.870254759210082e-05, "loss": 0.0486, "step": 24206 }, { "epoch": 11.254300325430032, "grad_norm": 1.4232938289642334, "learning_rate": 4.867745762486867e-05, "loss": 0.0431, "step": 24208 }, { "epoch": 11.255230125523013, "grad_norm": 1.9371330738067627, "learning_rate": 4.865213397068863e-05, "loss": 0.0331, "step": 24210 }, { "epoch": 11.256159925615993, "grad_norm": 2.5836212635040283, "learning_rate": 4.8626576879495106e-05, "loss": 0.0476, "step": 24212 }, { "epoch": 11.257089725708973, "grad_norm": 0.923859715461731, "learning_rate": 4.8600786603526275e-05, "loss": 0.0226, "step": 24214 }, { "epoch": 11.258019525801952, "grad_norm": 1.6002436876296997, "learning_rate": 4.857476339732163e-05, "loss": 0.0485, "step": 24216 }, { "epoch": 11.258949325894932, "grad_norm": 0.6485012173652649, "learning_rate": 4.8548507517719775e-05, "loss": 0.0297, "step": 24218 }, { "epoch": 11.259879125987913, "grad_norm": 1.264620065689087, "learning_rate": 4.852201922385563e-05, "loss": 0.0506, "step": 24220 }, { "epoch": 11.260808926080893, "grad_norm": 1.499274730682373, "learning_rate": 4.849529877715803e-05, "loss": 0.0453, "step": 24222 }, { "epoch": 11.261738726173872, "grad_norm": 2.013843297958374, "learning_rate": 4.8468346441346874e-05, "loss": 0.0331, "step": 24224 }, { "epoch": 11.262668526266852, "grad_norm": 0.9493628144264221, "learning_rate": 4.8441162482430904e-05, "loss": 0.0201, "step": 24226 }, { "epoch": 11.263598326359833, "grad_norm": 2.3391172885894775, "learning_rate": 4.841374716870487e-05, "loss": 0.0426, "step": 24228 }, { "epoch": 11.264528126452813, "grad_norm": 3.1507489681243896, "learning_rate": 4.8386100770746735e-05, "loss": 0.0646, "step": 24230 }, { "epoch": 11.265457926545793, "grad_norm": 1.418782114982605, "learning_rate": 4.835822356141534e-05, "loss": 0.0711, "step": 24232 }, { "epoch": 11.266387726638772, "grad_norm": 1.0171880722045898, "learning_rate": 4.8330115815847486e-05, "loss": 0.0384, "step": 24234 }, { "epoch": 11.267317526731752, "grad_norm": 1.4240562915802002, "learning_rate": 4.830177781145528e-05, "loss": 0.0276, "step": 24236 }, { "epoch": 11.268247326824733, "grad_norm": 1.136643886566162, "learning_rate": 4.8273209827923384e-05, "loss": 0.0352, "step": 24238 }, { "epoch": 11.269177126917713, "grad_norm": 1.544273853302002, "learning_rate": 4.824441214720626e-05, "loss": 0.0346, "step": 24240 }, { "epoch": 11.270106927010692, "grad_norm": 2.168604612350464, "learning_rate": 4.8215385053525453e-05, "loss": 0.0795, "step": 24242 }, { "epoch": 11.271036727103672, "grad_norm": 2.2646567821502686, "learning_rate": 4.818612883336654e-05, "loss": 0.03, "step": 24244 }, { "epoch": 11.271966527196653, "grad_norm": 2.1531920433044434, "learning_rate": 4.815664377547666e-05, "loss": 0.0478, "step": 24246 }, { "epoch": 11.272896327289633, "grad_norm": 1.4391043186187744, "learning_rate": 4.812693017086149e-05, "loss": 0.0414, "step": 24248 }, { "epoch": 11.273826127382613, "grad_norm": 1.2522836923599243, "learning_rate": 4.809698831278219e-05, "loss": 0.0167, "step": 24250 }, { "epoch": 11.274755927475592, "grad_norm": 1.3668333292007446, "learning_rate": 4.806681849675287e-05, "loss": 0.0275, "step": 24252 }, { "epoch": 11.275685727568574, "grad_norm": 1.4160360097885132, "learning_rate": 4.8036421020537436e-05, "loss": 0.0309, "step": 24254 }, { "epoch": 11.276615527661553, "grad_norm": 1.0218604803085327, "learning_rate": 4.800579618414679e-05, "loss": 0.0567, "step": 24256 }, { "epoch": 11.277545327754533, "grad_norm": 1.6408946514129639, "learning_rate": 4.7974944289835546e-05, "loss": 0.0389, "step": 24258 }, { "epoch": 11.278475127847512, "grad_norm": 1.6981772184371948, "learning_rate": 4.794386564209953e-05, "loss": 0.0485, "step": 24260 }, { "epoch": 11.279404927940492, "grad_norm": 1.188564419746399, "learning_rate": 4.791256054767251e-05, "loss": 0.0436, "step": 24262 }, { "epoch": 11.280334728033473, "grad_norm": 2.125584602355957, "learning_rate": 4.7881029315522914e-05, "loss": 0.0821, "step": 24264 }, { "epoch": 11.281264528126453, "grad_norm": 2.1747443675994873, "learning_rate": 4.784927225685149e-05, "loss": 0.0788, "step": 24266 }, { "epoch": 11.282194328219433, "grad_norm": 1.4601455926895142, "learning_rate": 4.781728968508758e-05, "loss": 0.0471, "step": 24268 }, { "epoch": 11.283124128312412, "grad_norm": 1.7527326345443726, "learning_rate": 4.778508191588612e-05, "loss": 0.0461, "step": 24270 }, { "epoch": 11.284053928405394, "grad_norm": 1.0771270990371704, "learning_rate": 4.7752649267124865e-05, "loss": 0.0426, "step": 24272 }, { "epoch": 11.284983728498373, "grad_norm": 1.585099220275879, "learning_rate": 4.771999205890096e-05, "loss": 0.0457, "step": 24274 }, { "epoch": 11.285913528591353, "grad_norm": 1.7580313682556152, "learning_rate": 4.768711061352793e-05, "loss": 0.0356, "step": 24276 }, { "epoch": 11.286843328684332, "grad_norm": 2.013526439666748, "learning_rate": 4.7654005255532244e-05, "loss": 0.0614, "step": 24278 }, { "epoch": 11.287773128777314, "grad_norm": 1.5976120233535767, "learning_rate": 4.762067631165047e-05, "loss": 0.0587, "step": 24280 }, { "epoch": 11.288702928870293, "grad_norm": 0.6039189100265503, "learning_rate": 4.7587124110825916e-05, "loss": 0.0239, "step": 24282 }, { "epoch": 11.289632728963273, "grad_norm": 1.758039116859436, "learning_rate": 4.75533489842051e-05, "loss": 0.0758, "step": 24284 }, { "epoch": 11.290562529056253, "grad_norm": 1.305109977722168, "learning_rate": 4.751935126513496e-05, "loss": 0.0246, "step": 24286 }, { "epoch": 11.291492329149232, "grad_norm": 2.5322816371917725, "learning_rate": 4.748513128915926e-05, "loss": 0.0569, "step": 24288 }, { "epoch": 11.292422129242214, "grad_norm": 1.496744155883789, "learning_rate": 4.745068939401544e-05, "loss": 0.0232, "step": 24290 }, { "epoch": 11.293351929335193, "grad_norm": 1.8287662267684937, "learning_rate": 4.741602591963084e-05, "loss": 0.0617, "step": 24292 }, { "epoch": 11.294281729428173, "grad_norm": 1.2237062454223633, "learning_rate": 4.7381141208120215e-05, "loss": 0.058, "step": 24294 }, { "epoch": 11.295211529521152, "grad_norm": 1.8582820892333984, "learning_rate": 4.734603560378158e-05, "loss": 0.0753, "step": 24296 }, { "epoch": 11.296141329614134, "grad_norm": 2.046602725982666, "learning_rate": 4.731070945309291e-05, "loss": 0.0456, "step": 24298 }, { "epoch": 11.297071129707113, "grad_norm": 2.6788647174835205, "learning_rate": 4.727516310470914e-05, "loss": 0.0633, "step": 24300 }, { "epoch": 11.298000929800093, "grad_norm": 1.5810434818267822, "learning_rate": 4.723939690945845e-05, "loss": 0.0539, "step": 24302 }, { "epoch": 11.298930729893073, "grad_norm": 1.6951013803482056, "learning_rate": 4.72034112203386e-05, "loss": 0.041, "step": 24304 }, { "epoch": 11.299860529986052, "grad_norm": 1.3191264867782593, "learning_rate": 4.716720639251388e-05, "loss": 0.0492, "step": 24306 }, { "epoch": 11.300790330079034, "grad_norm": 2.3142237663269043, "learning_rate": 4.713078278331133e-05, "loss": 0.0559, "step": 24308 }, { "epoch": 11.301720130172013, "grad_norm": 1.2478970289230347, "learning_rate": 4.709414075221735e-05, "loss": 0.067, "step": 24310 }, { "epoch": 11.302649930264993, "grad_norm": 2.745288610458374, "learning_rate": 4.705728066087382e-05, "loss": 0.0311, "step": 24312 }, { "epoch": 11.303579730357972, "grad_norm": 1.7049946784973145, "learning_rate": 4.702020287307506e-05, "loss": 0.0312, "step": 24314 }, { "epoch": 11.304509530450954, "grad_norm": 2.3773956298828125, "learning_rate": 4.698290775476395e-05, "loss": 0.0466, "step": 24316 }, { "epoch": 11.305439330543933, "grad_norm": 1.5744974613189697, "learning_rate": 4.6945395674027985e-05, "loss": 0.0329, "step": 24318 }, { "epoch": 11.306369130636913, "grad_norm": 2.576788902282715, "learning_rate": 4.690766700109651e-05, "loss": 0.0628, "step": 24320 }, { "epoch": 11.307298930729893, "grad_norm": 1.3612873554229736, "learning_rate": 4.686972210833622e-05, "loss": 0.0343, "step": 24322 }, { "epoch": 11.308228730822874, "grad_norm": 1.1443932056427002, "learning_rate": 4.683156137024798e-05, "loss": 0.0218, "step": 24324 }, { "epoch": 11.309158530915854, "grad_norm": 1.1055686473846436, "learning_rate": 4.679318516346267e-05, "loss": 0.0415, "step": 24326 }, { "epoch": 11.310088331008833, "grad_norm": 1.7225918769836426, "learning_rate": 4.675459386673807e-05, "loss": 0.0278, "step": 24328 }, { "epoch": 11.311018131101813, "grad_norm": 0.8345402479171753, "learning_rate": 4.671578786095479e-05, "loss": 0.0382, "step": 24330 }, { "epoch": 11.311947931194792, "grad_norm": 1.800767421722412, "learning_rate": 4.667676752911223e-05, "loss": 0.0475, "step": 24332 }, { "epoch": 11.312877731287774, "grad_norm": 1.4933874607086182, "learning_rate": 4.6637533256325443e-05, "loss": 0.0663, "step": 24334 }, { "epoch": 11.313807531380753, "grad_norm": 0.3870992958545685, "learning_rate": 4.659808542982092e-05, "loss": 0.0123, "step": 24336 }, { "epoch": 11.314737331473733, "grad_norm": 1.025052785873413, "learning_rate": 4.6558424438932626e-05, "loss": 0.0319, "step": 24338 }, { "epoch": 11.315667131566713, "grad_norm": 3.58964467048645, "learning_rate": 4.65185506750986e-05, "loss": 0.0645, "step": 24340 }, { "epoch": 11.316596931659694, "grad_norm": 1.855090856552124, "learning_rate": 4.64784645318568e-05, "loss": 0.0428, "step": 24342 }, { "epoch": 11.317526731752674, "grad_norm": 1.846028447151184, "learning_rate": 4.643816640484137e-05, "loss": 0.0494, "step": 24344 }, { "epoch": 11.318456531845653, "grad_norm": 0.776448130607605, "learning_rate": 4.6397656691778284e-05, "loss": 0.0371, "step": 24346 }, { "epoch": 11.319386331938633, "grad_norm": 1.6244287490844727, "learning_rate": 4.635693579248232e-05, "loss": 0.0475, "step": 24348 }, { "epoch": 11.320316132031612, "grad_norm": 1.712429404258728, "learning_rate": 4.631600410885231e-05, "loss": 0.0347, "step": 24350 }, { "epoch": 11.321245932124594, "grad_norm": 2.0071945190429688, "learning_rate": 4.6274862044867283e-05, "loss": 0.0455, "step": 24352 }, { "epoch": 11.322175732217573, "grad_norm": 1.38556706905365, "learning_rate": 4.623351000658288e-05, "loss": 0.0595, "step": 24354 }, { "epoch": 11.323105532310553, "grad_norm": 2.234830856323242, "learning_rate": 4.6191948402127115e-05, "loss": 0.0449, "step": 24356 }, { "epoch": 11.324035332403533, "grad_norm": 0.7195268869400024, "learning_rate": 4.615017764169606e-05, "loss": 0.017, "step": 24358 }, { "epoch": 11.324965132496514, "grad_norm": 0.7810609340667725, "learning_rate": 4.6108198137550366e-05, "loss": 0.0287, "step": 24360 }, { "epoch": 11.325894932589494, "grad_norm": 2.216721296310425, "learning_rate": 4.6066010304010773e-05, "loss": 0.0374, "step": 24362 }, { "epoch": 11.326824732682473, "grad_norm": 1.689477562904358, "learning_rate": 4.602361455745427e-05, "loss": 0.0513, "step": 24364 }, { "epoch": 11.327754532775453, "grad_norm": 1.005026936531067, "learning_rate": 4.598101131630957e-05, "loss": 0.0154, "step": 24366 }, { "epoch": 11.328684332868434, "grad_norm": 2.367959976196289, "learning_rate": 4.5938201001053556e-05, "loss": 0.0628, "step": 24368 }, { "epoch": 11.329614132961414, "grad_norm": 1.5210126638412476, "learning_rate": 4.5895184034206846e-05, "loss": 0.0383, "step": 24370 }, { "epoch": 11.330543933054393, "grad_norm": 1.2153948545455933, "learning_rate": 4.585196084032935e-05, "loss": 0.0359, "step": 24372 }, { "epoch": 11.331473733147373, "grad_norm": 1.412820816040039, "learning_rate": 4.580853184601653e-05, "loss": 0.0394, "step": 24374 }, { "epoch": 11.332403533240353, "grad_norm": 0.6674173474311829, "learning_rate": 4.576489747989524e-05, "loss": 0.015, "step": 24376 }, { "epoch": 11.333333333333334, "grad_norm": 2.1527798175811768, "learning_rate": 4.572105817261906e-05, "loss": 0.0512, "step": 24378 }, { "epoch": 11.334263133426314, "grad_norm": 1.2793185710906982, "learning_rate": 4.5677014356864035e-05, "loss": 0.0536, "step": 24380 }, { "epoch": 11.335192933519293, "grad_norm": 2.0344035625457764, "learning_rate": 4.563276646732495e-05, "loss": 0.0416, "step": 24382 }, { "epoch": 11.336122733612273, "grad_norm": 1.3048213720321655, "learning_rate": 4.558831494071073e-05, "loss": 0.0377, "step": 24384 }, { "epoch": 11.337052533705254, "grad_norm": 0.7604084610939026, "learning_rate": 4.554366021573978e-05, "loss": 0.0355, "step": 24386 }, { "epoch": 11.337982333798234, "grad_norm": 1.707979440689087, "learning_rate": 4.549880273313631e-05, "loss": 0.0408, "step": 24388 }, { "epoch": 11.338912133891213, "grad_norm": 1.104038953781128, "learning_rate": 4.545374293562567e-05, "loss": 0.0308, "step": 24390 }, { "epoch": 11.339841933984193, "grad_norm": 2.187119483947754, "learning_rate": 4.540848126792965e-05, "loss": 0.0446, "step": 24392 }, { "epoch": 11.340771734077173, "grad_norm": 0.6421685218811035, "learning_rate": 4.536301817676277e-05, "loss": 0.0269, "step": 24394 }, { "epoch": 11.341701534170154, "grad_norm": 1.7385061979293823, "learning_rate": 4.531735411082736e-05, "loss": 0.0284, "step": 24396 }, { "epoch": 11.342631334263134, "grad_norm": 1.744205117225647, "learning_rate": 4.527148952080942e-05, "loss": 0.053, "step": 24398 }, { "epoch": 11.343561134356113, "grad_norm": 1.7304893732070923, "learning_rate": 4.5225424859373636e-05, "loss": 0.0471, "step": 24400 }, { "epoch": 11.344490934449093, "grad_norm": 1.85393226146698, "learning_rate": 4.517916058115994e-05, "loss": 0.0406, "step": 24402 }, { "epoch": 11.345420734542074, "grad_norm": 1.2333160638809204, "learning_rate": 4.513269714277806e-05, "loss": 0.0219, "step": 24404 }, { "epoch": 11.346350534635054, "grad_norm": 1.508315086364746, "learning_rate": 4.508603500280318e-05, "loss": 0.032, "step": 24406 }, { "epoch": 11.347280334728033, "grad_norm": 0.7638140916824341, "learning_rate": 4.503917462177188e-05, "loss": 0.0318, "step": 24408 }, { "epoch": 11.348210134821013, "grad_norm": 1.7286674976348877, "learning_rate": 4.4992116462177215e-05, "loss": 0.0398, "step": 24410 }, { "epoch": 11.349139934913994, "grad_norm": 1.420051097869873, "learning_rate": 4.494486098846431e-05, "loss": 0.0278, "step": 24412 }, { "epoch": 11.350069735006974, "grad_norm": 2.010179042816162, "learning_rate": 4.4897408667025405e-05, "loss": 0.0336, "step": 24414 }, { "epoch": 11.350999535099954, "grad_norm": 1.181753396987915, "learning_rate": 4.484975996619587e-05, "loss": 0.024, "step": 24416 }, { "epoch": 11.351929335192933, "grad_norm": 1.9364330768585205, "learning_rate": 4.480191535624925e-05, "loss": 0.0599, "step": 24418 }, { "epoch": 11.352859135285913, "grad_norm": 0.6842238903045654, "learning_rate": 4.475387530939229e-05, "loss": 0.0175, "step": 24420 }, { "epoch": 11.353788935378894, "grad_norm": 3.0385046005249023, "learning_rate": 4.470564029976101e-05, "loss": 0.037, "step": 24422 }, { "epoch": 11.354718735471874, "grad_norm": 2.102611780166626, "learning_rate": 4.465721080341557e-05, "loss": 0.0517, "step": 24424 }, { "epoch": 11.355648535564853, "grad_norm": 0.8447253704071045, "learning_rate": 4.460858729833533e-05, "loss": 0.0202, "step": 24426 }, { "epoch": 11.356578335657833, "grad_norm": 1.2361043691635132, "learning_rate": 4.4559770264414634e-05, "loss": 0.0401, "step": 24428 }, { "epoch": 11.357508135750814, "grad_norm": 2.609229564666748, "learning_rate": 4.451076018345815e-05, "loss": 0.066, "step": 24430 }, { "epoch": 11.358437935843794, "grad_norm": 0.5752732753753662, "learning_rate": 4.446155753917558e-05, "loss": 0.0132, "step": 24432 }, { "epoch": 11.359367735936774, "grad_norm": 0.8741409182548523, "learning_rate": 4.4412162817176944e-05, "loss": 0.028, "step": 24434 }, { "epoch": 11.360297536029753, "grad_norm": 2.253936290740967, "learning_rate": 4.4362576504968296e-05, "loss": 0.0331, "step": 24436 }, { "epoch": 11.361227336122734, "grad_norm": 2.720980167388916, "learning_rate": 4.431279909194665e-05, "loss": 0.0453, "step": 24438 }, { "epoch": 11.362157136215714, "grad_norm": 1.158791184425354, "learning_rate": 4.426283106939475e-05, "loss": 0.0505, "step": 24440 }, { "epoch": 11.363086936308694, "grad_norm": 0.7450485825538635, "learning_rate": 4.4212672930476915e-05, "loss": 0.0152, "step": 24442 }, { "epoch": 11.364016736401673, "grad_norm": 1.3129011392593384, "learning_rate": 4.416232517023384e-05, "loss": 0.0379, "step": 24444 }, { "epoch": 11.364946536494653, "grad_norm": 1.09360933303833, "learning_rate": 4.4111788285577355e-05, "loss": 0.039, "step": 24446 }, { "epoch": 11.365876336587634, "grad_norm": 2.034060001373291, "learning_rate": 4.406106277528624e-05, "loss": 0.04, "step": 24448 }, { "epoch": 11.366806136680614, "grad_norm": 0.5995509028434753, "learning_rate": 4.4010149140000785e-05, "loss": 0.0317, "step": 24450 }, { "epoch": 11.367735936773594, "grad_norm": 1.9004650115966797, "learning_rate": 4.3959047882218146e-05, "loss": 0.03, "step": 24452 }, { "epoch": 11.368665736866573, "grad_norm": 1.854841709136963, "learning_rate": 4.390775950628688e-05, "loss": 0.0501, "step": 24454 }, { "epoch": 11.369595536959554, "grad_norm": 0.3687758147716522, "learning_rate": 4.385628451840254e-05, "loss": 0.0208, "step": 24456 }, { "epoch": 11.370525337052534, "grad_norm": 1.6847580671310425, "learning_rate": 4.3804623426602825e-05, "loss": 0.0504, "step": 24458 }, { "epoch": 11.371455137145514, "grad_norm": 1.8825856447219849, "learning_rate": 4.37527767407615e-05, "loss": 0.0349, "step": 24460 }, { "epoch": 11.372384937238493, "grad_norm": 1.9708163738250732, "learning_rate": 4.370074497258454e-05, "loss": 0.0377, "step": 24462 }, { "epoch": 11.373314737331473, "grad_norm": 3.6370484828948975, "learning_rate": 4.364852863560451e-05, "loss": 0.0479, "step": 24464 }, { "epoch": 11.374244537424454, "grad_norm": 1.760080099105835, "learning_rate": 4.359612824517567e-05, "loss": 0.052, "step": 24466 }, { "epoch": 11.375174337517434, "grad_norm": 2.440584659576416, "learning_rate": 4.35435443184685e-05, "loss": 0.0433, "step": 24468 }, { "epoch": 11.376104137610414, "grad_norm": 2.907963514328003, "learning_rate": 4.349077737446524e-05, "loss": 0.0429, "step": 24470 }, { "epoch": 11.377033937703393, "grad_norm": 1.574262022972107, "learning_rate": 4.343782793395444e-05, "loss": 0.0395, "step": 24472 }, { "epoch": 11.377963737796374, "grad_norm": 1.0407874584197998, "learning_rate": 4.338469651952546e-05, "loss": 0.0386, "step": 24474 }, { "epoch": 11.378893537889354, "grad_norm": 0.5684640407562256, "learning_rate": 4.333138365556404e-05, "loss": 0.0247, "step": 24476 }, { "epoch": 11.379823337982334, "grad_norm": 0.9556950330734253, "learning_rate": 4.327788986824674e-05, "loss": 0.0488, "step": 24478 }, { "epoch": 11.380753138075313, "grad_norm": 1.0732461214065552, "learning_rate": 4.32242156855354e-05, "loss": 0.0309, "step": 24480 }, { "epoch": 11.381682938168293, "grad_norm": 1.79683518409729, "learning_rate": 4.3170361637172656e-05, "loss": 0.0334, "step": 24482 }, { "epoch": 11.382612738261274, "grad_norm": 2.3303589820861816, "learning_rate": 4.3116328254676104e-05, "loss": 0.0348, "step": 24484 }, { "epoch": 11.383542538354254, "grad_norm": 2.0227575302124023, "learning_rate": 4.306211607133378e-05, "loss": 0.0287, "step": 24486 }, { "epoch": 11.384472338447233, "grad_norm": 1.4103713035583496, "learning_rate": 4.3007725622197674e-05, "loss": 0.0463, "step": 24488 }, { "epoch": 11.385402138540213, "grad_norm": 1.5206235647201538, "learning_rate": 4.295315744407969e-05, "loss": 0.0271, "step": 24490 }, { "epoch": 11.386331938633194, "grad_norm": 1.5259993076324463, "learning_rate": 4.289841207554585e-05, "loss": 0.0249, "step": 24492 }, { "epoch": 11.387261738726174, "grad_norm": 1.245419979095459, "learning_rate": 4.2843490056910575e-05, "loss": 0.0183, "step": 24494 }, { "epoch": 11.388191538819154, "grad_norm": 1.3163182735443115, "learning_rate": 4.278839193023215e-05, "loss": 0.0412, "step": 24496 }, { "epoch": 11.389121338912133, "grad_norm": 1.244261384010315, "learning_rate": 4.273311823930683e-05, "loss": 0.0368, "step": 24498 }, { "epoch": 11.390051139005115, "grad_norm": 1.3002431392669678, "learning_rate": 4.267766952966378e-05, "loss": 0.0399, "step": 24500 }, { "epoch": 11.390980939098094, "grad_norm": 0.8744835257530212, "learning_rate": 4.26220463485591e-05, "loss": 0.0499, "step": 24502 }, { "epoch": 11.391910739191074, "grad_norm": 1.451857566833496, "learning_rate": 4.2566249244971266e-05, "loss": 0.0473, "step": 24504 }, { "epoch": 11.392840539284053, "grad_norm": 1.208357810974121, "learning_rate": 4.2510278769595296e-05, "loss": 0.0295, "step": 24506 }, { "epoch": 11.393770339377033, "grad_norm": 1.0709065198898315, "learning_rate": 4.2454135474836926e-05, "loss": 0.0289, "step": 24508 }, { "epoch": 11.394700139470014, "grad_norm": 1.3010456562042236, "learning_rate": 4.239781991480781e-05, "loss": 0.0216, "step": 24510 }, { "epoch": 11.395629939562994, "grad_norm": 1.0017203092575073, "learning_rate": 4.234133264532017e-05, "loss": 0.0415, "step": 24512 }, { "epoch": 11.396559739655974, "grad_norm": 1.2033562660217285, "learning_rate": 4.228467422388018e-05, "loss": 0.023, "step": 24514 }, { "epoch": 11.397489539748953, "grad_norm": 1.718226671218872, "learning_rate": 4.22278452096837e-05, "loss": 0.0518, "step": 24516 }, { "epoch": 11.398419339841935, "grad_norm": 0.976963996887207, "learning_rate": 4.217084616361017e-05, "loss": 0.0341, "step": 24518 }, { "epoch": 11.399349139934914, "grad_norm": 1.696603536605835, "learning_rate": 4.2113677648217286e-05, "loss": 0.0788, "step": 24520 }, { "epoch": 11.400278940027894, "grad_norm": 1.457445740699768, "learning_rate": 4.2056340227734954e-05, "loss": 0.0407, "step": 24522 }, { "epoch": 11.401208740120873, "grad_norm": 1.2922115325927734, "learning_rate": 4.199883446806049e-05, "loss": 0.0254, "step": 24524 }, { "epoch": 11.402138540213855, "grad_norm": 1.5960607528686523, "learning_rate": 4.1941160936752664e-05, "loss": 0.0637, "step": 24526 }, { "epoch": 11.403068340306834, "grad_norm": 1.6980279684066772, "learning_rate": 4.1883320203025694e-05, "loss": 0.0787, "step": 24528 }, { "epoch": 11.403998140399814, "grad_norm": 2.177778959274292, "learning_rate": 4.1825312837744384e-05, "loss": 0.0357, "step": 24530 }, { "epoch": 11.404927940492794, "grad_norm": 2.700270652770996, "learning_rate": 4.1767139413418156e-05, "loss": 0.0628, "step": 24532 }, { "epoch": 11.405857740585773, "grad_norm": 0.9599686861038208, "learning_rate": 4.1708800504194955e-05, "loss": 0.0284, "step": 24534 }, { "epoch": 11.406787540678755, "grad_norm": 1.7742345333099365, "learning_rate": 4.16502966858564e-05, "loss": 0.0313, "step": 24536 }, { "epoch": 11.407717340771734, "grad_norm": 4.7738447189331055, "learning_rate": 4.1591628535811406e-05, "loss": 0.0306, "step": 24538 }, { "epoch": 11.408647140864714, "grad_norm": 0.7805984616279602, "learning_rate": 4.153279663309134e-05, "loss": 0.0225, "step": 24540 }, { "epoch": 11.409576940957693, "grad_norm": 1.9966272115707397, "learning_rate": 4.147380155834294e-05, "loss": 0.0367, "step": 24542 }, { "epoch": 11.410506741050675, "grad_norm": 0.9544456005096436, "learning_rate": 4.141464389382389e-05, "loss": 0.0631, "step": 24544 }, { "epoch": 11.411436541143654, "grad_norm": 0.5319624543190002, "learning_rate": 4.135532422339661e-05, "loss": 0.0328, "step": 24546 }, { "epoch": 11.412366341236634, "grad_norm": 1.2858401536941528, "learning_rate": 4.129584313252203e-05, "loss": 0.0396, "step": 24548 }, { "epoch": 11.413296141329614, "grad_norm": 1.5630288124084473, "learning_rate": 4.1236201208254617e-05, "loss": 0.0236, "step": 24550 }, { "epoch": 11.414225941422593, "grad_norm": 2.19012188911438, "learning_rate": 4.1176399039236096e-05, "loss": 0.0416, "step": 24552 }, { "epoch": 11.415155741515575, "grad_norm": 0.9437068700790405, "learning_rate": 4.111643721568987e-05, "loss": 0.0312, "step": 24554 }, { "epoch": 11.416085541608554, "grad_norm": 1.7192898988723755, "learning_rate": 4.105631632941466e-05, "loss": 0.0214, "step": 24556 }, { "epoch": 11.417015341701534, "grad_norm": 2.0029938220977783, "learning_rate": 4.0996036973779485e-05, "loss": 0.0602, "step": 24558 }, { "epoch": 11.417945141794513, "grad_norm": 1.7287342548370361, "learning_rate": 4.0935599743717376e-05, "loss": 0.0512, "step": 24560 }, { "epoch": 11.418874941887495, "grad_norm": 1.3061769008636475, "learning_rate": 4.0875005235719116e-05, "loss": 0.0519, "step": 24562 }, { "epoch": 11.419804741980474, "grad_norm": 1.1846693754196167, "learning_rate": 4.0814254047828184e-05, "loss": 0.0182, "step": 24564 }, { "epoch": 11.420734542073454, "grad_norm": 1.24000883102417, "learning_rate": 4.0753346779634265e-05, "loss": 0.0369, "step": 24566 }, { "epoch": 11.421664342166434, "grad_norm": 0.9590824246406555, "learning_rate": 4.0692284032267516e-05, "loss": 0.0338, "step": 24568 }, { "epoch": 11.422594142259415, "grad_norm": 1.655572772026062, "learning_rate": 4.0631066408392604e-05, "loss": 0.0227, "step": 24570 }, { "epoch": 11.423523942352395, "grad_norm": 1.2181063890457153, "learning_rate": 4.056969451220276e-05, "loss": 0.0329, "step": 24572 }, { "epoch": 11.424453742445374, "grad_norm": 0.8820245265960693, "learning_rate": 4.050816894941395e-05, "loss": 0.0152, "step": 24574 }, { "epoch": 11.425383542538354, "grad_norm": 0.9530096054077148, "learning_rate": 4.0446490327258375e-05, "loss": 0.0207, "step": 24576 }, { "epoch": 11.426313342631333, "grad_norm": 1.4502811431884766, "learning_rate": 4.038465925447927e-05, "loss": 0.0331, "step": 24578 }, { "epoch": 11.427243142724315, "grad_norm": 1.5209532976150513, "learning_rate": 4.032267634132451e-05, "loss": 0.0577, "step": 24580 }, { "epoch": 11.428172942817294, "grad_norm": 0.6434080600738525, "learning_rate": 4.0260542199540125e-05, "loss": 0.0289, "step": 24582 }, { "epoch": 11.429102742910274, "grad_norm": 1.1298459768295288, "learning_rate": 4.0198257442365166e-05, "loss": 0.024, "step": 24584 }, { "epoch": 11.430032543003254, "grad_norm": 2.353729486465454, "learning_rate": 4.013582268452503e-05, "loss": 0.0452, "step": 24586 }, { "epoch": 11.430962343096235, "grad_norm": 0.857197642326355, "learning_rate": 4.007323854222573e-05, "loss": 0.0213, "step": 24588 }, { "epoch": 11.431892143189215, "grad_norm": 0.8932888507843018, "learning_rate": 4.001050563314719e-05, "loss": 0.0194, "step": 24590 }, { "epoch": 11.432821943282194, "grad_norm": 1.1178741455078125, "learning_rate": 3.994762457643788e-05, "loss": 0.0366, "step": 24592 }, { "epoch": 11.433751743375174, "grad_norm": 1.7536962032318115, "learning_rate": 3.988459599270889e-05, "loss": 0.0369, "step": 24594 }, { "epoch": 11.434681543468155, "grad_norm": 1.6218969821929932, "learning_rate": 3.982142050402647e-05, "loss": 0.0264, "step": 24596 }, { "epoch": 11.435611343561135, "grad_norm": 1.4472100734710693, "learning_rate": 3.975809873390732e-05, "loss": 0.0421, "step": 24598 }, { "epoch": 11.436541143654114, "grad_norm": 0.6361250281333923, "learning_rate": 3.96946313073119e-05, "loss": 0.0179, "step": 24600 }, { "epoch": 11.437470943747094, "grad_norm": 0.6912854313850403, "learning_rate": 3.9631018850637794e-05, "loss": 0.017, "step": 24602 }, { "epoch": 11.438400743840074, "grad_norm": 0.571756899356842, "learning_rate": 3.956726199171441e-05, "loss": 0.0206, "step": 24604 }, { "epoch": 11.439330543933055, "grad_norm": 0.48807188868522644, "learning_rate": 3.9503361359796216e-05, "loss": 0.028, "step": 24606 }, { "epoch": 11.440260344026035, "grad_norm": 1.038185715675354, "learning_rate": 3.943931758555679e-05, "loss": 0.0362, "step": 24608 }, { "epoch": 11.441190144119014, "grad_norm": 1.6547964811325073, "learning_rate": 3.937513130108202e-05, "loss": 0.0299, "step": 24610 }, { "epoch": 11.442119944211994, "grad_norm": 2.1807594299316406, "learning_rate": 3.93108031398648e-05, "loss": 0.0298, "step": 24612 }, { "epoch": 11.443049744304975, "grad_norm": 0.8105455636978149, "learning_rate": 3.9246333736798244e-05, "loss": 0.0248, "step": 24614 }, { "epoch": 11.443979544397955, "grad_norm": 0.8020782470703125, "learning_rate": 3.918172372816904e-05, "loss": 0.0323, "step": 24616 }, { "epoch": 11.444909344490934, "grad_norm": 1.8194133043289185, "learning_rate": 3.9116973751652014e-05, "loss": 0.0369, "step": 24618 }, { "epoch": 11.445839144583914, "grad_norm": 1.3397347927093506, "learning_rate": 3.905208444630317e-05, "loss": 0.022, "step": 24620 }, { "epoch": 11.446768944676894, "grad_norm": 1.923173427581787, "learning_rate": 3.898705645255419e-05, "loss": 0.0551, "step": 24622 }, { "epoch": 11.447698744769875, "grad_norm": 1.5428107976913452, "learning_rate": 3.8921890412204684e-05, "loss": 0.048, "step": 24624 }, { "epoch": 11.448628544862855, "grad_norm": 1.4954665899276733, "learning_rate": 3.885658696841731e-05, "loss": 0.0368, "step": 24626 }, { "epoch": 11.449558344955834, "grad_norm": 1.1696616411209106, "learning_rate": 3.8791146765710826e-05, "loss": 0.0368, "step": 24628 }, { "epoch": 11.450488145048814, "grad_norm": 2.1727592945098877, "learning_rate": 3.872557044995333e-05, "loss": 0.0652, "step": 24630 }, { "epoch": 11.451417945141795, "grad_norm": 1.3037621974945068, "learning_rate": 3.865985866835673e-05, "loss": 0.0191, "step": 24632 }, { "epoch": 11.452347745234775, "grad_norm": 1.9320520162582397, "learning_rate": 3.859401206946993e-05, "loss": 0.0511, "step": 24634 }, { "epoch": 11.453277545327754, "grad_norm": 0.8830679059028625, "learning_rate": 3.8528031303171976e-05, "loss": 0.0332, "step": 24636 }, { "epoch": 11.454207345420734, "grad_norm": 0.3306661546230316, "learning_rate": 3.846191702066655e-05, "loss": 0.0119, "step": 24638 }, { "epoch": 11.455137145513714, "grad_norm": 1.5123422145843506, "learning_rate": 3.839566987447492e-05, "loss": 0.0208, "step": 24640 }, { "epoch": 11.456066945606695, "grad_norm": 1.1725671291351318, "learning_rate": 3.832929051842984e-05, "loss": 0.0278, "step": 24642 }, { "epoch": 11.456996745699675, "grad_norm": 1.1841661930084229, "learning_rate": 3.826277960766845e-05, "loss": 0.0433, "step": 24644 }, { "epoch": 11.457926545792654, "grad_norm": 2.8129801750183105, "learning_rate": 3.8196137798626715e-05, "loss": 0.0632, "step": 24646 }, { "epoch": 11.458856345885634, "grad_norm": 1.681286334991455, "learning_rate": 3.812936574903241e-05, "loss": 0.0258, "step": 24648 }, { "epoch": 11.459786145978615, "grad_norm": 1.4878722429275513, "learning_rate": 3.806246411789871e-05, "loss": 0.0389, "step": 24650 }, { "epoch": 11.460715946071595, "grad_norm": 1.0668537616729736, "learning_rate": 3.7995433565517694e-05, "loss": 0.0198, "step": 24652 }, { "epoch": 11.461645746164574, "grad_norm": 2.1561198234558105, "learning_rate": 3.792827475345401e-05, "loss": 0.0447, "step": 24654 }, { "epoch": 11.462575546257554, "grad_norm": 1.3082332611083984, "learning_rate": 3.786098834453771e-05, "loss": 0.0249, "step": 24656 }, { "epoch": 11.463505346350535, "grad_norm": 1.0116440057754517, "learning_rate": 3.779357500285864e-05, "loss": 0.0191, "step": 24658 }, { "epoch": 11.464435146443515, "grad_norm": 2.077454090118408, "learning_rate": 3.772603539375927e-05, "loss": 0.0358, "step": 24660 }, { "epoch": 11.465364946536495, "grad_norm": 1.4093132019042969, "learning_rate": 3.765837018382842e-05, "loss": 0.0348, "step": 24662 }, { "epoch": 11.466294746629474, "grad_norm": 1.7940033674240112, "learning_rate": 3.75905800408941e-05, "loss": 0.0311, "step": 24664 }, { "epoch": 11.467224546722454, "grad_norm": 0.8435670733451843, "learning_rate": 3.752266563401779e-05, "loss": 0.0159, "step": 24666 }, { "epoch": 11.468154346815435, "grad_norm": 1.4586085081100464, "learning_rate": 3.7454627633487436e-05, "loss": 0.0291, "step": 24668 }, { "epoch": 11.469084146908415, "grad_norm": 1.5084953308105469, "learning_rate": 3.738646671081033e-05, "loss": 0.0435, "step": 24670 }, { "epoch": 11.470013947001394, "grad_norm": 1.2233296632766724, "learning_rate": 3.7318183538707396e-05, "loss": 0.0414, "step": 24672 }, { "epoch": 11.470943747094374, "grad_norm": 0.1126331090927124, "learning_rate": 3.724977879110583e-05, "loss": 0.0059, "step": 24674 }, { "epoch": 11.471873547187355, "grad_norm": 2.4389290809631348, "learning_rate": 3.718125314313334e-05, "loss": 0.036, "step": 24676 }, { "epoch": 11.472803347280335, "grad_norm": 1.8134716749191284, "learning_rate": 3.711260727110995e-05, "loss": 0.0461, "step": 24678 }, { "epoch": 11.473733147373315, "grad_norm": 0.9675045609474182, "learning_rate": 3.704384185254285e-05, "loss": 0.012, "step": 24680 }, { "epoch": 11.474662947466294, "grad_norm": 1.558145523071289, "learning_rate": 3.697495756611912e-05, "loss": 0.0331, "step": 24682 }, { "epoch": 11.475592747559276, "grad_norm": 2.133873224258423, "learning_rate": 3.6905955091698525e-05, "loss": 0.0252, "step": 24684 }, { "epoch": 11.476522547652255, "grad_norm": 0.5697823166847229, "learning_rate": 3.683683511030782e-05, "loss": 0.0192, "step": 24686 }, { "epoch": 11.477452347745235, "grad_norm": 1.7314434051513672, "learning_rate": 3.676759830413345e-05, "loss": 0.0385, "step": 24688 }, { "epoch": 11.478382147838214, "grad_norm": 0.9773842096328735, "learning_rate": 3.6698245356514436e-05, "loss": 0.0193, "step": 24690 }, { "epoch": 11.479311947931194, "grad_norm": 1.4686402082443237, "learning_rate": 3.6628776951936526e-05, "loss": 0.0312, "step": 24692 }, { "epoch": 11.480241748024175, "grad_norm": 0.5984960198402405, "learning_rate": 3.655919377602483e-05, "loss": 0.0156, "step": 24694 }, { "epoch": 11.481171548117155, "grad_norm": 1.2355592250823975, "learning_rate": 3.648949651553736e-05, "loss": 0.0317, "step": 24696 }, { "epoch": 11.482101348210135, "grad_norm": 1.011358380317688, "learning_rate": 3.6419685858357614e-05, "loss": 0.0231, "step": 24698 }, { "epoch": 11.483031148303114, "grad_norm": 0.9357227087020874, "learning_rate": 3.634976249348876e-05, "loss": 0.0229, "step": 24700 }, { "epoch": 11.483960948396096, "grad_norm": 1.0122469663619995, "learning_rate": 3.627972711104618e-05, "loss": 0.0396, "step": 24702 }, { "epoch": 11.484890748489075, "grad_norm": 1.8937029838562012, "learning_rate": 3.620958040225082e-05, "loss": 0.0351, "step": 24704 }, { "epoch": 11.485820548582055, "grad_norm": 0.24070262908935547, "learning_rate": 3.613932305942239e-05, "loss": 0.0146, "step": 24706 }, { "epoch": 11.486750348675034, "grad_norm": 1.8511189222335815, "learning_rate": 3.606895577597249e-05, "loss": 0.0377, "step": 24708 }, { "epoch": 11.487680148768014, "grad_norm": 0.9324151873588562, "learning_rate": 3.5998479246397954e-05, "loss": 0.0196, "step": 24710 }, { "epoch": 11.488609948860995, "grad_norm": 1.4235177040100098, "learning_rate": 3.5927894166273356e-05, "loss": 0.0328, "step": 24712 }, { "epoch": 11.489539748953975, "grad_norm": 1.6290369033813477, "learning_rate": 3.585720123224511e-05, "loss": 0.0554, "step": 24714 }, { "epoch": 11.490469549046955, "grad_norm": 1.6283035278320312, "learning_rate": 3.57864011420241e-05, "loss": 0.0474, "step": 24716 }, { "epoch": 11.491399349139934, "grad_norm": 1.47146475315094, "learning_rate": 3.571549459437829e-05, "loss": 0.0463, "step": 24718 }, { "epoch": 11.492329149232916, "grad_norm": 1.8132872581481934, "learning_rate": 3.564448228912686e-05, "loss": 0.0221, "step": 24720 }, { "epoch": 11.493258949325895, "grad_norm": 0.7397697567939758, "learning_rate": 3.557336492713275e-05, "loss": 0.0277, "step": 24722 }, { "epoch": 11.494188749418875, "grad_norm": 1.2889957427978516, "learning_rate": 3.5502143210295293e-05, "loss": 0.0275, "step": 24724 }, { "epoch": 11.495118549511854, "grad_norm": 1.9630495309829712, "learning_rate": 3.5430817841544234e-05, "loss": 0.0261, "step": 24726 }, { "epoch": 11.496048349604836, "grad_norm": 1.8590061664581299, "learning_rate": 3.535938952483217e-05, "loss": 0.0352, "step": 24728 }, { "epoch": 11.496978149697815, "grad_norm": 1.6969939470291138, "learning_rate": 3.528785896512774e-05, "loss": 0.033, "step": 24730 }, { "epoch": 11.497907949790795, "grad_norm": 2.5956978797912598, "learning_rate": 3.521622686840871e-05, "loss": 0.0453, "step": 24732 }, { "epoch": 11.498837749883775, "grad_norm": 1.0438790321350098, "learning_rate": 3.514449394165495e-05, "loss": 0.0202, "step": 24734 }, { "epoch": 11.499767549976754, "grad_norm": 1.552511215209961, "learning_rate": 3.5072660892841645e-05, "loss": 0.0345, "step": 24736 }, { "epoch": 11.500697350069736, "grad_norm": 1.1280192136764526, "learning_rate": 3.500072843093166e-05, "loss": 0.0305, "step": 24738 }, { "epoch": 11.501627150162715, "grad_norm": 0.26432496309280396, "learning_rate": 3.4928697265869515e-05, "loss": 0.0213, "step": 24740 }, { "epoch": 11.502556950255695, "grad_norm": 1.3327001333236694, "learning_rate": 3.48565681085739e-05, "loss": 0.0416, "step": 24742 }, { "epoch": 11.503486750348674, "grad_norm": 1.1307566165924072, "learning_rate": 3.4784341670930146e-05, "loss": 0.0181, "step": 24744 }, { "epoch": 11.504416550441656, "grad_norm": 0.7939684987068176, "learning_rate": 3.47120186657842e-05, "loss": 0.0191, "step": 24746 }, { "epoch": 11.505346350534635, "grad_norm": 1.2640140056610107, "learning_rate": 3.463959980693491e-05, "loss": 0.0482, "step": 24748 }, { "epoch": 11.506276150627615, "grad_norm": 1.6760472059249878, "learning_rate": 3.456708580912737e-05, "loss": 0.0169, "step": 24750 }, { "epoch": 11.507205950720595, "grad_norm": 1.5288649797439575, "learning_rate": 3.4494477388045116e-05, "loss": 0.0206, "step": 24752 }, { "epoch": 11.508135750813576, "grad_norm": 0.6209035515785217, "learning_rate": 3.4421775260304115e-05, "loss": 0.0128, "step": 24754 }, { "epoch": 11.509065550906556, "grad_norm": 1.2964988946914673, "learning_rate": 3.434898014344519e-05, "loss": 0.0213, "step": 24756 }, { "epoch": 11.509995350999535, "grad_norm": 2.384253978729248, "learning_rate": 3.427609275592625e-05, "loss": 0.0453, "step": 24758 }, { "epoch": 11.510925151092515, "grad_norm": 0.4640564024448395, "learning_rate": 3.420311381711689e-05, "loss": 0.0202, "step": 24760 }, { "epoch": 11.511854951185494, "grad_norm": 0.7527492642402649, "learning_rate": 3.41300440472896e-05, "loss": 0.0336, "step": 24762 }, { "epoch": 11.512784751278476, "grad_norm": 2.0135061740875244, "learning_rate": 3.405688416761367e-05, "loss": 0.0296, "step": 24764 }, { "epoch": 11.513714551371455, "grad_norm": 1.7740931510925293, "learning_rate": 3.398363490014726e-05, "loss": 0.0309, "step": 24766 }, { "epoch": 11.514644351464435, "grad_norm": 1.0405489206314087, "learning_rate": 3.3910296967831225e-05, "loss": 0.0237, "step": 24768 }, { "epoch": 11.515574151557415, "grad_norm": 1.7841928005218506, "learning_rate": 3.3836871094481506e-05, "loss": 0.0234, "step": 24770 }, { "epoch": 11.516503951650396, "grad_norm": 1.6111376285552979, "learning_rate": 3.376335800478152e-05, "loss": 0.0367, "step": 24772 }, { "epoch": 11.517433751743376, "grad_norm": 0.8426902890205383, "learning_rate": 3.368975842427594e-05, "loss": 0.0308, "step": 24774 }, { "epoch": 11.518363551836355, "grad_norm": 0.9091235399246216, "learning_rate": 3.361607307936307e-05, "loss": 0.0206, "step": 24776 }, { "epoch": 11.519293351929335, "grad_norm": 1.649924397468567, "learning_rate": 3.354230269728719e-05, "loss": 0.0285, "step": 24778 }, { "epoch": 11.520223152022314, "grad_norm": 1.0745145082473755, "learning_rate": 3.346844800613235e-05, "loss": 0.0201, "step": 24780 }, { "epoch": 11.521152952115296, "grad_norm": 2.3793601989746094, "learning_rate": 3.3394509734814544e-05, "loss": 0.0452, "step": 24782 }, { "epoch": 11.522082752208275, "grad_norm": 0.9155712723731995, "learning_rate": 3.3320488613074825e-05, "loss": 0.024, "step": 24784 }, { "epoch": 11.523012552301255, "grad_norm": 0.7679381370544434, "learning_rate": 3.324638537147127e-05, "loss": 0.0347, "step": 24786 }, { "epoch": 11.523942352394235, "grad_norm": 0.27256330847740173, "learning_rate": 3.3172200741373475e-05, "loss": 0.0141, "step": 24788 }, { "epoch": 11.524872152487216, "grad_norm": 0.32897183299064636, "learning_rate": 3.309793545495378e-05, "loss": 0.0127, "step": 24790 }, { "epoch": 11.525801952580196, "grad_norm": 0.27334830164909363, "learning_rate": 3.302359024518024e-05, "loss": 0.0103, "step": 24792 }, { "epoch": 11.526731752673175, "grad_norm": 0.7580292224884033, "learning_rate": 3.2949165845810235e-05, "loss": 0.0155, "step": 24794 }, { "epoch": 11.527661552766155, "grad_norm": 0.5749179720878601, "learning_rate": 3.287466299138255e-05, "loss": 0.0417, "step": 24796 }, { "epoch": 11.528591352859134, "grad_norm": 1.717756748199463, "learning_rate": 3.280008241721043e-05, "loss": 0.026, "step": 24798 }, { "epoch": 11.529521152952116, "grad_norm": 0.6106041669845581, "learning_rate": 3.2725424859373705e-05, "loss": 0.0075, "step": 24800 }, { "epoch": 11.530450953045095, "grad_norm": 1.3466992378234863, "learning_rate": 3.2650691054712505e-05, "loss": 0.0446, "step": 24802 }, { "epoch": 11.531380753138075, "grad_norm": 0.789543628692627, "learning_rate": 3.257588174081946e-05, "loss": 0.0275, "step": 24804 }, { "epoch": 11.532310553231055, "grad_norm": 1.6940633058547974, "learning_rate": 3.250099765603198e-05, "loss": 0.0422, "step": 24806 }, { "epoch": 11.533240353324036, "grad_norm": 2.2186977863311768, "learning_rate": 3.242603953942591e-05, "loss": 0.0266, "step": 24808 }, { "epoch": 11.534170153417016, "grad_norm": 1.3125720024108887, "learning_rate": 3.235100813080777e-05, "loss": 0.0338, "step": 24810 }, { "epoch": 11.535099953509995, "grad_norm": 1.3752959966659546, "learning_rate": 3.227590417070676e-05, "loss": 0.0227, "step": 24812 }, { "epoch": 11.536029753602975, "grad_norm": 1.5764449834823608, "learning_rate": 3.220072840036916e-05, "loss": 0.0167, "step": 24814 }, { "epoch": 11.536959553695956, "grad_norm": 0.8396008014678955, "learning_rate": 3.212548156174929e-05, "loss": 0.0116, "step": 24816 }, { "epoch": 11.537889353788936, "grad_norm": 0.7393609881401062, "learning_rate": 3.205016439750325e-05, "loss": 0.0178, "step": 24818 }, { "epoch": 11.538819153881915, "grad_norm": 0.8222523331642151, "learning_rate": 3.1974777650980715e-05, "loss": 0.0184, "step": 24820 }, { "epoch": 11.539748953974895, "grad_norm": 0.5934180021286011, "learning_rate": 3.189932206621859e-05, "loss": 0.0168, "step": 24822 }, { "epoch": 11.540678754067875, "grad_norm": 2.782892942428589, "learning_rate": 3.182379838793321e-05, "loss": 0.0274, "step": 24824 }, { "epoch": 11.541608554160856, "grad_norm": 0.5688310265541077, "learning_rate": 3.174820736151245e-05, "loss": 0.0115, "step": 24826 }, { "epoch": 11.542538354253836, "grad_norm": 0.4201119542121887, "learning_rate": 3.167254973300939e-05, "loss": 0.0185, "step": 24828 }, { "epoch": 11.543468154346815, "grad_norm": 1.550162672996521, "learning_rate": 3.159682624913447e-05, "loss": 0.0429, "step": 24830 }, { "epoch": 11.544397954439795, "grad_norm": 0.9277052283287048, "learning_rate": 3.1521037657247524e-05, "loss": 0.0126, "step": 24832 }, { "epoch": 11.545327754532776, "grad_norm": 0.33128559589385986, "learning_rate": 3.144518470535155e-05, "loss": 0.027, "step": 24834 }, { "epoch": 11.546257554625756, "grad_norm": 0.5084060430526733, "learning_rate": 3.1369268142084577e-05, "loss": 0.0165, "step": 24836 }, { "epoch": 11.547187354718735, "grad_norm": 2.600433111190796, "learning_rate": 3.129328871671258e-05, "loss": 0.044, "step": 24838 }, { "epoch": 11.548117154811715, "grad_norm": 1.9908283948898315, "learning_rate": 3.121724717912131e-05, "loss": 0.0545, "step": 24840 }, { "epoch": 11.549046954904696, "grad_norm": 0.9107999801635742, "learning_rate": 3.114114427981056e-05, "loss": 0.0487, "step": 24842 }, { "epoch": 11.549976754997676, "grad_norm": 1.6166695356369019, "learning_rate": 3.106498076988522e-05, "loss": 0.0257, "step": 24844 }, { "epoch": 11.550906555090656, "grad_norm": 1.8793785572052002, "learning_rate": 3.098875740104804e-05, "loss": 0.035, "step": 24846 }, { "epoch": 11.551836355183635, "grad_norm": 0.6415586471557617, "learning_rate": 3.091247492559308e-05, "loss": 0.0174, "step": 24848 }, { "epoch": 11.552766155276615, "grad_norm": 1.2135777473449707, "learning_rate": 3.083613409639756e-05, "loss": 0.0153, "step": 24850 }, { "epoch": 11.553695955369596, "grad_norm": 1.020477056503296, "learning_rate": 3.0759735666914825e-05, "loss": 0.0207, "step": 24852 }, { "epoch": 11.554625755462576, "grad_norm": 0.5022299885749817, "learning_rate": 3.068328039116618e-05, "loss": 0.0137, "step": 24854 }, { "epoch": 11.555555555555555, "grad_norm": 0.8864622712135315, "learning_rate": 3.060676902373451e-05, "loss": 0.0114, "step": 24856 }, { "epoch": 11.556485355648535, "grad_norm": 1.6208609342575073, "learning_rate": 3.053020231975629e-05, "loss": 0.0312, "step": 24858 }, { "epoch": 11.557415155741516, "grad_norm": 0.6185021996498108, "learning_rate": 3.0453581034913638e-05, "loss": 0.0187, "step": 24860 }, { "epoch": 11.558344955834496, "grad_norm": 2.2487568855285645, "learning_rate": 3.037690592542787e-05, "loss": 0.0294, "step": 24862 }, { "epoch": 11.559274755927476, "grad_norm": 1.1057523488998413, "learning_rate": 3.0300177748051535e-05, "loss": 0.0193, "step": 24864 }, { "epoch": 11.560204556020455, "grad_norm": 0.45843809843063354, "learning_rate": 3.0223397260060424e-05, "loss": 0.0127, "step": 24866 }, { "epoch": 11.561134356113435, "grad_norm": 1.3814775943756104, "learning_rate": 3.0146565219246955e-05, "loss": 0.0203, "step": 24868 }, { "epoch": 11.562064156206416, "grad_norm": 1.1407220363616943, "learning_rate": 3.006968238391269e-05, "loss": 0.0304, "step": 24870 }, { "epoch": 11.562993956299396, "grad_norm": 1.3816437721252441, "learning_rate": 2.9992749512860193e-05, "loss": 0.0264, "step": 24872 }, { "epoch": 11.563923756392375, "grad_norm": 1.4133589267730713, "learning_rate": 2.9915767365385482e-05, "loss": 0.0302, "step": 24874 }, { "epoch": 11.564853556485355, "grad_norm": 1.2205321788787842, "learning_rate": 2.983873670127145e-05, "loss": 0.0228, "step": 24876 }, { "epoch": 11.565783356578336, "grad_norm": 1.4416804313659668, "learning_rate": 2.9761658280779826e-05, "loss": 0.0345, "step": 24878 }, { "epoch": 11.566713156671316, "grad_norm": 1.2839146852493286, "learning_rate": 2.968453286464316e-05, "loss": 0.03, "step": 24880 }, { "epoch": 11.567642956764296, "grad_norm": 2.442636728286743, "learning_rate": 2.9607361214058335e-05, "loss": 0.0633, "step": 24882 }, { "epoch": 11.568572756857275, "grad_norm": 0.4442027807235718, "learning_rate": 2.9530144090678398e-05, "loss": 0.0186, "step": 24884 }, { "epoch": 11.569502556950255, "grad_norm": 0.34902381896972656, "learning_rate": 2.9452882256605353e-05, "loss": 0.0201, "step": 24886 }, { "epoch": 11.570432357043236, "grad_norm": 0.8217077851295471, "learning_rate": 2.9375576474381962e-05, "loss": 0.0123, "step": 24888 }, { "epoch": 11.571362157136216, "grad_norm": 0.21117714047431946, "learning_rate": 2.9298227506985263e-05, "loss": 0.0184, "step": 24890 }, { "epoch": 11.572291957229195, "grad_norm": 0.6937960982322693, "learning_rate": 2.9220836117818503e-05, "loss": 0.0125, "step": 24892 }, { "epoch": 11.573221757322175, "grad_norm": 0.6501390933990479, "learning_rate": 2.9143403070702936e-05, "loss": 0.0301, "step": 24894 }, { "epoch": 11.574151557415156, "grad_norm": 0.7674260139465332, "learning_rate": 2.9065929129871996e-05, "loss": 0.0431, "step": 24896 }, { "epoch": 11.575081357508136, "grad_norm": 1.0145479440689087, "learning_rate": 2.89884150599622e-05, "loss": 0.0213, "step": 24898 }, { "epoch": 11.576011157601116, "grad_norm": 1.51716947555542, "learning_rate": 2.891086162600578e-05, "loss": 0.0245, "step": 24900 }, { "epoch": 11.576940957694095, "grad_norm": 0.5388848781585693, "learning_rate": 2.883326959342398e-05, "loss": 0.0113, "step": 24902 }, { "epoch": 11.577870757787077, "grad_norm": 1.0512393712997437, "learning_rate": 2.875563972801885e-05, "loss": 0.025, "step": 24904 }, { "epoch": 11.578800557880056, "grad_norm": 1.732602596282959, "learning_rate": 2.8677972795965997e-05, "loss": 0.0339, "step": 24906 }, { "epoch": 11.579730357973036, "grad_norm": 0.786264181137085, "learning_rate": 2.8600269563806326e-05, "loss": 0.0154, "step": 24908 }, { "epoch": 11.580660158066015, "grad_norm": 1.6494271755218506, "learning_rate": 2.8522530798439543e-05, "loss": 0.0272, "step": 24910 }, { "epoch": 11.581589958158997, "grad_norm": 1.0130702257156372, "learning_rate": 2.8444757267116063e-05, "loss": 0.0181, "step": 24912 }, { "epoch": 11.582519758251976, "grad_norm": 1.1815909147262573, "learning_rate": 2.836694973742889e-05, "loss": 0.0322, "step": 24914 }, { "epoch": 11.583449558344956, "grad_norm": 2.4333860874176025, "learning_rate": 2.8289108977307094e-05, "loss": 0.0405, "step": 24916 }, { "epoch": 11.584379358437936, "grad_norm": 1.1448289155960083, "learning_rate": 2.821123575500774e-05, "loss": 0.0216, "step": 24918 }, { "epoch": 11.585309158530915, "grad_norm": 0.6331955790519714, "learning_rate": 2.8133330839107734e-05, "loss": 0.0101, "step": 24920 }, { "epoch": 11.586238958623897, "grad_norm": 1.1643009185791016, "learning_rate": 2.8055394998497152e-05, "loss": 0.0145, "step": 24922 }, { "epoch": 11.587168758716876, "grad_norm": 1.4125090837478638, "learning_rate": 2.7977429002371618e-05, "loss": 0.0359, "step": 24924 }, { "epoch": 11.588098558809856, "grad_norm": 0.9072668552398682, "learning_rate": 2.7899433620224046e-05, "loss": 0.0131, "step": 24926 }, { "epoch": 11.589028358902835, "grad_norm": 0.7372483611106873, "learning_rate": 2.782140962183702e-05, "loss": 0.0119, "step": 24928 }, { "epoch": 11.589958158995817, "grad_norm": 0.47570937871932983, "learning_rate": 2.7743357777276066e-05, "loss": 0.0197, "step": 24930 }, { "epoch": 11.590887959088796, "grad_norm": 0.9012104868888855, "learning_rate": 2.766527885688157e-05, "loss": 0.0122, "step": 24932 }, { "epoch": 11.591817759181776, "grad_norm": 0.29948657751083374, "learning_rate": 2.7587173631260596e-05, "loss": 0.0095, "step": 24934 }, { "epoch": 11.592747559274756, "grad_norm": 0.4082028865814209, "learning_rate": 2.7509042871280362e-05, "loss": 0.0076, "step": 24936 }, { "epoch": 11.593677359367735, "grad_norm": 2.221015214920044, "learning_rate": 2.743088734805993e-05, "loss": 0.0333, "step": 24938 }, { "epoch": 11.594607159460717, "grad_norm": 0.5443076491355896, "learning_rate": 2.7352707832962943e-05, "loss": 0.0061, "step": 24940 }, { "epoch": 11.595536959553696, "grad_norm": 1.7280327081680298, "learning_rate": 2.7274505097589293e-05, "loss": 0.0196, "step": 24942 }, { "epoch": 11.596466759646676, "grad_norm": 1.440984845161438, "learning_rate": 2.7196279913768588e-05, "loss": 0.0205, "step": 24944 }, { "epoch": 11.597396559739655, "grad_norm": 0.8505858182907104, "learning_rate": 2.7118033053551988e-05, "loss": 0.0472, "step": 24946 }, { "epoch": 11.598326359832637, "grad_norm": 1.5749493837356567, "learning_rate": 2.703976528920405e-05, "loss": 0.0188, "step": 24948 }, { "epoch": 11.599256159925616, "grad_norm": 0.7475440502166748, "learning_rate": 2.6961477393196024e-05, "loss": 0.0108, "step": 24950 }, { "epoch": 11.600185960018596, "grad_norm": 1.0144948959350586, "learning_rate": 2.6883170138198353e-05, "loss": 0.0159, "step": 24952 }, { "epoch": 11.601115760111576, "grad_norm": 0.3336476683616638, "learning_rate": 2.680484429707151e-05, "loss": 0.0151, "step": 24954 }, { "epoch": 11.602045560204555, "grad_norm": 0.6020286679267883, "learning_rate": 2.67265006428601e-05, "loss": 0.0061, "step": 24956 }, { "epoch": 11.602975360297537, "grad_norm": 0.9140907526016235, "learning_rate": 2.6648139948784366e-05, "loss": 0.0205, "step": 24958 }, { "epoch": 11.603905160390516, "grad_norm": 1.3599181175231934, "learning_rate": 2.6569762988232893e-05, "loss": 0.0138, "step": 24960 }, { "epoch": 11.604834960483496, "grad_norm": 0.5437216758728027, "learning_rate": 2.6491370534754285e-05, "loss": 0.0246, "step": 24962 }, { "epoch": 11.605764760576475, "grad_norm": 1.1034678220748901, "learning_rate": 2.641296336205059e-05, "loss": 0.0187, "step": 24964 }, { "epoch": 11.606694560669457, "grad_norm": 0.7059247493743896, "learning_rate": 2.6334542243969126e-05, "loss": 0.0159, "step": 24966 }, { "epoch": 11.607624360762436, "grad_norm": 0.8410675525665283, "learning_rate": 2.6256107954494313e-05, "loss": 0.0222, "step": 24968 }, { "epoch": 11.608554160855416, "grad_norm": 0.27001067996025085, "learning_rate": 2.6177661267741102e-05, "loss": 0.0122, "step": 24970 }, { "epoch": 11.609483960948396, "grad_norm": 0.5391554236412048, "learning_rate": 2.609920295794662e-05, "loss": 0.0079, "step": 24972 }, { "epoch": 11.610413761041377, "grad_norm": 0.8327795267105103, "learning_rate": 2.6020733799462886e-05, "loss": 0.0195, "step": 24974 }, { "epoch": 11.611343561134357, "grad_norm": 0.8288879990577698, "learning_rate": 2.594225456674828e-05, "loss": 0.0245, "step": 24976 }, { "epoch": 11.612273361227336, "grad_norm": 1.3852981328964233, "learning_rate": 2.5863766034361682e-05, "loss": 0.0125, "step": 24978 }, { "epoch": 11.613203161320316, "grad_norm": 0.9385446906089783, "learning_rate": 2.5785268976953223e-05, "loss": 0.0113, "step": 24980 }, { "epoch": 11.614132961413295, "grad_norm": 0.5231744647026062, "learning_rate": 2.5706764169256804e-05, "loss": 0.0142, "step": 24982 }, { "epoch": 11.615062761506277, "grad_norm": 0.703140914440155, "learning_rate": 2.5628252386083376e-05, "loss": 0.0153, "step": 24984 }, { "epoch": 11.615992561599256, "grad_norm": 1.463972568511963, "learning_rate": 2.5549734402312698e-05, "loss": 0.0224, "step": 24986 }, { "epoch": 11.616922361692236, "grad_norm": 1.6541870832443237, "learning_rate": 2.5471210992885236e-05, "loss": 0.0369, "step": 24988 }, { "epoch": 11.617852161785216, "grad_norm": 0.3997476100921631, "learning_rate": 2.539268293279551e-05, "loss": 0.0127, "step": 24990 }, { "epoch": 11.618781961878197, "grad_norm": 1.1262471675872803, "learning_rate": 2.5314150997083772e-05, "loss": 0.0171, "step": 24992 }, { "epoch": 11.619711761971177, "grad_norm": 0.6777242422103882, "learning_rate": 2.5235615960828697e-05, "loss": 0.0076, "step": 24994 }, { "epoch": 11.620641562064156, "grad_norm": 0.39773431420326233, "learning_rate": 2.5157078599139035e-05, "loss": 0.0225, "step": 24996 }, { "epoch": 11.621571362157136, "grad_norm": 0.9040188789367676, "learning_rate": 2.507853968714701e-05, "loss": 0.0173, "step": 24998 }, { "epoch": 11.622501162250117, "grad_norm": 0.8538395762443542, "learning_rate": 2.500000000000016e-05, "loss": 0.0143, "step": 25000 }, { "epoch": 11.622501162250117, "eval_cer": 0.12468631341067651, "eval_loss": 0.21214817464351654, "eval_runtime": 398.1957, "eval_samples_per_second": 31.879, "eval_steps_per_second": 0.997, "step": 25000 }, { "epoch": 11.623430962343097, "grad_norm": 1.327168345451355, "learning_rate": 2.4921460312853133e-05, "loss": 0.0125, "step": 25002 }, { "epoch": 11.624360762436076, "grad_norm": 1.1885584592819214, "learning_rate": 2.484292140086093e-05, "loss": 0.024, "step": 25004 }, { "epoch": 11.625290562529056, "grad_norm": 0.6799206137657166, "learning_rate": 2.4764384039171444e-05, "loss": 0.0174, "step": 25006 }, { "epoch": 11.626220362622036, "grad_norm": 0.5486528277397156, "learning_rate": 2.468584900291619e-05, "loss": 0.0195, "step": 25008 }, { "epoch": 11.627150162715017, "grad_norm": 0.19065681099891663, "learning_rate": 2.4607317067204445e-05, "loss": 0.0045, "step": 25010 }, { "epoch": 11.628079962807996, "grad_norm": 1.6170774698257446, "learning_rate": 2.4528789007114716e-05, "loss": 0.0178, "step": 25012 }, { "epoch": 11.629009762900976, "grad_norm": 1.5405867099761963, "learning_rate": 2.4450265597687433e-05, "loss": 0.0312, "step": 25014 }, { "epoch": 11.629939562993956, "grad_norm": 1.1867148876190186, "learning_rate": 2.4371747613916586e-05, "loss": 0.0119, "step": 25016 }, { "epoch": 11.630869363086937, "grad_norm": 0.2088034749031067, "learning_rate": 2.429323583074315e-05, "loss": 0.0112, "step": 25018 }, { "epoch": 11.631799163179917, "grad_norm": 0.9503631591796875, "learning_rate": 2.421473102304691e-05, "loss": 0.0151, "step": 25020 }, { "epoch": 11.632728963272896, "grad_norm": 1.4645779132843018, "learning_rate": 2.4136233965638273e-05, "loss": 0.0156, "step": 25022 }, { "epoch": 11.633658763365876, "grad_norm": 1.567093849182129, "learning_rate": 2.405774543325168e-05, "loss": 0.0203, "step": 25024 }, { "epoch": 11.634588563458856, "grad_norm": 1.9554264545440674, "learning_rate": 2.397926620053725e-05, "loss": 0.0243, "step": 25026 }, { "epoch": 11.635518363551837, "grad_norm": 0.8024473190307617, "learning_rate": 2.390079704205351e-05, "loss": 0.0183, "step": 25028 }, { "epoch": 11.636448163644816, "grad_norm": 1.3302290439605713, "learning_rate": 2.382233873225903e-05, "loss": 0.0332, "step": 25030 }, { "epoch": 11.637377963737796, "grad_norm": 1.3902509212493896, "learning_rate": 2.374389204550564e-05, "loss": 0.0265, "step": 25032 }, { "epoch": 11.638307763830776, "grad_norm": 0.45362356305122375, "learning_rate": 2.3665457756031005e-05, "loss": 0.0234, "step": 25034 }, { "epoch": 11.639237563923757, "grad_norm": 2.303905963897705, "learning_rate": 2.3587036637949364e-05, "loss": 0.0214, "step": 25036 }, { "epoch": 11.640167364016737, "grad_norm": 0.9293214082717896, "learning_rate": 2.350862946524567e-05, "loss": 0.0412, "step": 25038 }, { "epoch": 11.641097164109716, "grad_norm": 0.7280562520027161, "learning_rate": 2.343023701176724e-05, "loss": 0.0152, "step": 25040 }, { "epoch": 11.642026964202696, "grad_norm": 1.1898226737976074, "learning_rate": 2.335186005121559e-05, "loss": 0.0089, "step": 25042 }, { "epoch": 11.642956764295675, "grad_norm": 0.30007973313331604, "learning_rate": 2.327349935713985e-05, "loss": 0.0123, "step": 25044 }, { "epoch": 11.643886564388657, "grad_norm": 1.5212249755859375, "learning_rate": 2.319515570292844e-05, "loss": 0.0191, "step": 25046 }, { "epoch": 11.644816364481636, "grad_norm": 1.9605077505111694, "learning_rate": 2.3116829861801774e-05, "loss": 0.0303, "step": 25048 }, { "epoch": 11.645746164574616, "grad_norm": 2.059187412261963, "learning_rate": 2.303852260680393e-05, "loss": 0.0595, "step": 25050 }, { "epoch": 11.646675964667596, "grad_norm": 1.4288870096206665, "learning_rate": 2.2960234710796074e-05, "loss": 0.0243, "step": 25052 }, { "epoch": 11.647605764760577, "grad_norm": 1.1954418420791626, "learning_rate": 2.2881966946448316e-05, "loss": 0.0191, "step": 25054 }, { "epoch": 11.648535564853557, "grad_norm": 1.7388811111450195, "learning_rate": 2.2803720086231533e-05, "loss": 0.0399, "step": 25056 }, { "epoch": 11.649465364946536, "grad_norm": 0.7743150591850281, "learning_rate": 2.272549490241082e-05, "loss": 0.0237, "step": 25058 }, { "epoch": 11.650395165039516, "grad_norm": 0.42911791801452637, "learning_rate": 2.2647292167036995e-05, "loss": 0.0058, "step": 25060 }, { "epoch": 11.651324965132497, "grad_norm": 1.3817393779754639, "learning_rate": 2.256911265194001e-05, "loss": 0.0116, "step": 25062 }, { "epoch": 11.652254765225477, "grad_norm": 0.3717899024486542, "learning_rate": 2.2490957128719583e-05, "loss": 0.0469, "step": 25064 }, { "epoch": 11.653184565318456, "grad_norm": 0.7282418012619019, "learning_rate": 2.241282636873935e-05, "loss": 0.0123, "step": 25066 }, { "epoch": 11.654114365411436, "grad_norm": 1.0831254720687866, "learning_rate": 2.233472114311855e-05, "loss": 0.0107, "step": 25068 }, { "epoch": 11.655044165504417, "grad_norm": 0.3537233769893646, "learning_rate": 2.2256642222723876e-05, "loss": 0.0134, "step": 25070 }, { "epoch": 11.655973965597397, "grad_norm": 0.18623404204845428, "learning_rate": 2.217859037816293e-05, "loss": 0.0055, "step": 25072 }, { "epoch": 11.656903765690377, "grad_norm": 0.9102892279624939, "learning_rate": 2.2100566379776075e-05, "loss": 0.0097, "step": 25074 }, { "epoch": 11.657833565783356, "grad_norm": 1.6346763372421265, "learning_rate": 2.2022570997628324e-05, "loss": 0.0352, "step": 25076 }, { "epoch": 11.658763365876336, "grad_norm": 1.1407191753387451, "learning_rate": 2.1944605001502793e-05, "loss": 0.0172, "step": 25078 }, { "epoch": 11.659693165969317, "grad_norm": 1.5463718175888062, "learning_rate": 2.186666916089238e-05, "loss": 0.0306, "step": 25080 }, { "epoch": 11.660622966062297, "grad_norm": 0.8464809060096741, "learning_rate": 2.1788764244992556e-05, "loss": 0.0102, "step": 25082 }, { "epoch": 11.661552766155276, "grad_norm": 1.6268624067306519, "learning_rate": 2.1710891022693023e-05, "loss": 0.0295, "step": 25084 }, { "epoch": 11.662482566248256, "grad_norm": 0.6333129405975342, "learning_rate": 2.1633050262571053e-05, "loss": 0.0233, "step": 25086 }, { "epoch": 11.663412366341237, "grad_norm": 0.7972413301467896, "learning_rate": 2.155524273288405e-05, "loss": 0.0251, "step": 25088 }, { "epoch": 11.664342166434217, "grad_norm": 1.6171867847442627, "learning_rate": 2.1477469201560398e-05, "loss": 0.0178, "step": 25090 }, { "epoch": 11.665271966527197, "grad_norm": 1.242133617401123, "learning_rate": 2.139973043619362e-05, "loss": 0.0316, "step": 25092 }, { "epoch": 11.666201766620176, "grad_norm": 0.9491094350814819, "learning_rate": 2.1322027204034127e-05, "loss": 0.0184, "step": 25094 }, { "epoch": 11.667131566713156, "grad_norm": 0.5275582671165466, "learning_rate": 2.1244360271981097e-05, "loss": 0.0055, "step": 25096 }, { "epoch": 11.668061366806137, "grad_norm": 0.6903567910194397, "learning_rate": 2.1166730406575968e-05, "loss": 0.0222, "step": 25098 }, { "epoch": 11.668991166899117, "grad_norm": 1.296154499053955, "learning_rate": 2.1089138373994176e-05, "loss": 0.0209, "step": 25100 }, { "epoch": 11.669920966992096, "grad_norm": 0.46561068296432495, "learning_rate": 2.1011584940037924e-05, "loss": 0.0071, "step": 25102 }, { "epoch": 11.670850767085076, "grad_norm": 1.3709547519683838, "learning_rate": 2.0934070870127952e-05, "loss": 0.0273, "step": 25104 }, { "epoch": 11.671780567178057, "grad_norm": 0.5792039036750793, "learning_rate": 2.085659692929701e-05, "loss": 0.0044, "step": 25106 }, { "epoch": 11.672710367271037, "grad_norm": 0.6501759886741638, "learning_rate": 2.077916388218179e-05, "loss": 0.007, "step": 25108 }, { "epoch": 11.673640167364017, "grad_norm": 1.5073497295379639, "learning_rate": 2.0701772493014862e-05, "loss": 0.0318, "step": 25110 }, { "epoch": 11.674569967456996, "grad_norm": 0.5432714819908142, "learning_rate": 2.0624423525618162e-05, "loss": 0.0087, "step": 25112 }, { "epoch": 11.675499767549976, "grad_norm": 0.5134459733963013, "learning_rate": 2.0547117743394595e-05, "loss": 0.0113, "step": 25114 }, { "epoch": 11.676429567642957, "grad_norm": 0.16828812658786774, "learning_rate": 2.046985590932155e-05, "loss": 0.0089, "step": 25116 }, { "epoch": 11.677359367735937, "grad_norm": 1.9218897819519043, "learning_rate": 2.0392638785941613e-05, "loss": 0.0201, "step": 25118 }, { "epoch": 11.678289167828916, "grad_norm": 1.749471664428711, "learning_rate": 2.0315467135356795e-05, "loss": 0.0194, "step": 25120 }, { "epoch": 11.679218967921896, "grad_norm": 0.535064160823822, "learning_rate": 2.02383417192203e-05, "loss": 0.0065, "step": 25122 }, { "epoch": 11.680148768014877, "grad_norm": 0.4826473295688629, "learning_rate": 2.0161263298728505e-05, "loss": 0.0146, "step": 25124 }, { "epoch": 11.681078568107857, "grad_norm": 0.34289175271987915, "learning_rate": 2.0084232634614473e-05, "loss": 0.0122, "step": 25126 }, { "epoch": 11.682008368200837, "grad_norm": 0.943287193775177, "learning_rate": 2.0007250487139934e-05, "loss": 0.0119, "step": 25128 }, { "epoch": 11.682938168293816, "grad_norm": 0.8766801357269287, "learning_rate": 1.9930317616087257e-05, "loss": 0.0088, "step": 25130 }, { "epoch": 11.683867968386798, "grad_norm": 1.1971189975738525, "learning_rate": 1.9853434780752997e-05, "loss": 0.024, "step": 25132 }, { "epoch": 11.684797768479777, "grad_norm": 0.4598265290260315, "learning_rate": 1.97766027399397e-05, "loss": 0.0052, "step": 25134 }, { "epoch": 11.685727568572757, "grad_norm": 0.6610309481620789, "learning_rate": 1.969982225194876e-05, "loss": 0.008, "step": 25136 }, { "epoch": 11.686657368665736, "grad_norm": 0.05910267308354378, "learning_rate": 1.9623094074572254e-05, "loss": 0.0038, "step": 25138 }, { "epoch": 11.687587168758716, "grad_norm": 0.44112053513526917, "learning_rate": 1.9546418965086486e-05, "loss": 0.0172, "step": 25140 }, { "epoch": 11.688516968851697, "grad_norm": 0.68131422996521, "learning_rate": 1.946979768024383e-05, "loss": 0.008, "step": 25142 }, { "epoch": 11.689446768944677, "grad_norm": 0.7923974394798279, "learning_rate": 1.9393230976265443e-05, "loss": 0.0248, "step": 25144 }, { "epoch": 11.690376569037657, "grad_norm": 0.7431037425994873, "learning_rate": 1.9316719608833774e-05, "loss": 0.01, "step": 25146 }, { "epoch": 11.691306369130636, "grad_norm": 0.7571245431900024, "learning_rate": 1.924026433308513e-05, "loss": 0.0173, "step": 25148 }, { "epoch": 11.692236169223618, "grad_norm": 0.6101943850517273, "learning_rate": 1.9163865903602394e-05, "loss": 0.015, "step": 25150 }, { "epoch": 11.693165969316597, "grad_norm": 0.9713923335075378, "learning_rate": 1.9087525074406876e-05, "loss": 0.0175, "step": 25152 }, { "epoch": 11.694095769409577, "grad_norm": 0.15904128551483154, "learning_rate": 1.9011242598951908e-05, "loss": 0.0054, "step": 25154 }, { "epoch": 11.695025569502556, "grad_norm": 0.1996326595544815, "learning_rate": 1.8935019230114904e-05, "loss": 0.0044, "step": 25156 }, { "epoch": 11.695955369595538, "grad_norm": 0.27158889174461365, "learning_rate": 1.8858855720189398e-05, "loss": 0.01, "step": 25158 }, { "epoch": 11.696885169688517, "grad_norm": 1.1133044958114624, "learning_rate": 1.8782752820878648e-05, "loss": 0.0163, "step": 25160 }, { "epoch": 11.697814969781497, "grad_norm": 0.1240435317158699, "learning_rate": 1.870671128328772e-05, "loss": 0.0063, "step": 25162 }, { "epoch": 11.698744769874477, "grad_norm": 1.226615309715271, "learning_rate": 1.8630731857915555e-05, "loss": 0.0194, "step": 25164 }, { "epoch": 11.699674569967456, "grad_norm": 0.6038682460784912, "learning_rate": 1.8554815294648574e-05, "loss": 0.0092, "step": 25166 }, { "epoch": 11.700604370060438, "grad_norm": 0.33498692512512207, "learning_rate": 1.8478962342752434e-05, "loss": 0.0083, "step": 25168 }, { "epoch": 11.701534170153417, "grad_norm": 0.6143117547035217, "learning_rate": 1.8403173750865668e-05, "loss": 0.0201, "step": 25170 }, { "epoch": 11.702463970246397, "grad_norm": 0.994526743888855, "learning_rate": 1.832745026699056e-05, "loss": 0.0102, "step": 25172 }, { "epoch": 11.703393770339376, "grad_norm": 1.077033281326294, "learning_rate": 1.8251792638487505e-05, "loss": 0.0182, "step": 25174 }, { "epoch": 11.704323570432358, "grad_norm": 0.8604167103767395, "learning_rate": 1.8176201612066916e-05, "loss": 0.0122, "step": 25176 }, { "epoch": 11.705253370525337, "grad_norm": 0.714627742767334, "learning_rate": 1.8100677933781367e-05, "loss": 0.0135, "step": 25178 }, { "epoch": 11.706183170618317, "grad_norm": 1.0537084341049194, "learning_rate": 1.8025222349019243e-05, "loss": 0.014, "step": 25180 }, { "epoch": 11.707112970711297, "grad_norm": 0.17418278753757477, "learning_rate": 1.794983560249687e-05, "loss": 0.0237, "step": 25182 }, { "epoch": 11.708042770804276, "grad_norm": 0.18381428718566895, "learning_rate": 1.7874518438250665e-05, "loss": 0.0106, "step": 25184 }, { "epoch": 11.708972570897258, "grad_norm": 1.4998598098754883, "learning_rate": 1.7799271599630803e-05, "loss": 0.0118, "step": 25186 }, { "epoch": 11.709902370990237, "grad_norm": 0.6358667016029358, "learning_rate": 1.7724095829293196e-05, "loss": 0.0149, "step": 25188 }, { "epoch": 11.710832171083217, "grad_norm": 1.7902307510375977, "learning_rate": 1.7648991869192524e-05, "loss": 0.0159, "step": 25190 }, { "epoch": 11.711761971176196, "grad_norm": 0.34901729226112366, "learning_rate": 1.757396046057421e-05, "loss": 0.0063, "step": 25192 }, { "epoch": 11.712691771269178, "grad_norm": 0.7395041584968567, "learning_rate": 1.749900234396814e-05, "loss": 0.0201, "step": 25194 }, { "epoch": 11.713621571362157, "grad_norm": 0.21094252169132233, "learning_rate": 1.7424118259180658e-05, "loss": 0.0077, "step": 25196 }, { "epoch": 11.714551371455137, "grad_norm": 0.742061197757721, "learning_rate": 1.7349308945287453e-05, "loss": 0.0153, "step": 25198 }, { "epoch": 11.715481171548117, "grad_norm": 1.0917812585830688, "learning_rate": 1.727457514062625e-05, "loss": 0.0174, "step": 25200 }, { "epoch": 11.716410971641096, "grad_norm": 3.6533727645874023, "learning_rate": 1.7199917582789528e-05, "loss": 0.0301, "step": 25202 }, { "epoch": 11.717340771734078, "grad_norm": 0.5016140341758728, "learning_rate": 1.7125337008617413e-05, "loss": 0.0069, "step": 25204 }, { "epoch": 11.718270571827057, "grad_norm": 1.4664089679718018, "learning_rate": 1.7050834154189726e-05, "loss": 0.0331, "step": 25206 }, { "epoch": 11.719200371920037, "grad_norm": 0.15376253426074982, "learning_rate": 1.697640975481972e-05, "loss": 0.0049, "step": 25208 }, { "epoch": 11.720130172013016, "grad_norm": 1.6891320943832397, "learning_rate": 1.6902064545046352e-05, "loss": 0.0169, "step": 25210 }, { "epoch": 11.721059972105998, "grad_norm": 0.8558657169342041, "learning_rate": 1.6827799258626486e-05, "loss": 0.0088, "step": 25212 }, { "epoch": 11.721989772198977, "grad_norm": 0.5151406526565552, "learning_rate": 1.675361462852869e-05, "loss": 0.0082, "step": 25214 }, { "epoch": 11.722919572291957, "grad_norm": 0.17393872141838074, "learning_rate": 1.6679511386925472e-05, "loss": 0.015, "step": 25216 }, { "epoch": 11.723849372384937, "grad_norm": 0.1293196827173233, "learning_rate": 1.6605490265185583e-05, "loss": 0.0069, "step": 25218 }, { "epoch": 11.724779172477918, "grad_norm": 0.5244336724281311, "learning_rate": 1.653155199386778e-05, "loss": 0.0113, "step": 25220 }, { "epoch": 11.725708972570898, "grad_norm": 0.541942298412323, "learning_rate": 1.6457697302712945e-05, "loss": 0.0118, "step": 25222 }, { "epoch": 11.726638772663877, "grad_norm": 0.5311616659164429, "learning_rate": 1.6383926920637063e-05, "loss": 0.0075, "step": 25224 }, { "epoch": 11.727568572756857, "grad_norm": 0.6192681193351746, "learning_rate": 1.6310241575724026e-05, "loss": 0.0063, "step": 25226 }, { "epoch": 11.728498372849838, "grad_norm": 0.17532069981098175, "learning_rate": 1.6236641995218443e-05, "loss": 0.0219, "step": 25228 }, { "epoch": 11.729428172942818, "grad_norm": 1.2560758590698242, "learning_rate": 1.616312890551862e-05, "loss": 0.0153, "step": 25230 }, { "epoch": 11.730357973035797, "grad_norm": 1.3364099264144897, "learning_rate": 1.608970303216874e-05, "loss": 0.0184, "step": 25232 }, { "epoch": 11.731287773128777, "grad_norm": 1.3750640153884888, "learning_rate": 1.60163650998527e-05, "loss": 0.0218, "step": 25234 }, { "epoch": 11.732217573221757, "grad_norm": 0.7483469843864441, "learning_rate": 1.594311583238629e-05, "loss": 0.0109, "step": 25236 }, { "epoch": 11.733147373314738, "grad_norm": 2.829738140106201, "learning_rate": 1.5869955952710366e-05, "loss": 0.0236, "step": 25238 }, { "epoch": 11.734077173407718, "grad_norm": 0.6399567723274231, "learning_rate": 1.5796886182883076e-05, "loss": 0.0086, "step": 25240 }, { "epoch": 11.735006973500697, "grad_norm": 0.09961371123790741, "learning_rate": 1.5723907244073712e-05, "loss": 0.0128, "step": 25242 }, { "epoch": 11.735936773593677, "grad_norm": 0.7722499966621399, "learning_rate": 1.5651019856555107e-05, "loss": 0.0119, "step": 25244 }, { "epoch": 11.736866573686658, "grad_norm": 0.9882807731628418, "learning_rate": 1.5578224739696016e-05, "loss": 0.0177, "step": 25246 }, { "epoch": 11.737796373779638, "grad_norm": 0.23456883430480957, "learning_rate": 1.550552261195502e-05, "loss": 0.0036, "step": 25248 }, { "epoch": 11.738726173872617, "grad_norm": 0.7468592524528503, "learning_rate": 1.543291419087293e-05, "loss": 0.0071, "step": 25250 }, { "epoch": 11.739655973965597, "grad_norm": 0.27226150035858154, "learning_rate": 1.536040019306506e-05, "loss": 0.003, "step": 25252 }, { "epoch": 11.740585774058577, "grad_norm": 1.2143824100494385, "learning_rate": 1.528798133421578e-05, "loss": 0.0199, "step": 25254 }, { "epoch": 11.741515574151558, "grad_norm": 0.15566356480121613, "learning_rate": 1.5215658329069834e-05, "loss": 0.0085, "step": 25256 }, { "epoch": 11.742445374244538, "grad_norm": 0.15031450986862183, "learning_rate": 1.5143431891426245e-05, "loss": 0.0042, "step": 25258 }, { "epoch": 11.743375174337517, "grad_norm": 0.2373589724302292, "learning_rate": 1.507130273413047e-05, "loss": 0.0047, "step": 25260 }, { "epoch": 11.744304974430497, "grad_norm": 0.14314581453800201, "learning_rate": 1.4999271569068328e-05, "loss": 0.0054, "step": 25262 }, { "epoch": 11.745234774523478, "grad_norm": 1.3763316869735718, "learning_rate": 1.492733910715851e-05, "loss": 0.0149, "step": 25264 }, { "epoch": 11.746164574616458, "grad_norm": 0.6003434658050537, "learning_rate": 1.4855506058345039e-05, "loss": 0.0087, "step": 25266 }, { "epoch": 11.747094374709437, "grad_norm": 0.1819620132446289, "learning_rate": 1.4783773131591276e-05, "loss": 0.0121, "step": 25268 }, { "epoch": 11.748024174802417, "grad_norm": 0.9366594552993774, "learning_rate": 1.4712141034872409e-05, "loss": 0.0112, "step": 25270 }, { "epoch": 11.748953974895397, "grad_norm": 0.45307478308677673, "learning_rate": 1.464061047516799e-05, "loss": 0.0097, "step": 25272 }, { "epoch": 11.749883774988378, "grad_norm": 0.9305721521377563, "learning_rate": 1.4569182158455922e-05, "loss": 0.0067, "step": 25274 }, { "epoch": 11.750813575081358, "grad_norm": 0.9554603099822998, "learning_rate": 1.449785678970486e-05, "loss": 0.0111, "step": 25276 }, { "epoch": 11.751743375174337, "grad_norm": 0.7682231664657593, "learning_rate": 1.44266350728674e-05, "loss": 0.0219, "step": 25278 }, { "epoch": 11.752673175267317, "grad_norm": 0.37633779644966125, "learning_rate": 1.4355517710873133e-05, "loss": 0.0071, "step": 25280 }, { "epoch": 11.753602975360298, "grad_norm": 1.467320203781128, "learning_rate": 1.4284505405621705e-05, "loss": 0.0384, "step": 25282 }, { "epoch": 11.754532775453278, "grad_norm": 0.36132752895355225, "learning_rate": 1.4213598857976062e-05, "loss": 0.0094, "step": 25284 }, { "epoch": 11.755462575546257, "grad_norm": 0.620934247970581, "learning_rate": 1.4142798767754889e-05, "loss": 0.0084, "step": 25286 }, { "epoch": 11.756392375639237, "grad_norm": 0.9812132716178894, "learning_rate": 1.4072105833726645e-05, "loss": 0.0076, "step": 25288 }, { "epoch": 11.757322175732218, "grad_norm": 1.661712884902954, "learning_rate": 1.4001520753602053e-05, "loss": 0.0152, "step": 25290 }, { "epoch": 11.758251975825198, "grad_norm": 0.3268493413925171, "learning_rate": 1.3931044224027517e-05, "loss": 0.0041, "step": 25292 }, { "epoch": 11.759181775918178, "grad_norm": 0.3697284460067749, "learning_rate": 1.3860676940577613e-05, "loss": 0.0051, "step": 25294 }, { "epoch": 11.760111576011157, "grad_norm": 0.17269235849380493, "learning_rate": 1.3790419597749175e-05, "loss": 0.005, "step": 25296 }, { "epoch": 11.761041376104137, "grad_norm": 0.26530757546424866, "learning_rate": 1.3720272888953982e-05, "loss": 0.0059, "step": 25298 }, { "epoch": 11.761971176197118, "grad_norm": 0.982964813709259, "learning_rate": 1.3650237506511403e-05, "loss": 0.0099, "step": 25300 }, { "epoch": 11.762900976290098, "grad_norm": 0.547020673751831, "learning_rate": 1.3580314141642548e-05, "loss": 0.0103, "step": 25302 }, { "epoch": 11.763830776383077, "grad_norm": 0.9025153517723083, "learning_rate": 1.3510503484462961e-05, "loss": 0.011, "step": 25304 }, { "epoch": 11.764760576476057, "grad_norm": 0.41100576519966125, "learning_rate": 1.344080622397517e-05, "loss": 0.021, "step": 25306 }, { "epoch": 11.765690376569038, "grad_norm": 0.35226893424987793, "learning_rate": 1.3371223048063473e-05, "loss": 0.0083, "step": 25308 }, { "epoch": 11.766620176662018, "grad_norm": 1.283718466758728, "learning_rate": 1.3301754643485563e-05, "loss": 0.0172, "step": 25310 }, { "epoch": 11.767549976754998, "grad_norm": 0.6139768958091736, "learning_rate": 1.32324016958667e-05, "loss": 0.0061, "step": 25312 }, { "epoch": 11.768479776847977, "grad_norm": 0.686498761177063, "learning_rate": 1.3163164889692179e-05, "loss": 0.0062, "step": 25314 }, { "epoch": 11.769409576940959, "grad_norm": 0.4671981930732727, "learning_rate": 1.3094044908301472e-05, "loss": 0.0057, "step": 25316 }, { "epoch": 11.770339377033938, "grad_norm": 0.7720194458961487, "learning_rate": 1.3025042433881045e-05, "loss": 0.0166, "step": 25318 }, { "epoch": 11.771269177126918, "grad_norm": 0.424027681350708, "learning_rate": 1.2956158147457152e-05, "loss": 0.0057, "step": 25320 }, { "epoch": 11.772198977219897, "grad_norm": 0.9707697033882141, "learning_rate": 1.2887392728890048e-05, "loss": 0.0118, "step": 25322 }, { "epoch": 11.773128777312877, "grad_norm": 0.9466176629066467, "learning_rate": 1.2818746856866656e-05, "loss": 0.0148, "step": 25324 }, { "epoch": 11.774058577405858, "grad_norm": 0.11431534588336945, "learning_rate": 1.275022120889417e-05, "loss": 0.006, "step": 25326 }, { "epoch": 11.774988377498838, "grad_norm": 1.267182469367981, "learning_rate": 1.2681816461292759e-05, "loss": 0.0119, "step": 25328 }, { "epoch": 11.775918177591818, "grad_norm": 0.9398954510688782, "learning_rate": 1.2613533289189829e-05, "loss": 0.0143, "step": 25330 }, { "epoch": 11.776847977684797, "grad_norm": 0.12412301450967789, "learning_rate": 1.2545372366512868e-05, "loss": 0.0033, "step": 25332 }, { "epoch": 11.777777777777779, "grad_norm": 0.8521366119384766, "learning_rate": 1.2477334365982204e-05, "loss": 0.0177, "step": 25334 }, { "epoch": 11.778707577870758, "grad_norm": 0.475376695394516, "learning_rate": 1.2409419959105898e-05, "loss": 0.0156, "step": 25336 }, { "epoch": 11.779637377963738, "grad_norm": 0.09358105063438416, "learning_rate": 1.2341629816171725e-05, "loss": 0.0129, "step": 25338 }, { "epoch": 11.780567178056717, "grad_norm": 0.5314068794250488, "learning_rate": 1.2273964606240721e-05, "loss": 0.0155, "step": 25340 }, { "epoch": 11.781496978149697, "grad_norm": 1.4489538669586182, "learning_rate": 1.2206424997141347e-05, "loss": 0.0192, "step": 25342 }, { "epoch": 11.782426778242678, "grad_norm": 0.17983408272266388, "learning_rate": 1.2139011655462272e-05, "loss": 0.0201, "step": 25344 }, { "epoch": 11.783356578335658, "grad_norm": 1.465120553970337, "learning_rate": 1.2071725246546124e-05, "loss": 0.0243, "step": 25346 }, { "epoch": 11.784286378428638, "grad_norm": 1.0751252174377441, "learning_rate": 1.2004566434482283e-05, "loss": 0.0349, "step": 25348 }, { "epoch": 11.785216178521617, "grad_norm": 0.28987571597099304, "learning_rate": 1.1937535882101266e-05, "loss": 0.0058, "step": 25350 }, { "epoch": 11.786145978614599, "grad_norm": 0.33586952090263367, "learning_rate": 1.1870634250967715e-05, "loss": 0.0144, "step": 25352 }, { "epoch": 11.787075778707578, "grad_norm": 1.046442985534668, "learning_rate": 1.1803862201373413e-05, "loss": 0.0181, "step": 25354 }, { "epoch": 11.788005578800558, "grad_norm": 0.9706133604049683, "learning_rate": 1.173722039233169e-05, "loss": 0.0141, "step": 25356 }, { "epoch": 11.788935378893537, "grad_norm": 0.5935108661651611, "learning_rate": 1.1670709481570443e-05, "loss": 0.0216, "step": 25358 }, { "epoch": 11.789865178986517, "grad_norm": 1.0717145204544067, "learning_rate": 1.160433012552521e-05, "loss": 0.011, "step": 25360 }, { "epoch": 11.790794979079498, "grad_norm": 1.2210568189620972, "learning_rate": 1.1538082979333437e-05, "loss": 0.0095, "step": 25362 }, { "epoch": 11.791724779172478, "grad_norm": 1.2326438426971436, "learning_rate": 1.147196869682801e-05, "loss": 0.0197, "step": 25364 }, { "epoch": 11.792654579265458, "grad_norm": 1.181718349456787, "learning_rate": 1.1405987930530213e-05, "loss": 0.0183, "step": 25366 }, { "epoch": 11.793584379358437, "grad_norm": 0.11366889625787735, "learning_rate": 1.1340141331643264e-05, "loss": 0.0101, "step": 25368 }, { "epoch": 11.794514179451419, "grad_norm": 1.0659312009811401, "learning_rate": 1.1274429550046665e-05, "loss": 0.0156, "step": 25370 }, { "epoch": 11.795443979544398, "grad_norm": 2.0406463146209717, "learning_rate": 1.1208853234289318e-05, "loss": 0.0157, "step": 25372 }, { "epoch": 11.796373779637378, "grad_norm": 0.9810275435447693, "learning_rate": 1.1143413031582689e-05, "loss": 0.0325, "step": 25374 }, { "epoch": 11.797303579730357, "grad_norm": 0.28565871715545654, "learning_rate": 1.1078109587795308e-05, "loss": 0.0058, "step": 25376 }, { "epoch": 11.798233379823339, "grad_norm": 1.111979365348816, "learning_rate": 1.1012943547445802e-05, "loss": 0.0182, "step": 25378 }, { "epoch": 11.799163179916318, "grad_norm": 0.7221468687057495, "learning_rate": 1.0947915553696825e-05, "loss": 0.0057, "step": 25380 }, { "epoch": 11.800092980009298, "grad_norm": 0.4572736918926239, "learning_rate": 1.088302624834813e-05, "loss": 0.013, "step": 25382 }, { "epoch": 11.801022780102278, "grad_norm": 0.07268176227807999, "learning_rate": 1.0818276271831112e-05, "loss": 0.0033, "step": 25384 }, { "epoch": 11.801952580195259, "grad_norm": 0.30574560165405273, "learning_rate": 1.0753666263202045e-05, "loss": 0.0061, "step": 25386 }, { "epoch": 11.802882380288239, "grad_norm": 0.11416300386190414, "learning_rate": 1.0689196860135188e-05, "loss": 0.003, "step": 25388 }, { "epoch": 11.803812180381218, "grad_norm": 0.8406102061271667, "learning_rate": 1.0624868698917964e-05, "loss": 0.007, "step": 25390 }, { "epoch": 11.804741980474198, "grad_norm": 0.871415913105011, "learning_rate": 1.0560682414443351e-05, "loss": 0.0157, "step": 25392 }, { "epoch": 11.805671780567177, "grad_norm": 1.2894814014434814, "learning_rate": 1.0496638640203774e-05, "loss": 0.0339, "step": 25394 }, { "epoch": 11.806601580660159, "grad_norm": 0.2622932195663452, "learning_rate": 1.0432738008285576e-05, "loss": 0.0045, "step": 25396 }, { "epoch": 11.807531380753138, "grad_norm": 0.5564125776290894, "learning_rate": 1.0368981149362193e-05, "loss": 0.014, "step": 25398 }, { "epoch": 11.808461180846118, "grad_norm": 0.5676633715629578, "learning_rate": 1.030536869268823e-05, "loss": 0.0057, "step": 25400 }, { "epoch": 11.809390980939098, "grad_norm": 0.761233389377594, "learning_rate": 1.0241901266092661e-05, "loss": 0.0095, "step": 25402 }, { "epoch": 11.810320781032079, "grad_norm": 0.5198423862457275, "learning_rate": 1.0178579495973514e-05, "loss": 0.0078, "step": 25404 }, { "epoch": 11.811250581125059, "grad_norm": 0.6057487726211548, "learning_rate": 1.0115404007291232e-05, "loss": 0.005, "step": 25406 }, { "epoch": 11.812180381218038, "grad_norm": 0.8051832318305969, "learning_rate": 1.0052375423562102e-05, "loss": 0.0184, "step": 25408 }, { "epoch": 11.813110181311018, "grad_norm": 0.6392989158630371, "learning_rate": 9.989494366852941e-06, "loss": 0.0245, "step": 25410 }, { "epoch": 11.814039981403997, "grad_norm": 1.1735092401504517, "learning_rate": 9.926761457774394e-06, "loss": 0.0128, "step": 25412 }, { "epoch": 11.814969781496979, "grad_norm": 0.9990472197532654, "learning_rate": 9.864177315475085e-06, "loss": 0.0304, "step": 25414 }, { "epoch": 11.815899581589958, "grad_norm": 0.8490030765533447, "learning_rate": 9.801742557634811e-06, "loss": 0.0076, "step": 25416 }, { "epoch": 11.816829381682938, "grad_norm": 1.433321475982666, "learning_rate": 9.739457800459852e-06, "loss": 0.0138, "step": 25418 }, { "epoch": 11.817759181775918, "grad_norm": 0.09398996829986572, "learning_rate": 9.677323658675613e-06, "loss": 0.0071, "step": 25420 }, { "epoch": 11.818688981868899, "grad_norm": 1.0081511735916138, "learning_rate": 9.615340745520706e-06, "loss": 0.0236, "step": 25422 }, { "epoch": 11.819618781961879, "grad_norm": 0.7986426949501038, "learning_rate": 9.553509672741606e-06, "loss": 0.0085, "step": 25424 }, { "epoch": 11.820548582054858, "grad_norm": 0.601155161857605, "learning_rate": 9.491831050586172e-06, "loss": 0.0072, "step": 25426 }, { "epoch": 11.821478382147838, "grad_norm": 0.5515241026878357, "learning_rate": 9.430305487797229e-06, "loss": 0.0057, "step": 25428 }, { "epoch": 11.822408182240817, "grad_norm": 0.18714158236980438, "learning_rate": 9.368933591607382e-06, "loss": 0.0112, "step": 25430 }, { "epoch": 11.823337982333799, "grad_norm": 0.6743453741073608, "learning_rate": 9.307715967732467e-06, "loss": 0.0104, "step": 25432 }, { "epoch": 11.824267782426778, "grad_norm": 0.32397305965423584, "learning_rate": 9.246653220365857e-06, "loss": 0.0067, "step": 25434 }, { "epoch": 11.825197582519758, "grad_norm": 0.23856498301029205, "learning_rate": 9.185745952171944e-06, "loss": 0.0091, "step": 25436 }, { "epoch": 11.826127382612738, "grad_norm": 0.14959406852722168, "learning_rate": 9.124994764281008e-06, "loss": 0.0029, "step": 25438 }, { "epoch": 11.827057182705719, "grad_norm": 0.6371771693229675, "learning_rate": 9.06440025628289e-06, "loss": 0.0089, "step": 25440 }, { "epoch": 11.827986982798699, "grad_norm": 1.2890352010726929, "learning_rate": 9.003963026220641e-06, "loss": 0.011, "step": 25442 }, { "epoch": 11.828916782891678, "grad_norm": 0.5033839344978333, "learning_rate": 8.943683670585326e-06, "loss": 0.0051, "step": 25444 }, { "epoch": 11.829846582984658, "grad_norm": 0.34867995977401733, "learning_rate": 8.883562784310258e-06, "loss": 0.007, "step": 25446 }, { "epoch": 11.83077638307764, "grad_norm": 0.4124736189842224, "learning_rate": 8.823600960763905e-06, "loss": 0.0055, "step": 25448 }, { "epoch": 11.831706183170619, "grad_norm": 0.6929027438163757, "learning_rate": 8.763798791745388e-06, "loss": 0.0189, "step": 25450 }, { "epoch": 11.832635983263598, "grad_norm": 0.10885711014270782, "learning_rate": 8.704156867477981e-06, "loss": 0.0052, "step": 25452 }, { "epoch": 11.833565783356578, "grad_norm": 0.2566131055355072, "learning_rate": 8.64467577660353e-06, "loss": 0.0054, "step": 25454 }, { "epoch": 11.834495583449558, "grad_norm": 0.1861279010772705, "learning_rate": 8.585356106176116e-06, "loss": 0.0058, "step": 25456 }, { "epoch": 11.835425383542539, "grad_norm": 0.277926504611969, "learning_rate": 8.526198441657074e-06, "loss": 0.0073, "step": 25458 }, { "epoch": 11.836355183635519, "grad_norm": 0.6299346685409546, "learning_rate": 8.467203366908805e-06, "loss": 0.0077, "step": 25460 }, { "epoch": 11.837284983728498, "grad_norm": 0.13182494044303894, "learning_rate": 8.408371464188607e-06, "loss": 0.0036, "step": 25462 }, { "epoch": 11.838214783821478, "grad_norm": 0.18385224044322968, "learning_rate": 8.349703314143748e-06, "loss": 0.0145, "step": 25464 }, { "epoch": 11.83914458391446, "grad_norm": 0.12096917629241943, "learning_rate": 8.291199495805183e-06, "loss": 0.0041, "step": 25466 }, { "epoch": 11.840074384007439, "grad_norm": 1.2849690914154053, "learning_rate": 8.232860586582111e-06, "loss": 0.0125, "step": 25468 }, { "epoch": 11.841004184100418, "grad_norm": 0.9418808221817017, "learning_rate": 8.174687162255618e-06, "loss": 0.0065, "step": 25470 }, { "epoch": 11.841933984193398, "grad_norm": 0.042744822800159454, "learning_rate": 8.116679796974312e-06, "loss": 0.0069, "step": 25472 }, { "epoch": 11.84286378428638, "grad_norm": 0.17698538303375244, "learning_rate": 8.058839063247469e-06, "loss": 0.0031, "step": 25474 }, { "epoch": 11.843793584379359, "grad_norm": 0.13661116361618042, "learning_rate": 8.001165531939516e-06, "loss": 0.0038, "step": 25476 }, { "epoch": 11.844723384472339, "grad_norm": 0.6372092366218567, "learning_rate": 7.943659772265059e-06, "loss": 0.0083, "step": 25478 }, { "epoch": 11.845653184565318, "grad_norm": 0.21232028305530548, "learning_rate": 7.886322351782852e-06, "loss": 0.0093, "step": 25480 }, { "epoch": 11.846582984658298, "grad_norm": 0.2919805645942688, "learning_rate": 7.829153836389834e-06, "loss": 0.0079, "step": 25482 }, { "epoch": 11.84751278475128, "grad_norm": 1.0684056282043457, "learning_rate": 7.772154790316301e-06, "loss": 0.0076, "step": 25484 }, { "epoch": 11.848442584844259, "grad_norm": 0.2820598781108856, "learning_rate": 7.715325776119824e-06, "loss": 0.0045, "step": 25486 }, { "epoch": 11.849372384937238, "grad_norm": 1.1535147428512573, "learning_rate": 7.658667354679959e-06, "loss": 0.0163, "step": 25488 }, { "epoch": 11.850302185030218, "grad_norm": 0.059888020157814026, "learning_rate": 7.602180085192197e-06, "loss": 0.0054, "step": 25490 }, { "epoch": 11.8512319851232, "grad_norm": 1.3680310249328613, "learning_rate": 7.545864525163209e-06, "loss": 0.0168, "step": 25492 }, { "epoch": 11.852161785216179, "grad_norm": 0.16549910604953766, "learning_rate": 7.489721230404966e-06, "loss": 0.0033, "step": 25494 }, { "epoch": 11.853091585309159, "grad_norm": 0.061654530465602875, "learning_rate": 7.433750755028867e-06, "loss": 0.0105, "step": 25496 }, { "epoch": 11.854021385402138, "grad_norm": 0.4593453109264374, "learning_rate": 7.377953651440908e-06, "loss": 0.0099, "step": 25498 }, { "epoch": 11.854951185495118, "grad_norm": 0.4206429719924927, "learning_rate": 7.322330470336227e-06, "loss": 0.0098, "step": 25500 }, { "epoch": 11.855880985588099, "grad_norm": 1.4065003395080566, "learning_rate": 7.266881760693166e-06, "loss": 0.0121, "step": 25502 }, { "epoch": 11.856810785681079, "grad_norm": 1.34980309009552, "learning_rate": 7.211608069767851e-06, "loss": 0.0176, "step": 25504 }, { "epoch": 11.857740585774058, "grad_norm": 0.7345114350318909, "learning_rate": 7.156509943089421e-06, "loss": 0.0108, "step": 25506 }, { "epoch": 11.858670385867038, "grad_norm": 0.30306175351142883, "learning_rate": 7.101587924454273e-06, "loss": 0.0033, "step": 25508 }, { "epoch": 11.85960018596002, "grad_norm": 0.08860684931278229, "learning_rate": 7.046842555920302e-06, "loss": 0.0089, "step": 25510 }, { "epoch": 11.860529986052999, "grad_norm": 0.40104803442955017, "learning_rate": 6.992274377802323e-06, "loss": 0.007, "step": 25512 }, { "epoch": 11.861459786145979, "grad_norm": 0.6132017374038696, "learning_rate": 6.937883928666346e-06, "loss": 0.0124, "step": 25514 }, { "epoch": 11.862389586238958, "grad_norm": 0.9466161131858826, "learning_rate": 6.883671745323896e-06, "loss": 0.0102, "step": 25516 }, { "epoch": 11.863319386331938, "grad_norm": 0.3469197154045105, "learning_rate": 6.829638362827465e-06, "loss": 0.0056, "step": 25518 }, { "epoch": 11.864249186424919, "grad_norm": 0.481515496969223, "learning_rate": 6.775784314464723e-06, "loss": 0.0074, "step": 25520 }, { "epoch": 11.865178986517899, "grad_norm": 0.4217521846294403, "learning_rate": 6.722110131753501e-06, "loss": 0.0184, "step": 25522 }, { "epoch": 11.866108786610878, "grad_norm": 0.1438293606042862, "learning_rate": 6.668616344436077e-06, "loss": 0.0028, "step": 25524 }, { "epoch": 11.867038586703858, "grad_norm": 0.32324209809303284, "learning_rate": 6.615303480474533e-06, "loss": 0.0129, "step": 25526 }, { "epoch": 11.86796838679684, "grad_norm": 0.7248399257659912, "learning_rate": 6.562172066045674e-06, "loss": 0.0086, "step": 25528 }, { "epoch": 11.868898186889819, "grad_norm": 0.10811728984117508, "learning_rate": 6.509222625534752e-06, "loss": 0.0021, "step": 25530 }, { "epoch": 11.869827986982799, "grad_norm": 0.8067317008972168, "learning_rate": 6.456455681531489e-06, "loss": 0.009, "step": 25532 }, { "epoch": 11.870757787075778, "grad_norm": 0.4068275988101959, "learning_rate": 6.403871754824435e-06, "loss": 0.004, "step": 25534 }, { "epoch": 11.87168758716876, "grad_norm": 0.07525406777858734, "learning_rate": 6.35147136439548e-06, "loss": 0.0032, "step": 25536 }, { "epoch": 11.872617387261739, "grad_norm": 0.6200103163719177, "learning_rate": 6.2992550274154455e-06, "loss": 0.0031, "step": 25538 }, { "epoch": 11.873547187354719, "grad_norm": 0.37402263283729553, "learning_rate": 6.247223259238494e-06, "loss": 0.0069, "step": 25540 }, { "epoch": 11.874476987447698, "grad_norm": 0.1664271354675293, "learning_rate": 6.195376573397288e-06, "loss": 0.005, "step": 25542 }, { "epoch": 11.87540678754068, "grad_norm": 1.7740089893341064, "learning_rate": 6.143715481597451e-06, "loss": 0.0145, "step": 25544 }, { "epoch": 11.87633658763366, "grad_norm": 1.0529465675354004, "learning_rate": 6.092240493713222e-06, "loss": 0.0089, "step": 25546 }, { "epoch": 11.877266387726639, "grad_norm": 0.0936996340751648, "learning_rate": 6.040952117782065e-06, "loss": 0.0044, "step": 25548 }, { "epoch": 11.878196187819619, "grad_norm": 0.32462427020072937, "learning_rate": 5.989850859999311e-06, "loss": 0.0103, "step": 25550 }, { "epoch": 11.879125987912598, "grad_norm": 0.2245960533618927, "learning_rate": 5.938937224713748e-06, "loss": 0.0062, "step": 25552 }, { "epoch": 11.88005578800558, "grad_norm": 0.2800801992416382, "learning_rate": 5.8882117144226285e-06, "loss": 0.0051, "step": 25554 }, { "epoch": 11.880985588098559, "grad_norm": 0.8169980645179749, "learning_rate": 5.8376748297662625e-06, "loss": 0.0169, "step": 25556 }, { "epoch": 11.881915388191539, "grad_norm": 0.48051872849464417, "learning_rate": 5.787327069523066e-06, "loss": 0.0133, "step": 25558 }, { "epoch": 11.882845188284518, "grad_norm": 0.18581722676753998, "learning_rate": 5.7371689306052275e-06, "loss": 0.003, "step": 25560 }, { "epoch": 11.8837749883775, "grad_norm": 0.9629623293876648, "learning_rate": 5.687200908053434e-06, "loss": 0.01, "step": 25562 }, { "epoch": 11.88470478847048, "grad_norm": 0.1597316712141037, "learning_rate": 5.637423495031674e-06, "loss": 0.0063, "step": 25564 }, { "epoch": 11.885634588563459, "grad_norm": 0.05272487923502922, "learning_rate": 5.587837182823029e-06, "loss": 0.0027, "step": 25566 }, { "epoch": 11.886564388656438, "grad_norm": 0.5207926630973816, "learning_rate": 5.5384424608244964e-06, "loss": 0.0085, "step": 25568 }, { "epoch": 11.887494188749418, "grad_norm": 0.4055958688259125, "learning_rate": 5.489239816541813e-06, "loss": 0.0057, "step": 25570 }, { "epoch": 11.8884239888424, "grad_norm": 0.4093753695487976, "learning_rate": 5.440229735585328e-06, "loss": 0.017, "step": 25572 }, { "epoch": 11.889353788935379, "grad_norm": 0.3294672667980194, "learning_rate": 5.391412701664755e-06, "loss": 0.0082, "step": 25574 }, { "epoch": 11.890283589028359, "grad_norm": 0.18543638288974762, "learning_rate": 5.342789196584619e-06, "loss": 0.0189, "step": 25576 }, { "epoch": 11.891213389121338, "grad_norm": 1.1113247871398926, "learning_rate": 5.294359700239068e-06, "loss": 0.0112, "step": 25578 }, { "epoch": 11.89214318921432, "grad_norm": 0.1899043470621109, "learning_rate": 5.246124690607676e-06, "loss": 0.0222, "step": 25580 }, { "epoch": 11.8930729893073, "grad_norm": 1.7023135423660278, "learning_rate": 5.198084643750842e-06, "loss": 0.0072, "step": 25582 }, { "epoch": 11.894002789400279, "grad_norm": 0.6267293691635132, "learning_rate": 5.1502400338041105e-06, "loss": 0.0064, "step": 25584 }, { "epoch": 11.894932589493258, "grad_norm": 0.10996250063180923, "learning_rate": 5.1025913329745745e-06, "loss": 0.0036, "step": 25586 }, { "epoch": 11.895862389586238, "grad_norm": 1.3678934574127197, "learning_rate": 5.055139011535673e-06, "loss": 0.0275, "step": 25588 }, { "epoch": 11.89679218967922, "grad_norm": 0.6833757162094116, "learning_rate": 5.007883537822766e-06, "loss": 0.0074, "step": 25590 }, { "epoch": 11.897721989772199, "grad_norm": 0.6288872957229614, "learning_rate": 4.960825378228092e-06, "loss": 0.0069, "step": 25592 }, { "epoch": 11.898651789865179, "grad_norm": 0.46189484000205994, "learning_rate": 4.913964997196792e-06, "loss": 0.003, "step": 25594 }, { "epoch": 11.899581589958158, "grad_norm": 0.3493000864982605, "learning_rate": 4.867302857222016e-06, "loss": 0.0046, "step": 25596 }, { "epoch": 11.90051139005114, "grad_norm": 0.501395583152771, "learning_rate": 4.820839418840033e-06, "loss": 0.0054, "step": 25598 }, { "epoch": 11.90144119014412, "grad_norm": 0.4579058289527893, "learning_rate": 4.774575140626333e-06, "loss": 0.0074, "step": 25600 }, { "epoch": 11.902370990237099, "grad_norm": 0.22021673619747162, "learning_rate": 4.728510479190761e-06, "loss": 0.0105, "step": 25602 }, { "epoch": 11.903300790330078, "grad_norm": 0.05253042280673981, "learning_rate": 4.682645889172726e-06, "loss": 0.0105, "step": 25604 }, { "epoch": 11.90423059042306, "grad_norm": 0.7244082093238831, "learning_rate": 4.636981823237315e-06, "loss": 0.0067, "step": 25606 }, { "epoch": 11.90516039051604, "grad_norm": 0.33417361974716187, "learning_rate": 4.591518732070326e-06, "loss": 0.0052, "step": 25608 }, { "epoch": 11.906090190609019, "grad_norm": 0.0923529863357544, "learning_rate": 4.54625706437442e-06, "loss": 0.0075, "step": 25610 }, { "epoch": 11.907019990701999, "grad_norm": 0.2771727442741394, "learning_rate": 4.501197266863673e-06, "loss": 0.0059, "step": 25612 }, { "epoch": 11.907949790794978, "grad_norm": 0.23641282320022583, "learning_rate": 4.456339784260204e-06, "loss": 0.0039, "step": 25614 }, { "epoch": 11.90887959088796, "grad_norm": 0.6902941465377808, "learning_rate": 4.411685059289352e-06, "loss": 0.0108, "step": 25616 }, { "epoch": 11.90980939098094, "grad_norm": 0.1958412379026413, "learning_rate": 4.367233532675027e-06, "loss": 0.0084, "step": 25618 }, { "epoch": 11.910739191073919, "grad_norm": 1.2903096675872803, "learning_rate": 4.3229856431359484e-06, "loss": 0.0176, "step": 25620 }, { "epoch": 11.911668991166898, "grad_norm": 0.20390473306179047, "learning_rate": 4.278941827380924e-06, "loss": 0.0176, "step": 25622 }, { "epoch": 11.91259879125988, "grad_norm": 0.2598813772201538, "learning_rate": 4.235102520104732e-06, "loss": 0.0029, "step": 25624 }, { "epoch": 11.91352859135286, "grad_norm": 1.2274115085601807, "learning_rate": 4.19146815398344e-06, "loss": 0.0093, "step": 25626 }, { "epoch": 11.914458391445839, "grad_norm": 0.5133273005485535, "learning_rate": 4.148039159670724e-06, "loss": 0.0245, "step": 25628 }, { "epoch": 11.915388191538819, "grad_norm": 0.46064701676368713, "learning_rate": 4.104815965793323e-06, "loss": 0.0066, "step": 25630 }, { "epoch": 11.9163179916318, "grad_norm": 0.6369730234146118, "learning_rate": 4.061798998946516e-06, "loss": 0.0074, "step": 25632 }, { "epoch": 11.91724779172478, "grad_norm": 1.0022565126419067, "learning_rate": 4.018988683690493e-06, "loss": 0.0061, "step": 25634 }, { "epoch": 11.91817759181776, "grad_norm": 0.07717634737491608, "learning_rate": 3.976385442545787e-06, "loss": 0.0029, "step": 25636 }, { "epoch": 11.919107391910739, "grad_norm": 1.0977967977523804, "learning_rate": 3.933989695989187e-06, "loss": 0.017, "step": 25638 }, { "epoch": 11.920037192003718, "grad_norm": 0.38139814138412476, "learning_rate": 3.891801862449595e-06, "loss": 0.0039, "step": 25640 }, { "epoch": 11.9209669920967, "grad_norm": 0.42791280150413513, "learning_rate": 3.849822358303897e-06, "loss": 0.0045, "step": 25642 }, { "epoch": 11.92189679218968, "grad_norm": 0.1320963203907013, "learning_rate": 3.808051597872943e-06, "loss": 0.0059, "step": 25644 }, { "epoch": 11.922826592282659, "grad_norm": 0.380312979221344, "learning_rate": 3.76648999341709e-06, "loss": 0.0042, "step": 25646 }, { "epoch": 11.923756392375639, "grad_norm": 0.9161626100540161, "learning_rate": 3.7251379551326863e-06, "loss": 0.0068, "step": 25648 }, { "epoch": 11.92468619246862, "grad_norm": 1.881590485572815, "learning_rate": 3.68399589114775e-06, "loss": 0.0153, "step": 25650 }, { "epoch": 11.9256159925616, "grad_norm": 0.08265440165996552, "learning_rate": 3.6430642075176538e-06, "loss": 0.0031, "step": 25652 }, { "epoch": 11.92654579265458, "grad_norm": 0.29316243529319763, "learning_rate": 3.6023433082216886e-06, "loss": 0.0041, "step": 25654 }, { "epoch": 11.927475592747559, "grad_norm": 0.3090706765651703, "learning_rate": 3.5618335951587775e-06, "loss": 0.0038, "step": 25656 }, { "epoch": 11.928405392840538, "grad_norm": 0.17290961742401123, "learning_rate": 3.521535468143255e-06, "loss": 0.0175, "step": 25658 }, { "epoch": 11.92933519293352, "grad_norm": 0.5816835761070251, "learning_rate": 3.481449324901453e-06, "loss": 0.0099, "step": 25660 }, { "epoch": 11.9302649930265, "grad_norm": 0.41400811076164246, "learning_rate": 3.441575561067334e-06, "loss": 0.0255, "step": 25662 }, { "epoch": 11.931194793119479, "grad_norm": 0.40296733379364014, "learning_rate": 3.401914570179121e-06, "loss": 0.0067, "step": 25664 }, { "epoch": 11.932124593212459, "grad_norm": 0.4416654109954834, "learning_rate": 3.3624667436745086e-06, "loss": 0.0088, "step": 25666 }, { "epoch": 11.93305439330544, "grad_norm": 0.10342461615800858, "learning_rate": 3.3232324708877135e-06, "loss": 0.0038, "step": 25668 }, { "epoch": 11.93398419339842, "grad_norm": 0.15270011126995087, "learning_rate": 3.2842121390452505e-06, "loss": 0.0092, "step": 25670 }, { "epoch": 11.9349139934914, "grad_norm": 0.821927547454834, "learning_rate": 3.2454061332618717e-06, "loss": 0.0119, "step": 25672 }, { "epoch": 11.935843793584379, "grad_norm": 0.40183183550834656, "learning_rate": 3.206814836537272e-06, "loss": 0.0083, "step": 25674 }, { "epoch": 11.936773593677358, "grad_norm": 0.6710494160652161, "learning_rate": 3.1684386297519707e-06, "loss": 0.0151, "step": 25676 }, { "epoch": 11.93770339377034, "grad_norm": 0.18942266702651978, "learning_rate": 3.1302778916637213e-06, "loss": 0.0184, "step": 25678 }, { "epoch": 11.93863319386332, "grad_norm": 0.19461023807525635, "learning_rate": 3.0923329989034327e-06, "loss": 0.0031, "step": 25680 }, { "epoch": 11.939562993956299, "grad_norm": 0.5465829372406006, "learning_rate": 3.0546043259719604e-06, "loss": 0.004, "step": 25682 }, { "epoch": 11.940492794049279, "grad_norm": 0.6224928498268127, "learning_rate": 3.017092245236161e-06, "loss": 0.006, "step": 25684 }, { "epoch": 11.94142259414226, "grad_norm": 0.29957205057144165, "learning_rate": 2.9797971269249598e-06, "loss": 0.0096, "step": 25686 }, { "epoch": 11.94235239423524, "grad_norm": 1.0420012474060059, "learning_rate": 2.942719339126198e-06, "loss": 0.0325, "step": 25688 }, { "epoch": 11.94328219432822, "grad_norm": 0.18380820751190186, "learning_rate": 2.9058592477826763e-06, "loss": 0.0038, "step": 25690 }, { "epoch": 11.944211994421199, "grad_norm": 0.5721486806869507, "learning_rate": 2.869217216688612e-06, "loss": 0.0093, "step": 25692 }, { "epoch": 11.94514179451418, "grad_norm": 0.24993376433849335, "learning_rate": 2.8327936074860605e-06, "loss": 0.0093, "step": 25694 }, { "epoch": 11.94607159460716, "grad_norm": 0.12433349341154099, "learning_rate": 2.7965887796613427e-06, "loss": 0.0073, "step": 25696 }, { "epoch": 11.94700139470014, "grad_norm": 0.1144324243068695, "learning_rate": 2.7606030905415707e-06, "loss": 0.0031, "step": 25698 }, { "epoch": 11.947931194793119, "grad_norm": 0.3425871729850769, "learning_rate": 2.7248368952908045e-06, "loss": 0.0042, "step": 25700 }, { "epoch": 11.9488609948861, "grad_norm": 0.5838231444358826, "learning_rate": 2.689290546907035e-06, "loss": 0.0047, "step": 25702 }, { "epoch": 11.94979079497908, "grad_norm": 0.1954965889453888, "learning_rate": 2.6539643962184523e-06, "loss": 0.0034, "step": 25704 }, { "epoch": 11.95072059507206, "grad_norm": 0.13706177473068237, "learning_rate": 2.618858791879735e-06, "loss": 0.0066, "step": 25706 }, { "epoch": 11.95165039516504, "grad_norm": 0.1605571210384369, "learning_rate": 2.5839740803691134e-06, "loss": 0.0048, "step": 25708 }, { "epoch": 11.952580195258019, "grad_norm": 0.6103760004043579, "learning_rate": 2.5493106059846027e-06, "loss": 0.0051, "step": 25710 }, { "epoch": 11.953509995351, "grad_norm": 1.1435821056365967, "learning_rate": 2.514868710840778e-06, "loss": 0.0312, "step": 25712 }, { "epoch": 11.95443979544398, "grad_norm": 0.5535786151885986, "learning_rate": 2.480648734865084e-06, "loss": 0.0168, "step": 25714 }, { "epoch": 11.95536959553696, "grad_norm": 0.15669040381908417, "learning_rate": 2.4466510157949477e-06, "loss": 0.0087, "step": 25716 }, { "epoch": 11.956299395629939, "grad_norm": 0.4966561496257782, "learning_rate": 2.4128758891741312e-06, "loss": 0.0035, "step": 25718 }, { "epoch": 11.95722919572292, "grad_norm": 0.2228117436170578, "learning_rate": 2.379323688349496e-06, "loss": 0.0026, "step": 25720 }, { "epoch": 11.9581589958159, "grad_norm": 0.07330061495304108, "learning_rate": 2.3459947444677215e-06, "loss": 0.0025, "step": 25722 }, { "epoch": 11.95908879590888, "grad_norm": 0.6292467713356018, "learning_rate": 2.3128893864721024e-06, "loss": 0.0048, "step": 25724 }, { "epoch": 11.96001859600186, "grad_norm": 1.1406309604644775, "learning_rate": 2.2800079410990072e-06, "loss": 0.0099, "step": 25726 }, { "epoch": 11.960948396094839, "grad_norm": 0.9529898166656494, "learning_rate": 2.2473507328750997e-06, "loss": 0.0101, "step": 25728 }, { "epoch": 11.96187819618782, "grad_norm": 0.21375925838947296, "learning_rate": 2.214918084113845e-06, "loss": 0.0054, "step": 25730 }, { "epoch": 11.9628079962808, "grad_norm": 0.4451896548271179, "learning_rate": 2.1827103149124644e-06, "loss": 0.0109, "step": 25732 }, { "epoch": 11.96373779637378, "grad_norm": 0.567134439945221, "learning_rate": 2.1507277431484865e-06, "loss": 0.0074, "step": 25734 }, { "epoch": 11.964667596466759, "grad_norm": 1.12453031539917, "learning_rate": 2.1189706844770616e-06, "loss": 0.0079, "step": 25736 }, { "epoch": 11.96559739655974, "grad_norm": 0.5707564949989319, "learning_rate": 2.087439452327608e-06, "loss": 0.0206, "step": 25738 }, { "epoch": 11.96652719665272, "grad_norm": 0.5842170715332031, "learning_rate": 2.0561343579005105e-06, "loss": 0.0086, "step": 25740 }, { "epoch": 11.9674569967457, "grad_norm": 0.08343888819217682, "learning_rate": 2.025055710164492e-06, "loss": 0.0047, "step": 25742 }, { "epoch": 11.96838679683868, "grad_norm": 0.23490111529827118, "learning_rate": 1.9942038158533186e-06, "loss": 0.0042, "step": 25744 }, { "epoch": 11.969316596931659, "grad_norm": 0.08204822987318039, "learning_rate": 1.963578979462533e-06, "loss": 0.0053, "step": 25746 }, { "epoch": 11.97024639702464, "grad_norm": 0.5936181545257568, "learning_rate": 1.933181503247106e-06, "loss": 0.0064, "step": 25748 }, { "epoch": 11.97117619711762, "grad_norm": 0.36585733294487, "learning_rate": 1.903011687217793e-06, "loss": 0.005, "step": 25750 }, { "epoch": 11.9721059972106, "grad_norm": 0.7085009813308716, "learning_rate": 1.8730698291385662e-06, "loss": 0.0071, "step": 25752 }, { "epoch": 11.973035797303579, "grad_norm": 0.5651799440383911, "learning_rate": 1.8433562245233353e-06, "loss": 0.0064, "step": 25754 }, { "epoch": 11.97396559739656, "grad_norm": 0.42965182662010193, "learning_rate": 1.8138711666334522e-06, "loss": 0.0043, "step": 25756 }, { "epoch": 11.97489539748954, "grad_norm": 0.9738620519638062, "learning_rate": 1.7846149464746032e-06, "loss": 0.0304, "step": 25758 }, { "epoch": 11.97582519758252, "grad_norm": 0.08819863945245743, "learning_rate": 1.7555878527937363e-06, "loss": 0.0046, "step": 25760 }, { "epoch": 11.9767549976755, "grad_norm": 0.2541852593421936, "learning_rate": 1.7267901720766148e-06, "loss": 0.0088, "step": 25762 }, { "epoch": 11.97768479776848, "grad_norm": 0.5520263910293579, "learning_rate": 1.6982221885447185e-06, "loss": 0.0071, "step": 25764 }, { "epoch": 11.97861459786146, "grad_norm": 0.39669865369796753, "learning_rate": 1.6698841841525791e-06, "loss": 0.0078, "step": 25766 }, { "epoch": 11.97954439795444, "grad_norm": 0.16082967817783356, "learning_rate": 1.641776438584728e-06, "loss": 0.0055, "step": 25768 }, { "epoch": 11.98047419804742, "grad_norm": 0.7975040078163147, "learning_rate": 1.6138992292533302e-06, "loss": 0.0089, "step": 25770 }, { "epoch": 11.981403998140399, "grad_norm": 0.9108855724334717, "learning_rate": 1.586252831295191e-06, "loss": 0.0061, "step": 25772 }, { "epoch": 11.98233379823338, "grad_norm": 0.36011144518852234, "learning_rate": 1.558837517569096e-06, "loss": 0.004, "step": 25774 }, { "epoch": 11.98326359832636, "grad_norm": 0.1855759620666504, "learning_rate": 1.5316535586531217e-06, "loss": 0.0067, "step": 25776 }, { "epoch": 11.98419339841934, "grad_norm": 0.5548912286758423, "learning_rate": 1.5047012228420295e-06, "loss": 0.0053, "step": 25778 }, { "epoch": 11.98512319851232, "grad_norm": 0.10042867809534073, "learning_rate": 1.4779807761443704e-06, "loss": 0.0024, "step": 25780 }, { "epoch": 11.9860529986053, "grad_norm": 0.10016090422868729, "learning_rate": 1.4514924822802295e-06, "loss": 0.0152, "step": 25782 }, { "epoch": 11.98698279869828, "grad_norm": 0.6787150502204895, "learning_rate": 1.4252366026783687e-06, "loss": 0.0074, "step": 25784 }, { "epoch": 11.98791259879126, "grad_norm": 1.224422574043274, "learning_rate": 1.3992133964737847e-06, "loss": 0.0162, "step": 25786 }, { "epoch": 11.98884239888424, "grad_norm": 1.0143358707427979, "learning_rate": 1.3734231205048923e-06, "loss": 0.0109, "step": 25788 }, { "epoch": 11.98977219897722, "grad_norm": 1.2395310401916504, "learning_rate": 1.347866029311369e-06, "loss": 0.0099, "step": 25790 }, { "epoch": 11.9907019990702, "grad_norm": 0.23322585225105286, "learning_rate": 1.3225423751314374e-06, "loss": 0.0035, "step": 25792 }, { "epoch": 11.99163179916318, "grad_norm": 0.8297770023345947, "learning_rate": 1.2974524078992317e-06, "loss": 0.0042, "step": 25794 }, { "epoch": 11.99256159925616, "grad_norm": 0.15363521873950958, "learning_rate": 1.272596375242656e-06, "loss": 0.0042, "step": 25796 }, { "epoch": 11.99349139934914, "grad_norm": 0.10351111739873886, "learning_rate": 1.2479745224807148e-06, "loss": 0.0042, "step": 25798 }, { "epoch": 11.99442119944212, "grad_norm": 0.561823308467865, "learning_rate": 1.2235870926211553e-06, "loss": 0.0066, "step": 25800 }, { "epoch": 11.9953509995351, "grad_norm": 0.7806784510612488, "learning_rate": 1.1994343263580668e-06, "loss": 0.0082, "step": 25802 }, { "epoch": 11.99628079962808, "grad_norm": 1.4377856254577637, "learning_rate": 1.1755164620695e-06, "loss": 0.014, "step": 25804 }, { "epoch": 11.99721059972106, "grad_norm": 0.7716406583786011, "learning_rate": 1.1518337358151741e-06, "loss": 0.0108, "step": 25806 }, { "epoch": 11.99814039981404, "grad_norm": 0.2823881208896637, "learning_rate": 1.1283863813339257e-06, "loss": 0.0124, "step": 25808 }, { "epoch": 11.99907019990702, "grad_norm": 0.18876679241657257, "learning_rate": 1.1051746300417355e-06, "loss": 0.0036, "step": 25810 }, { "epoch": 12.0, "grad_norm": 1.4139397144317627, "learning_rate": 1.0821987110292668e-06, "loss": 0.0163, "step": 25812 }, { "epoch": 12.00092980009298, "grad_norm": 0.8388379812240601, "learning_rate": 1.0594588510594637e-06, "loss": 0.009, "step": 25814 }, { "epoch": 12.00185960018596, "grad_norm": 0.09959103167057037, "learning_rate": 1.0369552745656098e-06, "loss": 0.002, "step": 25816 }, { "epoch": 12.00278940027894, "grad_norm": 0.2656339406967163, "learning_rate": 1.0146882036489256e-06, "loss": 0.0034, "step": 25818 }, { "epoch": 12.00371920037192, "grad_norm": 0.35815009474754333, "learning_rate": 9.926578580764573e-07, "loss": 0.0032, "step": 25820 }, { "epoch": 12.0046490004649, "grad_norm": 0.5795745253562927, "learning_rate": 9.708644552787255e-07, "loss": 0.005, "step": 25822 }, { "epoch": 12.00557880055788, "grad_norm": 0.08266130834817886, "learning_rate": 9.493082103478663e-07, "loss": 0.0021, "step": 25824 }, { "epoch": 12.00650860065086, "grad_norm": 0.6017718315124512, "learning_rate": 9.279893360353598e-07, "loss": 0.006, "step": 25826 }, { "epoch": 12.00743840074384, "grad_norm": 0.06560755521059036, "learning_rate": 9.069080427497495e-07, "loss": 0.0044, "step": 25828 }, { "epoch": 12.00836820083682, "grad_norm": 0.14910265803337097, "learning_rate": 8.860645385550293e-07, "loss": 0.0023, "step": 25830 }, { "epoch": 12.0092980009298, "grad_norm": 0.7016617655754089, "learning_rate": 8.654590291681704e-07, "loss": 0.006, "step": 25832 }, { "epoch": 12.010227801022781, "grad_norm": 0.12261174619197845, "learning_rate": 8.450917179571395e-07, "loss": 0.0034, "step": 25834 }, { "epoch": 12.01115760111576, "grad_norm": 0.17804336547851562, "learning_rate": 8.249628059391204e-07, "loss": 0.0025, "step": 25836 }, { "epoch": 12.01208740120874, "grad_norm": 0.3313988745212555, "learning_rate": 8.050724917783506e-07, "loss": 0.0039, "step": 25838 }, { "epoch": 12.01301720130172, "grad_norm": 0.08246950805187225, "learning_rate": 7.854209717842434e-07, "loss": 0.0081, "step": 25840 }, { "epoch": 12.0139470013947, "grad_norm": 0.2815708816051483, "learning_rate": 7.66008439909278e-07, "loss": 0.0036, "step": 25842 }, { "epoch": 12.01487680148768, "grad_norm": 0.341844379901886, "learning_rate": 7.468350877473561e-07, "loss": 0.006, "step": 25844 }, { "epoch": 12.01580660158066, "grad_norm": 0.10037597268819809, "learning_rate": 7.279011045317596e-07, "loss": 0.0021, "step": 25846 }, { "epoch": 12.01673640167364, "grad_norm": 1.2770401239395142, "learning_rate": 7.092066771331769e-07, "loss": 0.0176, "step": 25848 }, { "epoch": 12.01766620176662, "grad_norm": 0.36246106028556824, "learning_rate": 6.907519900581012e-07, "loss": 0.0027, "step": 25850 }, { "epoch": 12.018596001859601, "grad_norm": 0.09743882715702057, "learning_rate": 6.725372254468411e-07, "loss": 0.0018, "step": 25852 }, { "epoch": 12.01952580195258, "grad_norm": 0.07488467544317245, "learning_rate": 6.545625630717741e-07, "loss": 0.0031, "step": 25854 }, { "epoch": 12.02045560204556, "grad_norm": 0.09960465133190155, "learning_rate": 6.368281803355594e-07, "loss": 0.003, "step": 25856 }, { "epoch": 12.02138540213854, "grad_norm": 0.10470974445343018, "learning_rate": 6.193342522693948e-07, "loss": 0.0049, "step": 25858 }, { "epoch": 12.02231520223152, "grad_norm": 0.6189177632331848, "learning_rate": 6.020809515313238e-07, "loss": 0.0056, "step": 25860 }, { "epoch": 12.0232450023245, "grad_norm": 0.17771388590335846, "learning_rate": 5.850684484043869e-07, "loss": 0.0023, "step": 25862 }, { "epoch": 12.02417480241748, "grad_norm": 0.22055861353874207, "learning_rate": 5.68296910795156e-07, "loss": 0.003, "step": 25864 }, { "epoch": 12.02510460251046, "grad_norm": 0.16942936182022095, "learning_rate": 5.517665042319778e-07, "loss": 0.003, "step": 25866 }, { "epoch": 12.02603440260344, "grad_norm": 0.07387848198413849, "learning_rate": 5.354773918632108e-07, "loss": 0.0026, "step": 25868 }, { "epoch": 12.026964202696421, "grad_norm": 1.4454677104949951, "learning_rate": 5.194297344558605e-07, "loss": 0.0072, "step": 25870 }, { "epoch": 12.0278940027894, "grad_norm": 0.15028952062129974, "learning_rate": 5.036236903938244e-07, "loss": 0.004, "step": 25872 }, { "epoch": 12.02882380288238, "grad_norm": 0.8992105722427368, "learning_rate": 4.880594156764133e-07, "loss": 0.0094, "step": 25874 }, { "epoch": 12.02975360297536, "grad_norm": 0.27807751297950745, "learning_rate": 4.7273706391666326e-07, "loss": 0.003, "step": 25876 }, { "epoch": 12.030683403068341, "grad_norm": 0.6056722402572632, "learning_rate": 4.57656786340037e-07, "loss": 0.005, "step": 25878 }, { "epoch": 12.03161320316132, "grad_norm": 0.2910832166671753, "learning_rate": 4.428187317828168e-07, "loss": 0.0037, "step": 25880 }, { "epoch": 12.0325430032543, "grad_norm": 0.10739566385746002, "learning_rate": 4.2822304669051385e-07, "loss": 0.0032, "step": 25882 }, { "epoch": 12.03347280334728, "grad_norm": 0.11177029460668564, "learning_rate": 4.138698751167445e-07, "loss": 0.0026, "step": 25884 }, { "epoch": 12.03440260344026, "grad_norm": 0.3119395077228546, "learning_rate": 3.9975935872148697e-07, "loss": 0.0041, "step": 25886 }, { "epoch": 12.035332403533241, "grad_norm": 0.10018681734800339, "learning_rate": 3.8589163676987113e-07, "loss": 0.0034, "step": 25888 }, { "epoch": 12.03626220362622, "grad_norm": 0.35671889781951904, "learning_rate": 3.7226684613064946e-07, "loss": 0.0058, "step": 25890 }, { "epoch": 12.0371920037192, "grad_norm": 0.23950324952602386, "learning_rate": 3.588851212750394e-07, "loss": 0.0041, "step": 25892 }, { "epoch": 12.03812180381218, "grad_norm": 0.35959210991859436, "learning_rate": 3.457465942752942e-07, "loss": 0.004, "step": 25894 }, { "epoch": 12.039051603905161, "grad_norm": 0.25200584530830383, "learning_rate": 3.328513948033064e-07, "loss": 0.0039, "step": 25896 }, { "epoch": 12.03998140399814, "grad_norm": 0.19839471578598022, "learning_rate": 3.201996501295203e-07, "loss": 0.0042, "step": 25898 }, { "epoch": 12.04091120409112, "grad_norm": 0.2186138778924942, "learning_rate": 3.0779148512157985e-07, "loss": 0.0025, "step": 25900 }, { "epoch": 12.0418410041841, "grad_norm": 0.28360432386398315, "learning_rate": 2.956270222430049e-07, "loss": 0.0036, "step": 25902 }, { "epoch": 12.04277080427708, "grad_norm": 0.2183711677789688, "learning_rate": 2.837063815521588e-07, "loss": 0.0022, "step": 25904 }, { "epoch": 12.043700604370061, "grad_norm": 0.03211750090122223, "learning_rate": 2.720296807009574e-07, "loss": 0.0011, "step": 25906 }, { "epoch": 12.04463040446304, "grad_norm": 0.3180871903896332, "learning_rate": 2.6059703493374814e-07, "loss": 0.0045, "step": 25908 }, { "epoch": 12.04556020455602, "grad_norm": 0.08269177377223969, "learning_rate": 2.494085570860526e-07, "loss": 0.0034, "step": 25910 }, { "epoch": 12.046490004649, "grad_norm": 0.05820699408650398, "learning_rate": 2.3846435758370576e-07, "loss": 0.0028, "step": 25912 }, { "epoch": 12.047419804741981, "grad_norm": 1.1933085918426514, "learning_rate": 2.2776454444153817e-07, "loss": 0.0087, "step": 25914 }, { "epoch": 12.04834960483496, "grad_norm": 0.030138222500681877, "learning_rate": 2.1730922326233743e-07, "loss": 0.002, "step": 25916 }, { "epoch": 12.04927940492794, "grad_norm": 0.10726530849933624, "learning_rate": 2.0709849723592408e-07, "loss": 0.0105, "step": 25918 }, { "epoch": 12.05020920502092, "grad_norm": 1.1230831146240234, "learning_rate": 1.9713246713806644e-07, "loss": 0.011, "step": 25920 }, { "epoch": 12.051139005113901, "grad_norm": 0.06509082764387131, "learning_rate": 1.8741123132941466e-07, "loss": 0.0026, "step": 25922 }, { "epoch": 12.052068805206881, "grad_norm": 0.09137183427810669, "learning_rate": 1.779348857546626e-07, "loss": 0.0032, "step": 25924 }, { "epoch": 12.05299860529986, "grad_norm": 0.2823321223258972, "learning_rate": 1.6870352394151253e-07, "loss": 0.0029, "step": 25926 }, { "epoch": 12.05392840539284, "grad_norm": 0.8446424603462219, "learning_rate": 1.5971723699980355e-07, "loss": 0.0052, "step": 25928 }, { "epoch": 12.05485820548582, "grad_norm": 0.6912087202072144, "learning_rate": 1.50976113620518e-07, "loss": 0.0064, "step": 25930 }, { "epoch": 12.055788005578801, "grad_norm": 0.8177868127822876, "learning_rate": 1.4248024007503202e-07, "loss": 0.01, "step": 25932 }, { "epoch": 12.05671780567178, "grad_norm": 0.06625031679868698, "learning_rate": 1.342297002142108e-07, "loss": 0.0017, "step": 25934 }, { "epoch": 12.05764760576476, "grad_norm": 0.1608959436416626, "learning_rate": 1.2622457546751192e-07, "loss": 0.0077, "step": 25936 }, { "epoch": 12.05857740585774, "grad_norm": 0.14656203985214233, "learning_rate": 1.1846494484228606e-07, "loss": 0.0026, "step": 25938 }, { "epoch": 12.059507205950721, "grad_norm": 0.4310585558414459, "learning_rate": 1.1095088492298866e-07, "loss": 0.0049, "step": 25940 }, { "epoch": 12.060437006043701, "grad_norm": 0.07939330488443375, "learning_rate": 1.0368246987036114e-07, "loss": 0.0023, "step": 25942 }, { "epoch": 12.06136680613668, "grad_norm": 0.09251703321933746, "learning_rate": 9.665977142068709e-08, "loss": 0.0041, "step": 25944 }, { "epoch": 12.06229660622966, "grad_norm": 0.27453410625457764, "learning_rate": 8.98828588851844e-08, "loss": 0.0045, "step": 25946 }, { "epoch": 12.06322640632264, "grad_norm": 0.09046991914510727, "learning_rate": 8.335179914926136e-08, "loss": 0.006, "step": 25948 }, { "epoch": 12.064156206415621, "grad_norm": 0.3515627980232239, "learning_rate": 7.706665667180344e-08, "loss": 0.0033, "step": 25950 }, { "epoch": 12.0650860065086, "grad_norm": 0.17624999582767487, "learning_rate": 7.102749348465141e-08, "loss": 0.0022, "step": 25952 }, { "epoch": 12.06601580660158, "grad_norm": 0.0584106482565403, "learning_rate": 6.523436919191861e-08, "loss": 0.0044, "step": 25954 }, { "epoch": 12.06694560669456, "grad_norm": 0.29466843605041504, "learning_rate": 5.968734096937747e-08, "loss": 0.0027, "step": 25956 }, { "epoch": 12.067875406787541, "grad_norm": 0.20083795487880707, "learning_rate": 5.438646356396831e-08, "loss": 0.0026, "step": 25958 }, { "epoch": 12.068805206880521, "grad_norm": 0.17379336059093475, "learning_rate": 4.933178929321365e-08, "loss": 0.0025, "step": 25960 }, { "epoch": 12.0697350069735, "grad_norm": 0.10912217199802399, "learning_rate": 4.4523368044715874e-08, "loss": 0.0054, "step": 25962 }, { "epoch": 12.07066480706648, "grad_norm": 0.37733587622642517, "learning_rate": 3.996124727562154e-08, "loss": 0.0047, "step": 25964 }, { "epoch": 12.071594607159462, "grad_norm": 0.10354180634021759, "learning_rate": 3.564547201225221e-08, "loss": 0.0054, "step": 25966 }, { "epoch": 12.072524407252441, "grad_norm": 0.11569740623235703, "learning_rate": 3.1576084849565984e-08, "loss": 0.0045, "step": 25968 }, { "epoch": 12.07345420734542, "grad_norm": 0.13071398437023163, "learning_rate": 2.7753125950752318e-08, "loss": 0.0032, "step": 25970 }, { "epoch": 12.0743840074384, "grad_norm": 0.4957360625267029, "learning_rate": 2.4176633046879476e-08, "loss": 0.0034, "step": 25972 }, { "epoch": 12.07531380753138, "grad_norm": 0.5234202742576599, "learning_rate": 2.0846641436494877e-08, "loss": 0.0115, "step": 25974 }, { "epoch": 12.076243607624361, "grad_norm": 0.09743230789899826, "learning_rate": 1.7763183985272596e-08, "loss": 0.0033, "step": 25976 }, { "epoch": 12.077173407717341, "grad_norm": 0.20053711533546448, "learning_rate": 1.492629112567751e-08, "loss": 0.0026, "step": 25978 }, { "epoch": 12.07810320781032, "grad_norm": 0.19473272562026978, "learning_rate": 1.2335990856709957e-08, "loss": 0.0032, "step": 25980 }, { "epoch": 12.0790330079033, "grad_norm": 0.3230682611465454, "learning_rate": 9.992308743589326e-09, "loss": 0.0047, "step": 25982 }, { "epoch": 12.079962807996282, "grad_norm": 0.3163733184337616, "learning_rate": 7.895267917501476e-09, "loss": 0.0035, "step": 25984 }, { "epoch": 12.080892608089261, "grad_norm": 0.1990329623222351, "learning_rate": 6.044889075398886e-09, "loss": 0.0028, "step": 25986 }, { "epoch": 12.08182240818224, "grad_norm": 0.1328582465648651, "learning_rate": 4.44119047977863e-09, "loss": 0.0028, "step": 25988 }, { "epoch": 12.08275220827522, "grad_norm": 0.06369932740926743, "learning_rate": 3.0841879584880725e-09, "loss": 0.0025, "step": 25990 }, { "epoch": 12.0836820083682, "grad_norm": 0.3117820918560028, "learning_rate": 1.9738949045972e-09, "loss": 0.0038, "step": 25992 }, { "epoch": 12.084611808461181, "grad_norm": 0.26497161388397217, "learning_rate": 1.1103222762542906e-09, "loss": 0.0042, "step": 25994 }, { "epoch": 12.085541608554161, "grad_norm": 0.0899633839726448, "learning_rate": 4.934785965721151e-10, "loss": 0.0026, "step": 25996 }, { "epoch": 12.08647140864714, "grad_norm": 0.11538621038198471, "learning_rate": 1.2336995354467153e-10, "loss": 0.0029, "step": 25998 }, { "epoch": 12.08740120874012, "grad_norm": 1.033705711364746, "learning_rate": 0.0, "loss": 0.0233, "step": 26000 }, { "epoch": 12.08740120874012, "eval_cer": 0.10650482497136614, "eval_loss": 0.17576922476291656, "eval_runtime": 398.6807, "eval_samples_per_second": 31.84, "eval_steps_per_second": 0.996, "step": 26000 }, { "epoch": 12.088331008833102, "grad_norm": 0.1838456690311432, "learning_rate": 1.2336995354467197e-10, "loss": 0.0025, "step": 26002 }, { "epoch": 12.089260808926081, "grad_norm": 0.16415676474571228, "learning_rate": 4.934785965721167e-10, "loss": 0.0031, "step": 26004 }, { "epoch": 12.09019060901906, "grad_norm": 0.10023327171802521, "learning_rate": 1.1103222762542941e-09, "loss": 0.0101, "step": 26006 }, { "epoch": 12.09112040911204, "grad_norm": 0.36419588327407837, "learning_rate": 1.973894904594431e-09, "loss": 0.0036, "step": 26008 }, { "epoch": 12.092050209205022, "grad_norm": 0.4537512958049774, "learning_rate": 3.084187958482532e-09, "loss": 0.0036, "step": 26010 }, { "epoch": 12.092980009298001, "grad_norm": 1.4371951818466187, "learning_rate": 4.441190479773095e-09, "loss": 0.0059, "step": 26012 }, { "epoch": 12.093909809390981, "grad_norm": 0.17298388481140137, "learning_rate": 6.044889075398908e-09, "loss": 0.0033, "step": 26014 }, { "epoch": 12.09483960948396, "grad_norm": 0.262686163187027, "learning_rate": 7.895267917495953e-09, "loss": 0.0032, "step": 26016 }, { "epoch": 12.09576940957694, "grad_norm": 0.46541762351989746, "learning_rate": 9.992308743583814e-09, "loss": 0.0046, "step": 26018 }, { "epoch": 12.096699209669922, "grad_norm": 0.0967332199215889, "learning_rate": 1.2335990856712779e-08, "loss": 0.0021, "step": 26020 }, { "epoch": 12.097629009762901, "grad_norm": 0.0890083834528923, "learning_rate": 1.4926291125674792e-08, "loss": 0.002, "step": 26022 }, { "epoch": 12.09855880985588, "grad_norm": 0.05371105298399925, "learning_rate": 1.7763183985267117e-08, "loss": 0.0021, "step": 26024 }, { "epoch": 12.09948860994886, "grad_norm": 0.08463862538337708, "learning_rate": 2.084664143649774e-08, "loss": 0.0046, "step": 26026 }, { "epoch": 12.100418410041842, "grad_norm": 0.09065423905849457, "learning_rate": 2.417663304688513e-08, "loss": 0.0021, "step": 26028 }, { "epoch": 12.101348210134821, "grad_norm": 0.48293817043304443, "learning_rate": 2.7753125950746888e-08, "loss": 0.0063, "step": 26030 }, { "epoch": 12.102278010227801, "grad_norm": 0.41423606872558594, "learning_rate": 3.157608484956057e-08, "loss": 0.0065, "step": 26032 }, { "epoch": 12.10320781032078, "grad_norm": 0.06796233355998993, "learning_rate": 3.5645472012257914e-08, "loss": 0.002, "step": 26034 }, { "epoch": 12.104137610413762, "grad_norm": 0.19831712543964386, "learning_rate": 3.996124727561617e-08, "loss": 0.0026, "step": 26036 }, { "epoch": 12.105067410506742, "grad_norm": 0.282614529132843, "learning_rate": 4.452336804469942e-08, "loss": 0.0027, "step": 26038 }, { "epoch": 12.105997210599721, "grad_norm": 0.15971770882606506, "learning_rate": 4.9331789293208306e-08, "loss": 0.0021, "step": 26040 }, { "epoch": 12.1069270106927, "grad_norm": 0.38586753606796265, "learning_rate": 5.438646356395188e-08, "loss": 0.007, "step": 26042 }, { "epoch": 12.10785681078568, "grad_norm": 0.09018779546022415, "learning_rate": 5.96873409693583e-08, "loss": 0.0025, "step": 26044 }, { "epoch": 12.108786610878662, "grad_norm": 0.08206065744161606, "learning_rate": 6.523436919191334e-08, "loss": 0.0074, "step": 26046 }, { "epoch": 12.109716410971641, "grad_norm": 0.5270140171051025, "learning_rate": 7.102749348466004e-08, "loss": 0.0074, "step": 26048 }, { "epoch": 12.110646211064621, "grad_norm": 0.07184328138828278, "learning_rate": 7.70666566717982e-08, "loss": 0.0031, "step": 26050 }, { "epoch": 12.1115760111576, "grad_norm": 0.2326747328042984, "learning_rate": 8.335179914925336e-08, "loss": 0.0032, "step": 26052 }, { "epoch": 12.112505811250582, "grad_norm": 0.17986565828323364, "learning_rate": 8.988285888519308e-08, "loss": 0.007, "step": 26054 }, { "epoch": 12.113435611343562, "grad_norm": 0.4018728733062744, "learning_rate": 9.665977142068191e-08, "loss": 0.0041, "step": 26056 }, { "epoch": 12.114365411436541, "grad_norm": 0.07391513139009476, "learning_rate": 1.0368246987035322e-07, "loss": 0.0023, "step": 26058 }, { "epoch": 12.11529521152952, "grad_norm": 0.09794200211763382, "learning_rate": 1.1095088492300021e-07, "loss": 0.0025, "step": 26060 }, { "epoch": 12.1162250116225, "grad_norm": 1.1275711059570312, "learning_rate": 1.1846494484229485e-07, "loss": 0.012, "step": 26062 }, { "epoch": 12.117154811715482, "grad_norm": 0.258942574262619, "learning_rate": 1.2622457546748466e-07, "loss": 0.0039, "step": 26064 }, { "epoch": 12.118084611808461, "grad_norm": 0.11164847761392593, "learning_rate": 1.3422970021418353e-07, "loss": 0.0082, "step": 26066 }, { "epoch": 12.119014411901441, "grad_norm": 0.13160887360572815, "learning_rate": 1.424802400750242e-07, "loss": 0.0023, "step": 26068 }, { "epoch": 12.11994421199442, "grad_norm": 0.877353310585022, "learning_rate": 1.5097611362049076e-07, "loss": 0.0071, "step": 26070 }, { "epoch": 12.120874012087402, "grad_norm": 0.6724251508712769, "learning_rate": 1.5971723699977634e-07, "loss": 0.0096, "step": 26072 }, { "epoch": 12.121803812180381, "grad_norm": 0.2641468644142151, "learning_rate": 1.687035239415242e-07, "loss": 0.0041, "step": 26074 }, { "epoch": 12.122733612273361, "grad_norm": 0.1517239660024643, "learning_rate": 1.7793488575465208e-07, "loss": 0.0023, "step": 26076 }, { "epoch": 12.12366341236634, "grad_norm": 0.1081017553806305, "learning_rate": 1.874112313294069e-07, "loss": 0.0029, "step": 26078 }, { "epoch": 12.124593212459322, "grad_norm": 0.11138558387756348, "learning_rate": 1.9713246713805872e-07, "loss": 0.003, "step": 26080 }, { "epoch": 12.125523012552302, "grad_norm": 0.20127688348293304, "learning_rate": 2.070984972359386e-07, "loss": 0.0059, "step": 26082 }, { "epoch": 12.126452812645281, "grad_norm": 0.3369797170162201, "learning_rate": 2.17309223262327e-07, "loss": 0.0084, "step": 26084 }, { "epoch": 12.127382612738261, "grad_norm": 0.2340410202741623, "learning_rate": 2.2776454444152777e-07, "loss": 0.0032, "step": 26086 }, { "epoch": 12.12831241283124, "grad_norm": 0.10705067217350006, "learning_rate": 2.384643575837204e-07, "loss": 0.002, "step": 26088 }, { "epoch": 12.129242212924222, "grad_norm": 0.05458638444542885, "learning_rate": 2.494085570860422e-07, "loss": 0.0013, "step": 26090 }, { "epoch": 12.130172013017201, "grad_norm": 0.23390284180641174, "learning_rate": 2.605970349337128e-07, "loss": 0.0048, "step": 26092 }, { "epoch": 12.131101813110181, "grad_norm": 0.22883087396621704, "learning_rate": 2.7202968070094707e-07, "loss": 0.0034, "step": 26094 }, { "epoch": 12.13203161320316, "grad_norm": 0.45893511176109314, "learning_rate": 2.8370638155212074e-07, "loss": 0.0045, "step": 26096 }, { "epoch": 12.132961413296142, "grad_norm": 0.1627352237701416, "learning_rate": 2.9562702224296415e-07, "loss": 0.004, "step": 26098 }, { "epoch": 12.133891213389122, "grad_norm": 0.45856547355651855, "learning_rate": 3.0779148512153914e-07, "loss": 0.0032, "step": 26100 }, { "epoch": 12.134821013482101, "grad_norm": 0.1588565856218338, "learning_rate": 3.201996501295379e-07, "loss": 0.0121, "step": 26102 }, { "epoch": 12.135750813575081, "grad_norm": 0.1525639444589615, "learning_rate": 3.328513948032935e-07, "loss": 0.0027, "step": 26104 }, { "epoch": 12.13668061366806, "grad_norm": 0.09403475373983383, "learning_rate": 3.4574659427528133e-07, "loss": 0.0043, "step": 26106 }, { "epoch": 12.137610413761042, "grad_norm": 0.16324973106384277, "learning_rate": 3.588851212750571e-07, "loss": 0.0026, "step": 26108 }, { "epoch": 12.138540213854021, "grad_norm": 0.5661905407905579, "learning_rate": 3.722668461306367e-07, "loss": 0.0053, "step": 26110 }, { "epoch": 12.139470013947001, "grad_norm": 0.06469167023897171, "learning_rate": 3.858916367698585e-07, "loss": 0.0019, "step": 26112 }, { "epoch": 12.14039981403998, "grad_norm": 0.04218466579914093, "learning_rate": 3.997593587215049e-07, "loss": 0.0013, "step": 26114 }, { "epoch": 12.141329614132962, "grad_norm": 0.05904131382703781, "learning_rate": 4.138698751167625e-07, "loss": 0.0035, "step": 26116 }, { "epoch": 12.142259414225942, "grad_norm": 0.10601228475570679, "learning_rate": 4.282230466905013e-07, "loss": 0.0037, "step": 26118 }, { "epoch": 12.143189214318921, "grad_norm": 0.2147350311279297, "learning_rate": 4.4281873178277083e-07, "loss": 0.0047, "step": 26120 }, { "epoch": 12.144119014411901, "grad_norm": 1.236702561378479, "learning_rate": 4.576567863400243e-07, "loss": 0.0106, "step": 26122 }, { "epoch": 12.145048814504882, "grad_norm": 0.22115881741046906, "learning_rate": 4.727370639166145e-07, "loss": 0.0085, "step": 26124 }, { "epoch": 12.145978614597862, "grad_norm": 0.152614563703537, "learning_rate": 4.880594156763646e-07, "loss": 0.0077, "step": 26126 }, { "epoch": 12.146908414690841, "grad_norm": 1.5611858367919922, "learning_rate": 5.036236903938479e-07, "loss": 0.023, "step": 26128 }, { "epoch": 12.147838214783821, "grad_norm": 0.17170608043670654, "learning_rate": 5.194297344558451e-07, "loss": 0.0078, "step": 26130 }, { "epoch": 12.1487680148768, "grad_norm": 0.10758437216281891, "learning_rate": 5.354773918631955e-07, "loss": 0.0142, "step": 26132 }, { "epoch": 12.149697814969782, "grad_norm": 0.10319983959197998, "learning_rate": 5.517665042319625e-07, "loss": 0.0021, "step": 26134 }, { "epoch": 12.150627615062762, "grad_norm": 0.7235544919967651, "learning_rate": 5.682969107951795e-07, "loss": 0.0087, "step": 26136 }, { "epoch": 12.151557415155741, "grad_norm": 0.16721028089523315, "learning_rate": 5.850684484043715e-07, "loss": 0.0033, "step": 26138 }, { "epoch": 12.15248721524872, "grad_norm": 1.1335574388504028, "learning_rate": 6.020809515313085e-07, "loss": 0.0091, "step": 26140 }, { "epoch": 12.153417015341702, "grad_norm": 0.11096800863742828, "learning_rate": 6.193342522694183e-07, "loss": 0.0039, "step": 26142 }, { "epoch": 12.154346815434682, "grad_norm": 0.3421127498149872, "learning_rate": 6.368281803355439e-07, "loss": 0.0033, "step": 26144 }, { "epoch": 12.155276615527661, "grad_norm": 0.2787078619003296, "learning_rate": 6.545625630717585e-07, "loss": 0.0036, "step": 26146 }, { "epoch": 12.156206415620641, "grad_norm": 0.2118760496377945, "learning_rate": 6.725372254468255e-07, "loss": 0.0031, "step": 26148 }, { "epoch": 12.15713621571362, "grad_norm": 0.061376411467790604, "learning_rate": 6.907519900580829e-07, "loss": 0.0039, "step": 26150 }, { "epoch": 12.158066015806602, "grad_norm": 0.4164101779460907, "learning_rate": 7.09206677133117e-07, "loss": 0.0041, "step": 26152 }, { "epoch": 12.158995815899582, "grad_norm": 0.6091434359550476, "learning_rate": 7.279011045316999e-07, "loss": 0.0055, "step": 26154 }, { "epoch": 12.159925615992561, "grad_norm": 0.10558336973190308, "learning_rate": 7.468350877473797e-07, "loss": 0.0031, "step": 26156 }, { "epoch": 12.16085541608554, "grad_norm": 0.7938752174377441, "learning_rate": 7.6600843990926e-07, "loss": 0.0069, "step": 26158 }, { "epoch": 12.161785216178522, "grad_norm": 0.22179186344146729, "learning_rate": 7.854209717842254e-07, "loss": 0.003, "step": 26160 }, { "epoch": 12.162715016271502, "grad_norm": 0.5633471012115479, "learning_rate": 8.05072491778377e-07, "loss": 0.0065, "step": 26162 }, { "epoch": 12.163644816364481, "grad_norm": 0.1596798449754715, "learning_rate": 8.249628059391024e-07, "loss": 0.0025, "step": 26164 }, { "epoch": 12.164574616457461, "grad_norm": 0.1732005923986435, "learning_rate": 8.45091717957119e-07, "loss": 0.0138, "step": 26166 }, { "epoch": 12.165504416550442, "grad_norm": 0.43672165274620056, "learning_rate": 8.654590291681499e-07, "loss": 0.0037, "step": 26168 }, { "epoch": 12.166434216643422, "grad_norm": 0.11993500590324402, "learning_rate": 8.860645385550559e-07, "loss": 0.0026, "step": 26170 }, { "epoch": 12.167364016736402, "grad_norm": 0.09943409264087677, "learning_rate": 9.069080427497289e-07, "loss": 0.0028, "step": 26172 }, { "epoch": 12.168293816829381, "grad_norm": 0.11391273140907288, "learning_rate": 9.279893360352921e-07, "loss": 0.0021, "step": 26174 }, { "epoch": 12.16922361692236, "grad_norm": 0.5806716680526733, "learning_rate": 9.493082103478455e-07, "loss": 0.0057, "step": 26176 }, { "epoch": 12.170153417015342, "grad_norm": 0.10726258903741837, "learning_rate": 9.708644552786546e-07, "loss": 0.0018, "step": 26178 }, { "epoch": 12.171083217108322, "grad_norm": 1.1056922674179077, "learning_rate": 9.926578580763861e-07, "loss": 0.0118, "step": 26180 }, { "epoch": 12.172013017201301, "grad_norm": 0.12616227567195892, "learning_rate": 1.0146882036489046e-06, "loss": 0.0027, "step": 26182 }, { "epoch": 12.172942817294281, "grad_norm": 0.6705018281936646, "learning_rate": 1.036955274565589e-06, "loss": 0.0045, "step": 26184 }, { "epoch": 12.173872617387262, "grad_norm": 0.16268518567085266, "learning_rate": 1.0594588510594406e-06, "loss": 0.0038, "step": 26186 }, { "epoch": 12.174802417480242, "grad_norm": 0.0875280350446701, "learning_rate": 1.0821987110292466e-06, "loss": 0.0031, "step": 26188 }, { "epoch": 12.175732217573222, "grad_norm": 0.07884868234395981, "learning_rate": 1.1051746300417684e-06, "loss": 0.002, "step": 26190 }, { "epoch": 12.176662017666201, "grad_norm": 0.10920016467571259, "learning_rate": 1.1283863813339058e-06, "loss": 0.0082, "step": 26192 }, { "epoch": 12.177591817759183, "grad_norm": 1.36835777759552, "learning_rate": 1.1518337358151515e-06, "loss": 0.0096, "step": 26194 }, { "epoch": 12.178521617852162, "grad_norm": 0.3187304139137268, "learning_rate": 1.175516462069533e-06, "loss": 0.0035, "step": 26196 }, { "epoch": 12.179451417945142, "grad_norm": 0.155853271484375, "learning_rate": 1.1994343263580443e-06, "loss": 0.0033, "step": 26198 }, { "epoch": 12.180381218038121, "grad_norm": 0.272286981344223, "learning_rate": 1.2235870926211333e-06, "loss": 0.0031, "step": 26200 }, { "epoch": 12.181311018131101, "grad_norm": 0.41077104210853577, "learning_rate": 1.247974522480693e-06, "loss": 0.0024, "step": 26202 }, { "epoch": 12.182240818224082, "grad_norm": 0.4436368942260742, "learning_rate": 1.2725963752426317e-06, "loss": 0.0054, "step": 26204 }, { "epoch": 12.183170618317062, "grad_norm": 0.4685962498188019, "learning_rate": 1.2974524078991546e-06, "loss": 0.0147, "step": 26206 }, { "epoch": 12.184100418410042, "grad_norm": 0.1082296147942543, "learning_rate": 1.3225423751313608e-06, "loss": 0.0034, "step": 26208 }, { "epoch": 12.185030218503021, "grad_norm": 0.7123224139213562, "learning_rate": 1.3478660293113451e-06, "loss": 0.0038, "step": 26210 }, { "epoch": 12.185960018596003, "grad_norm": 0.11998046934604645, "learning_rate": 1.373423120504871e-06, "loss": 0.002, "step": 26212 }, { "epoch": 12.186889818688982, "grad_norm": 0.11645714938640594, "learning_rate": 1.3992133964737608e-06, "loss": 0.0023, "step": 26214 }, { "epoch": 12.187819618781962, "grad_norm": 0.2896559536457062, "learning_rate": 1.4252366026784058e-06, "loss": 0.0059, "step": 26216 }, { "epoch": 12.188749418874941, "grad_norm": 0.04061846807599068, "learning_rate": 1.4514924822802053e-06, "loss": 0.003, "step": 26218 }, { "epoch": 12.189679218967921, "grad_norm": 0.09420754760503769, "learning_rate": 1.477980776144346e-06, "loss": 0.0033, "step": 26220 }, { "epoch": 12.190609019060902, "grad_norm": 0.08674295991659164, "learning_rate": 1.5047012228420051e-06, "loss": 0.002, "step": 26222 }, { "epoch": 12.191538819153882, "grad_norm": 0.16422860324382782, "learning_rate": 1.5316535586531585e-06, "loss": 0.0038, "step": 26224 }, { "epoch": 12.192468619246862, "grad_norm": 0.29283928871154785, "learning_rate": 1.5588375175690721e-06, "loss": 0.0025, "step": 26226 }, { "epoch": 12.193398419339841, "grad_norm": 0.1203363761305809, "learning_rate": 1.5862528312951673e-06, "loss": 0.0027, "step": 26228 }, { "epoch": 12.194328219432823, "grad_norm": 0.101221464574337, "learning_rate": 1.6138992292533064e-06, "loss": 0.0075, "step": 26230 }, { "epoch": 12.195258019525802, "grad_norm": 0.22295816242694855, "learning_rate": 1.6417764385846405e-06, "loss": 0.0032, "step": 26232 }, { "epoch": 12.196187819618782, "grad_norm": 0.39818769693374634, "learning_rate": 1.6698841841524919e-06, "loss": 0.004, "step": 26234 }, { "epoch": 12.197117619711761, "grad_norm": 0.17776614427566528, "learning_rate": 1.698222188544695e-06, "loss": 0.0028, "step": 26236 }, { "epoch": 12.198047419804743, "grad_norm": 0.7002178430557251, "learning_rate": 1.7267901720766555e-06, "loss": 0.0048, "step": 26238 }, { "epoch": 12.198977219897722, "grad_norm": 0.5955983400344849, "learning_rate": 1.755587852793713e-06, "loss": 0.0057, "step": 26240 }, { "epoch": 12.199907019990702, "grad_norm": 0.13069206476211548, "learning_rate": 1.7846149464745797e-06, "loss": 0.0022, "step": 26242 }, { "epoch": 12.200836820083682, "grad_norm": 0.30738112330436707, "learning_rate": 1.8138711666334954e-06, "loss": 0.0032, "step": 26244 }, { "epoch": 12.201766620176661, "grad_norm": 0.18521112203598022, "learning_rate": 1.8433562245233093e-06, "loss": 0.0019, "step": 26246 }, { "epoch": 12.202696420269643, "grad_norm": 0.13332664966583252, "learning_rate": 1.8730698291385401e-06, "loss": 0.0016, "step": 26248 }, { "epoch": 12.203626220362622, "grad_norm": 0.08188112080097198, "learning_rate": 1.9030116872178363e-06, "loss": 0.0022, "step": 26250 }, { "epoch": 12.204556020455602, "grad_norm": 1.1084871292114258, "learning_rate": 1.93318150324708e-06, "loss": 0.005, "step": 26252 }, { "epoch": 12.205485820548581, "grad_norm": 0.16489572823047638, "learning_rate": 1.9635789794625065e-06, "loss": 0.0035, "step": 26254 }, { "epoch": 12.206415620641563, "grad_norm": 0.038033634424209595, "learning_rate": 1.994203815853223e-06, "loss": 0.0057, "step": 26256 }, { "epoch": 12.207345420734542, "grad_norm": 0.44770583510398865, "learning_rate": 2.0250557101644633e-06, "loss": 0.0052, "step": 26258 }, { "epoch": 12.208275220827522, "grad_norm": 0.05130356177687645, "learning_rate": 2.056134357900415e-06, "loss": 0.0017, "step": 26260 }, { "epoch": 12.209205020920502, "grad_norm": 0.24806493520736694, "learning_rate": 2.08743945232751e-06, "loss": 0.0081, "step": 26262 }, { "epoch": 12.210134821013481, "grad_norm": 0.5428979992866516, "learning_rate": 2.118970684477036e-06, "loss": 0.0062, "step": 26264 }, { "epoch": 12.211064621106463, "grad_norm": 0.5559217929840088, "learning_rate": 2.150727743148458e-06, "loss": 0.0061, "step": 26266 }, { "epoch": 12.211994421199442, "grad_norm": 0.09402748942375183, "learning_rate": 2.182710314912436e-06, "loss": 0.0023, "step": 26268 }, { "epoch": 12.212924221292422, "grad_norm": 0.13315774500370026, "learning_rate": 2.214918084113892e-06, "loss": 0.0033, "step": 26270 }, { "epoch": 12.213854021385401, "grad_norm": 0.2718457579612732, "learning_rate": 2.2473507328750717e-06, "loss": 0.0036, "step": 26272 }, { "epoch": 12.214783821478383, "grad_norm": 0.23269113898277283, "learning_rate": 2.2800079410989793e-06, "loss": 0.0041, "step": 26274 }, { "epoch": 12.215713621571362, "grad_norm": 0.07715395838022232, "learning_rate": 2.3128893864720745e-06, "loss": 0.0017, "step": 26276 }, { "epoch": 12.216643421664342, "grad_norm": 0.07302632927894592, "learning_rate": 2.3459947444677685e-06, "loss": 0.0032, "step": 26278 }, { "epoch": 12.217573221757322, "grad_norm": 0.07811291515827179, "learning_rate": 2.3793236883494678e-06, "loss": 0.0058, "step": 26280 }, { "epoch": 12.218503021850303, "grad_norm": 0.4296873211860657, "learning_rate": 2.4128758891741e-06, "loss": 0.0055, "step": 26282 }, { "epoch": 12.219432821943283, "grad_norm": 0.2273838371038437, "learning_rate": 2.4466510157949164e-06, "loss": 0.0032, "step": 26284 }, { "epoch": 12.220362622036262, "grad_norm": 0.3357243537902832, "learning_rate": 2.4806487348649773e-06, "loss": 0.0044, "step": 26286 }, { "epoch": 12.221292422129242, "grad_norm": 0.12603585422039032, "learning_rate": 2.514868710840668e-06, "loss": 0.0044, "step": 26288 }, { "epoch": 12.222222222222221, "grad_norm": 0.3397084176540375, "learning_rate": 2.5493106059845705e-06, "loss": 0.0032, "step": 26290 }, { "epoch": 12.223152022315203, "grad_norm": 1.757649302482605, "learning_rate": 2.5839740803690812e-06, "loss": 0.0074, "step": 26292 }, { "epoch": 12.224081822408182, "grad_norm": 0.03819978982210159, "learning_rate": 2.6188587918797057e-06, "loss": 0.0032, "step": 26294 }, { "epoch": 12.225011622501162, "grad_norm": 0.050310637801885605, "learning_rate": 2.6539643962184205e-06, "loss": 0.0017, "step": 26296 }, { "epoch": 12.225941422594142, "grad_norm": 0.06525000929832458, "learning_rate": 2.6892905469070842e-06, "loss": 0.0016, "step": 26298 }, { "epoch": 12.226871222687123, "grad_norm": 0.1005474403500557, "learning_rate": 2.7248368952907727e-06, "loss": 0.0024, "step": 26300 }, { "epoch": 12.227801022780103, "grad_norm": 1.2091323137283325, "learning_rate": 2.760603090541539e-06, "loss": 0.0083, "step": 26302 }, { "epoch": 12.228730822873082, "grad_norm": 0.18149244785308838, "learning_rate": 2.796588779661391e-06, "loss": 0.0024, "step": 26304 }, { "epoch": 12.229660622966062, "grad_norm": 0.19340789318084717, "learning_rate": 2.8327936074860284e-06, "loss": 0.0022, "step": 26306 }, { "epoch": 12.230590423059041, "grad_norm": 0.5545610785484314, "learning_rate": 2.8692172166885795e-06, "loss": 0.0053, "step": 26308 }, { "epoch": 12.231520223152023, "grad_norm": 0.06637880951166153, "learning_rate": 2.905859247782641e-06, "loss": 0.0022, "step": 26310 }, { "epoch": 12.232450023245002, "grad_norm": 0.2650730311870575, "learning_rate": 2.9427193391261623e-06, "loss": 0.0023, "step": 26312 }, { "epoch": 12.233379823337982, "grad_norm": 0.2693936228752136, "learning_rate": 2.9797971269248407e-06, "loss": 0.0042, "step": 26314 }, { "epoch": 12.234309623430962, "grad_norm": 0.05780082195997238, "learning_rate": 3.0170922452360423e-06, "loss": 0.0039, "step": 26316 }, { "epoch": 12.235239423523943, "grad_norm": 0.0713954046368599, "learning_rate": 3.0546043259719244e-06, "loss": 0.0024, "step": 26318 }, { "epoch": 12.236169223616923, "grad_norm": 0.21784181892871857, "learning_rate": 3.0923329989033107e-06, "loss": 0.0044, "step": 26320 }, { "epoch": 12.237099023709902, "grad_norm": 0.07873022556304932, "learning_rate": 3.1302778916636858e-06, "loss": 0.0031, "step": 26322 }, { "epoch": 12.238028823802882, "grad_norm": 1.1275628805160522, "learning_rate": 3.1684386297520215e-06, "loss": 0.0061, "step": 26324 }, { "epoch": 12.238958623895863, "grad_norm": 0.36778369545936584, "learning_rate": 3.2068148365373225e-06, "loss": 0.0038, "step": 26326 }, { "epoch": 12.239888423988843, "grad_norm": 0.3624846935272217, "learning_rate": 3.2454061332618357e-06, "loss": 0.0051, "step": 26328 }, { "epoch": 12.240818224081822, "grad_norm": 0.5878868699073792, "learning_rate": 3.284212139045215e-06, "loss": 0.0069, "step": 26330 }, { "epoch": 12.241748024174802, "grad_norm": 0.1142360046505928, "learning_rate": 3.323232470887763e-06, "loss": 0.0023, "step": 26332 }, { "epoch": 12.242677824267782, "grad_norm": 0.11021379381418228, "learning_rate": 3.3624667436744717e-06, "loss": 0.0021, "step": 26334 }, { "epoch": 12.243607624360763, "grad_norm": 0.1972733587026596, "learning_rate": 3.4019145701790824e-06, "loss": 0.0033, "step": 26336 }, { "epoch": 12.244537424453743, "grad_norm": 0.11900965124368668, "learning_rate": 3.441575561067387e-06, "loss": 0.0083, "step": 26338 }, { "epoch": 12.245467224546722, "grad_norm": 0.6107234358787537, "learning_rate": 3.481449324901326e-06, "loss": 0.0054, "step": 26340 }, { "epoch": 12.246397024639702, "grad_norm": 0.4668284058570862, "learning_rate": 3.521535468143125e-06, "loss": 0.0048, "step": 26342 }, { "epoch": 12.247326824732683, "grad_norm": 0.16573606431484222, "learning_rate": 3.561833595158648e-06, "loss": 0.0031, "step": 26344 }, { "epoch": 12.248256624825663, "grad_norm": 0.06990740448236465, "learning_rate": 3.60234330822165e-06, "loss": 0.0019, "step": 26346 }, { "epoch": 12.249186424918642, "grad_norm": 0.13783058524131775, "learning_rate": 3.643064207517615e-06, "loss": 0.0026, "step": 26348 }, { "epoch": 12.250116225011622, "grad_norm": 0.052676837891340256, "learning_rate": 3.6839958911477114e-06, "loss": 0.0024, "step": 26350 }, { "epoch": 12.251046025104603, "grad_norm": 0.11365959048271179, "learning_rate": 3.7251379551327397e-06, "loss": 0.0035, "step": 26352 }, { "epoch": 12.251975825197583, "grad_norm": 0.5625682473182678, "learning_rate": 3.7664899934170518e-06, "loss": 0.0031, "step": 26354 }, { "epoch": 12.252905625290563, "grad_norm": 0.18613581359386444, "learning_rate": 3.808051597872905e-06, "loss": 0.0036, "step": 26356 }, { "epoch": 12.253835425383542, "grad_norm": 0.7639160752296448, "learning_rate": 3.8498223583039535e-06, "loss": 0.0089, "step": 26358 }, { "epoch": 12.254765225476522, "grad_norm": 0.1879347562789917, "learning_rate": 3.891801862449558e-06, "loss": 0.002, "step": 26360 }, { "epoch": 12.255695025569503, "grad_norm": 0.04960170015692711, "learning_rate": 3.93398969598915e-06, "loss": 0.0026, "step": 26362 }, { "epoch": 12.256624825662483, "grad_norm": 0.18176895380020142, "learning_rate": 3.976385442545751e-06, "loss": 0.0031, "step": 26364 }, { "epoch": 12.257554625755462, "grad_norm": 0.13067200779914856, "learning_rate": 4.018988683690455e-06, "loss": 0.0033, "step": 26366 }, { "epoch": 12.258484425848442, "grad_norm": 0.1448715478181839, "learning_rate": 4.061798998946381e-06, "loss": 0.0043, "step": 26368 }, { "epoch": 12.259414225941423, "grad_norm": 0.18695282936096191, "learning_rate": 4.104815965793188e-06, "loss": 0.0031, "step": 26370 }, { "epoch": 12.260344026034403, "grad_norm": 1.03421950340271, "learning_rate": 4.148039159670686e-06, "loss": 0.0094, "step": 26372 }, { "epoch": 12.261273826127383, "grad_norm": 0.09724032878875732, "learning_rate": 4.191468153983306e-06, "loss": 0.0037, "step": 26374 }, { "epoch": 12.262203626220362, "grad_norm": 1.2210289239883423, "learning_rate": 4.235102520104696e-06, "loss": 0.0202, "step": 26376 }, { "epoch": 12.263133426313342, "grad_norm": 0.1311277151107788, "learning_rate": 4.278941827380986e-06, "loss": 0.0029, "step": 26378 }, { "epoch": 12.264063226406323, "grad_norm": 1.0049258470535278, "learning_rate": 4.322985643136012e-06, "loss": 0.0129, "step": 26380 }, { "epoch": 12.264993026499303, "grad_norm": 0.9608583450317383, "learning_rate": 4.36723353267499e-06, "loss": 0.0048, "step": 26382 }, { "epoch": 12.265922826592282, "grad_norm": 0.10015928745269775, "learning_rate": 4.411685059289316e-06, "loss": 0.0027, "step": 26384 }, { "epoch": 12.266852626685262, "grad_norm": 0.47132930159568787, "learning_rate": 4.456339784260268e-06, "loss": 0.0035, "step": 26386 }, { "epoch": 12.267782426778243, "grad_norm": 0.5747683048248291, "learning_rate": 4.501197266863637e-06, "loss": 0.0035, "step": 26388 }, { "epoch": 12.268712226871223, "grad_norm": 0.20156945288181305, "learning_rate": 4.546257064374379e-06, "loss": 0.0052, "step": 26390 }, { "epoch": 12.269642026964203, "grad_norm": 0.04925752058625221, "learning_rate": 4.591518732070391e-06, "loss": 0.0017, "step": 26392 }, { "epoch": 12.270571827057182, "grad_norm": 0.09879033267498016, "learning_rate": 4.636981823237175e-06, "loss": 0.0024, "step": 26394 }, { "epoch": 12.271501627150164, "grad_norm": 0.5323134660720825, "learning_rate": 4.682645889172583e-06, "loss": 0.0096, "step": 26396 }, { "epoch": 12.272431427243143, "grad_norm": 0.0783357247710228, "learning_rate": 4.728510479190618e-06, "loss": 0.0026, "step": 26398 }, { "epoch": 12.273361227336123, "grad_norm": 0.2307867705821991, "learning_rate": 4.774575140626292e-06, "loss": 0.0018, "step": 26400 }, { "epoch": 12.274291027429102, "grad_norm": 0.1269225776195526, "learning_rate": 4.820839418839887e-06, "loss": 0.0016, "step": 26402 }, { "epoch": 12.275220827522082, "grad_norm": 0.30123987793922424, "learning_rate": 4.867302857221976e-06, "loss": 0.0029, "step": 26404 }, { "epoch": 12.276150627615063, "grad_norm": 0.13751055300235748, "learning_rate": 4.913964997196857e-06, "loss": 0.0023, "step": 26406 }, { "epoch": 12.277080427708043, "grad_norm": 0.918856143951416, "learning_rate": 4.960825378228051e-06, "loss": 0.0056, "step": 26408 }, { "epoch": 12.278010227801023, "grad_norm": 0.06592503190040588, "learning_rate": 5.0078835378227255e-06, "loss": 0.0029, "step": 26410 }, { "epoch": 12.278940027894002, "grad_norm": 0.4799760580062866, "learning_rate": 5.055139011535737e-06, "loss": 0.0072, "step": 26412 }, { "epoch": 12.279869827986984, "grad_norm": 0.14057151973247528, "learning_rate": 5.10259133297464e-06, "loss": 0.003, "step": 26414 }, { "epoch": 12.280799628079963, "grad_norm": 0.27453917264938354, "learning_rate": 5.150240033804067e-06, "loss": 0.0117, "step": 26416 }, { "epoch": 12.281729428172943, "grad_norm": 0.11126112192869186, "learning_rate": 5.1980846437507996e-06, "loss": 0.0038, "step": 26418 }, { "epoch": 12.282659228265922, "grad_norm": 0.1345347762107849, "learning_rate": 5.246124690607739e-06, "loss": 0.0023, "step": 26420 }, { "epoch": 12.283589028358902, "grad_norm": 0.1340733915567398, "learning_rate": 5.294359700238915e-06, "loss": 0.0018, "step": 26422 }, { "epoch": 12.284518828451883, "grad_norm": 0.4518483579158783, "learning_rate": 5.342789196584466e-06, "loss": 0.0051, "step": 26424 }, { "epoch": 12.285448628544863, "grad_norm": 0.35701414942741394, "learning_rate": 5.391412701664712e-06, "loss": 0.0019, "step": 26426 }, { "epoch": 12.286378428637843, "grad_norm": 0.25138095021247864, "learning_rate": 5.4402297355851736e-06, "loss": 0.0025, "step": 26428 }, { "epoch": 12.287308228730822, "grad_norm": 0.03545770049095154, "learning_rate": 5.4892398165417685e-06, "loss": 0.0012, "step": 26430 }, { "epoch": 12.288238028823804, "grad_norm": 0.4181731939315796, "learning_rate": 5.538442460824452e-06, "loss": 0.0039, "step": 26432 }, { "epoch": 12.289167828916783, "grad_norm": 0.09419941157102585, "learning_rate": 5.587837182823096e-06, "loss": 0.003, "step": 26434 }, { "epoch": 12.290097629009763, "grad_norm": 0.20767056941986084, "learning_rate": 5.637423495031629e-06, "loss": 0.0022, "step": 26436 }, { "epoch": 12.291027429102742, "grad_norm": 0.05750693753361702, "learning_rate": 5.687200908053388e-06, "loss": 0.0025, "step": 26438 }, { "epoch": 12.291957229195724, "grad_norm": 0.06808987259864807, "learning_rate": 5.7371689306052945e-06, "loss": 0.0066, "step": 26440 }, { "epoch": 12.292887029288703, "grad_norm": 0.07359931617975235, "learning_rate": 5.787327069523022e-06, "loss": 0.0026, "step": 26442 }, { "epoch": 12.293816829381683, "grad_norm": 0.13172511756420135, "learning_rate": 5.837674829766215e-06, "loss": 0.0034, "step": 26444 }, { "epoch": 12.294746629474663, "grad_norm": 0.1974816918373108, "learning_rate": 5.888211714422698e-06, "loss": 0.0026, "step": 26446 }, { "epoch": 12.295676429567642, "grad_norm": 0.09760010242462158, "learning_rate": 5.938937224713701e-06, "loss": 0.0021, "step": 26448 }, { "epoch": 12.296606229660624, "grad_norm": 0.7805627584457397, "learning_rate": 5.9898508599991505e-06, "loss": 0.0116, "step": 26450 }, { "epoch": 12.297536029753603, "grad_norm": 0.16768983006477356, "learning_rate": 6.040952117781905e-06, "loss": 0.0022, "step": 26452 }, { "epoch": 12.298465829846583, "grad_norm": 0.1985188126564026, "learning_rate": 6.0922404937131765e-06, "loss": 0.0019, "step": 26454 }, { "epoch": 12.299395629939562, "grad_norm": 0.2383720576763153, "learning_rate": 6.143715481597291e-06, "loss": 0.008, "step": 26456 }, { "epoch": 12.300325430032544, "grad_norm": 0.5106719136238098, "learning_rate": 6.195376573397242e-06, "loss": 0.0033, "step": 26458 }, { "epoch": 12.301255230125523, "grad_norm": 0.1057654395699501, "learning_rate": 6.247223259238564e-06, "loss": 0.0026, "step": 26460 }, { "epoch": 12.302185030218503, "grad_norm": 0.12851841747760773, "learning_rate": 6.299255027415401e-06, "loss": 0.0026, "step": 26462 }, { "epoch": 12.303114830311483, "grad_norm": 0.22445566952228546, "learning_rate": 6.351471364395435e-06, "loss": 0.0024, "step": 26464 }, { "epoch": 12.304044630404462, "grad_norm": 0.0700046494603157, "learning_rate": 6.403871754824389e-06, "loss": 0.0024, "step": 26466 }, { "epoch": 12.304974430497444, "grad_norm": 0.07956920564174652, "learning_rate": 6.456455681531561e-06, "loss": 0.0046, "step": 26468 }, { "epoch": 12.305904230590423, "grad_norm": 0.10811097174882889, "learning_rate": 6.509222625534706e-06, "loss": 0.003, "step": 26470 }, { "epoch": 12.306834030683403, "grad_norm": 0.23354493081569672, "learning_rate": 6.5621720660456285e-06, "loss": 0.0028, "step": 26472 }, { "epoch": 12.307763830776382, "grad_norm": 0.09616439044475555, "learning_rate": 6.615303480474606e-06, "loss": 0.0027, "step": 26474 }, { "epoch": 12.308693630869364, "grad_norm": 0.2118055820465088, "learning_rate": 6.668616344435908e-06, "loss": 0.0034, "step": 26476 }, { "epoch": 12.309623430962343, "grad_norm": 0.2504933774471283, "learning_rate": 6.722110131753332e-06, "loss": 0.0026, "step": 26478 }, { "epoch": 12.310553231055323, "grad_norm": 0.5122504830360413, "learning_rate": 6.775784314464673e-06, "loss": 0.0042, "step": 26480 }, { "epoch": 12.311483031148303, "grad_norm": 0.13104529678821564, "learning_rate": 6.829638362827295e-06, "loss": 0.003, "step": 26482 }, { "epoch": 12.312412831241284, "grad_norm": 0.27164262533187866, "learning_rate": 6.883671745323726e-06, "loss": 0.0035, "step": 26484 }, { "epoch": 12.313342631334264, "grad_norm": 0.14622651040554047, "learning_rate": 6.937883928666298e-06, "loss": 0.0018, "step": 26486 }, { "epoch": 12.314272431427243, "grad_norm": 0.11267964541912079, "learning_rate": 6.992274377802398e-06, "loss": 0.0016, "step": 26488 }, { "epoch": 12.315202231520223, "grad_norm": 0.22095200419425964, "learning_rate": 7.046842555920254e-06, "loss": 0.0047, "step": 26490 }, { "epoch": 12.316132031613202, "grad_norm": 0.30390334129333496, "learning_rate": 7.101587924454224e-06, "loss": 0.0056, "step": 26492 }, { "epoch": 12.317061831706184, "grad_norm": 0.06918802857398987, "learning_rate": 7.156509943089496e-06, "loss": 0.0026, "step": 26494 }, { "epoch": 12.317991631799163, "grad_norm": 0.06420820206403732, "learning_rate": 7.211608069767799e-06, "loss": 0.0027, "step": 26496 }, { "epoch": 12.318921431892143, "grad_norm": 0.08392779529094696, "learning_rate": 7.266881760693113e-06, "loss": 0.0051, "step": 26498 }, { "epoch": 12.319851231985123, "grad_norm": 0.5301122665405273, "learning_rate": 7.3223304703363025e-06, "loss": 0.0039, "step": 26500 }, { "epoch": 12.320781032078104, "grad_norm": 0.1632889211177826, "learning_rate": 7.377953651440983e-06, "loss": 0.0027, "step": 26502 }, { "epoch": 12.321710832171084, "grad_norm": 0.18525707721710205, "learning_rate": 7.433750755028689e-06, "loss": 0.0021, "step": 26504 }, { "epoch": 12.322640632264063, "grad_norm": 0.08374417573213577, "learning_rate": 7.489721230404788e-06, "loss": 0.0025, "step": 26506 }, { "epoch": 12.323570432357043, "grad_norm": 0.07595325261354446, "learning_rate": 7.545864525163159e-06, "loss": 0.0013, "step": 26508 }, { "epoch": 12.324500232450024, "grad_norm": 0.08394964784383774, "learning_rate": 7.602180085192017e-06, "loss": 0.0098, "step": 26510 }, { "epoch": 12.325430032543004, "grad_norm": 0.18193742632865906, "learning_rate": 7.65866735467978e-06, "loss": 0.0027, "step": 26512 }, { "epoch": 12.326359832635983, "grad_norm": 0.46142855286598206, "learning_rate": 7.7153257761199e-06, "loss": 0.004, "step": 26514 }, { "epoch": 12.327289632728963, "grad_norm": 0.16747045516967773, "learning_rate": 7.77215479031625e-06, "loss": 0.0023, "step": 26516 }, { "epoch": 12.328219432821943, "grad_norm": 0.12365002930164337, "learning_rate": 7.829153836389783e-06, "loss": 0.003, "step": 26518 }, { "epoch": 12.329149232914924, "grad_norm": 0.12294799089431763, "learning_rate": 7.886322351782801e-06, "loss": 0.0016, "step": 26520 }, { "epoch": 12.330079033007904, "grad_norm": 0.027568982914090157, "learning_rate": 7.943659772265135e-06, "loss": 0.0014, "step": 26522 }, { "epoch": 12.331008833100883, "grad_norm": 0.17706654965877533, "learning_rate": 8.001165531939467e-06, "loss": 0.0065, "step": 26524 }, { "epoch": 12.331938633193863, "grad_norm": 0.6982369422912598, "learning_rate": 8.05883906324742e-06, "loss": 0.0122, "step": 26526 }, { "epoch": 12.332868433286844, "grad_norm": 0.8613461256027222, "learning_rate": 8.116679796974393e-06, "loss": 0.0045, "step": 26528 }, { "epoch": 12.333798233379824, "grad_norm": 0.09767956286668777, "learning_rate": 8.17468716225557e-06, "loss": 0.0028, "step": 26530 }, { "epoch": 12.334728033472803, "grad_norm": 0.2543191909790039, "learning_rate": 8.232860586581931e-06, "loss": 0.0029, "step": 26532 }, { "epoch": 12.335657833565783, "grad_norm": 0.15242616832256317, "learning_rate": 8.291199495805135e-06, "loss": 0.0047, "step": 26534 }, { "epoch": 12.336587633658763, "grad_norm": 0.20624883472919464, "learning_rate": 8.349703314143567e-06, "loss": 0.0029, "step": 26536 }, { "epoch": 12.337517433751744, "grad_norm": 1.083548903465271, "learning_rate": 8.408371464188424e-06, "loss": 0.007, "step": 26538 }, { "epoch": 12.338447233844724, "grad_norm": 0.7062238454818726, "learning_rate": 8.467203366908751e-06, "loss": 0.0148, "step": 26540 }, { "epoch": 12.339377033937703, "grad_norm": 0.4707393944263458, "learning_rate": 8.526198441657156e-06, "loss": 0.0033, "step": 26542 }, { "epoch": 12.340306834030683, "grad_norm": 0.15902677178382874, "learning_rate": 8.585356106176063e-06, "loss": 0.0023, "step": 26544 }, { "epoch": 12.341236634123664, "grad_norm": 0.0595090389251709, "learning_rate": 8.644675776603474e-06, "loss": 0.0029, "step": 26546 }, { "epoch": 12.342166434216644, "grad_norm": 0.05483639985322952, "learning_rate": 8.704156867478061e-06, "loss": 0.0014, "step": 26548 }, { "epoch": 12.343096234309623, "grad_norm": 0.39405685663223267, "learning_rate": 8.763798791745335e-06, "loss": 0.0037, "step": 26550 }, { "epoch": 12.344026034402603, "grad_norm": 0.0690576583147049, "learning_rate": 8.823600960763847e-06, "loss": 0.0029, "step": 26552 }, { "epoch": 12.344955834495583, "grad_norm": 0.1351536065340042, "learning_rate": 8.883562784310202e-06, "loss": 0.0019, "step": 26554 }, { "epoch": 12.345885634588564, "grad_norm": 0.4134317934513092, "learning_rate": 8.943683670585407e-06, "loss": 0.0051, "step": 26556 }, { "epoch": 12.346815434681544, "grad_norm": 0.7750957012176514, "learning_rate": 9.003963026220448e-06, "loss": 0.0123, "step": 26558 }, { "epoch": 12.347745234774523, "grad_norm": 0.2624944746494293, "learning_rate": 9.064400256282697e-06, "loss": 0.0041, "step": 26560 }, { "epoch": 12.348675034867503, "grad_norm": 0.1599166989326477, "learning_rate": 9.124994764280956e-06, "loss": 0.0053, "step": 26562 }, { "epoch": 12.349604834960484, "grad_norm": 0.14544793963432312, "learning_rate": 9.185745952171754e-06, "loss": 0.0024, "step": 26564 }, { "epoch": 12.350534635053464, "grad_norm": 0.09730830043554306, "learning_rate": 9.246653220365668e-06, "loss": 0.0018, "step": 26566 }, { "epoch": 12.351464435146443, "grad_norm": 0.2514103353023529, "learning_rate": 9.307715967732554e-06, "loss": 0.0023, "step": 26568 }, { "epoch": 12.352394235239423, "grad_norm": 0.4210396409034729, "learning_rate": 9.368933591607327e-06, "loss": 0.0082, "step": 26570 }, { "epoch": 12.353324035332404, "grad_norm": 0.3182884454727173, "learning_rate": 9.430305487797178e-06, "loss": 0.0028, "step": 26572 }, { "epoch": 12.354253835425384, "grad_norm": 0.07048936933279037, "learning_rate": 9.491831050586118e-06, "loss": 0.0027, "step": 26574 }, { "epoch": 12.355183635518364, "grad_norm": 0.3837098777294159, "learning_rate": 9.55350967274169e-06, "loss": 0.0036, "step": 26576 }, { "epoch": 12.356113435611343, "grad_norm": 0.41627436876296997, "learning_rate": 9.615340745520655e-06, "loss": 0.0032, "step": 26578 }, { "epoch": 12.357043235704323, "grad_norm": 0.140743687748909, "learning_rate": 9.677323658675562e-06, "loss": 0.0027, "step": 26580 }, { "epoch": 12.357973035797304, "grad_norm": 0.5505696535110474, "learning_rate": 9.73945780045994e-06, "loss": 0.004, "step": 26582 }, { "epoch": 12.358902835890284, "grad_norm": 0.058880310505628586, "learning_rate": 9.801742557634757e-06, "loss": 0.0024, "step": 26584 }, { "epoch": 12.359832635983263, "grad_norm": 0.525588870048523, "learning_rate": 9.864177315474892e-06, "loss": 0.0054, "step": 26586 }, { "epoch": 12.360762436076243, "grad_norm": 0.23365986347198486, "learning_rate": 9.92676145777434e-06, "loss": 0.0039, "step": 26588 }, { "epoch": 12.361692236169224, "grad_norm": 0.8295428156852722, "learning_rate": 9.989494366852887e-06, "loss": 0.0073, "step": 26590 }, { "epoch": 12.362622036262204, "grad_norm": 0.2523753345012665, "learning_rate": 1.0052375423561906e-05, "loss": 0.0018, "step": 26592 }, { "epoch": 12.363551836355184, "grad_norm": 0.12729457020759583, "learning_rate": 1.0115404007291037e-05, "loss": 0.0027, "step": 26594 }, { "epoch": 12.364481636448163, "grad_norm": 0.8586771488189697, "learning_rate": 1.0178579495973605e-05, "loss": 0.0177, "step": 26596 }, { "epoch": 12.365411436541144, "grad_norm": 0.41557055711746216, "learning_rate": 1.0241901266092607e-05, "loss": 0.004, "step": 26598 }, { "epoch": 12.366341236634124, "grad_norm": 0.0670454353094101, "learning_rate": 1.0305368692688174e-05, "loss": 0.0021, "step": 26600 }, { "epoch": 12.367271036727104, "grad_norm": 0.33631178736686707, "learning_rate": 1.0368981149362285e-05, "loss": 0.0066, "step": 26602 }, { "epoch": 12.368200836820083, "grad_norm": 0.19878441095352173, "learning_rate": 1.0432738008285524e-05, "loss": 0.0017, "step": 26604 }, { "epoch": 12.369130636913063, "grad_norm": 0.07572168111801147, "learning_rate": 1.0496638640203722e-05, "loss": 0.0038, "step": 26606 }, { "epoch": 12.370060437006044, "grad_norm": 0.07621597498655319, "learning_rate": 1.0560682414443298e-05, "loss": 0.0062, "step": 26608 }, { "epoch": 12.370990237099024, "grad_norm": 0.18777340650558472, "learning_rate": 1.0624868698918057e-05, "loss": 0.0033, "step": 26610 }, { "epoch": 12.371920037192004, "grad_norm": 1.309862494468689, "learning_rate": 1.0689196860135136e-05, "loss": 0.0119, "step": 26612 }, { "epoch": 12.372849837284983, "grad_norm": 0.2409597486257553, "learning_rate": 1.0753666263201846e-05, "loss": 0.0033, "step": 26614 }, { "epoch": 12.373779637377964, "grad_norm": 0.15104001760482788, "learning_rate": 1.0818276271831059e-05, "loss": 0.0047, "step": 26616 }, { "epoch": 12.374709437470944, "grad_norm": 0.17970754206180573, "learning_rate": 1.0883026248347929e-05, "loss": 0.0027, "step": 26618 }, { "epoch": 12.375639237563924, "grad_norm": 0.9353024363517761, "learning_rate": 1.0947915553696625e-05, "loss": 0.0198, "step": 26620 }, { "epoch": 12.376569037656903, "grad_norm": 0.4488253891468048, "learning_rate": 1.1012943547445895e-05, "loss": 0.0053, "step": 26622 }, { "epoch": 12.377498837749883, "grad_norm": 0.20887775719165802, "learning_rate": 1.1078109587795401e-05, "loss": 0.0023, "step": 26624 }, { "epoch": 12.378428637842864, "grad_norm": 0.734769344329834, "learning_rate": 1.1143413031582638e-05, "loss": 0.0091, "step": 26626 }, { "epoch": 12.379358437935844, "grad_norm": 0.21189133822917938, "learning_rate": 1.1208853234289268e-05, "loss": 0.0023, "step": 26628 }, { "epoch": 12.380288238028823, "grad_norm": 0.1592886745929718, "learning_rate": 1.1274429550046763e-05, "loss": 0.0025, "step": 26630 }, { "epoch": 12.381218038121803, "grad_norm": 0.3567931354045868, "learning_rate": 1.1340141331643213e-05, "loss": 0.0035, "step": 26632 }, { "epoch": 12.382147838214784, "grad_norm": 0.13797777891159058, "learning_rate": 1.1405987930530162e-05, "loss": 0.0031, "step": 26634 }, { "epoch": 12.383077638307764, "grad_norm": 0.04850143939256668, "learning_rate": 1.147196869682811e-05, "loss": 0.0016, "step": 26636 }, { "epoch": 12.384007438400744, "grad_norm": 0.27998483180999756, "learning_rate": 1.1538082979333386e-05, "loss": 0.0044, "step": 26638 }, { "epoch": 12.384937238493723, "grad_norm": 0.14860808849334717, "learning_rate": 1.160433012552501e-05, "loss": 0.0026, "step": 26640 }, { "epoch": 12.385867038586705, "grad_norm": 0.24976834654808044, "learning_rate": 1.1670709481570243e-05, "loss": 0.0025, "step": 26642 }, { "epoch": 12.386796838679684, "grad_norm": 0.10806696861982346, "learning_rate": 1.1737220392331639e-05, "loss": 0.003, "step": 26644 }, { "epoch": 12.387726638772664, "grad_norm": 1.1126234531402588, "learning_rate": 1.1803862201373215e-05, "loss": 0.0119, "step": 26646 }, { "epoch": 12.388656438865643, "grad_norm": 0.3460268974304199, "learning_rate": 1.1870634250967515e-05, "loss": 0.0033, "step": 26648 }, { "epoch": 12.389586238958623, "grad_norm": 0.34247174859046936, "learning_rate": 1.1937535882101371e-05, "loss": 0.004, "step": 26650 }, { "epoch": 12.390516039051604, "grad_norm": 0.2734858989715576, "learning_rate": 1.200456643448224e-05, "loss": 0.0028, "step": 26652 }, { "epoch": 12.391445839144584, "grad_norm": 0.33534756302833557, "learning_rate": 1.2071725246546082e-05, "loss": 0.0046, "step": 26654 }, { "epoch": 12.392375639237564, "grad_norm": 0.1082005575299263, "learning_rate": 1.2139011655462382e-05, "loss": 0.0037, "step": 26656 }, { "epoch": 12.393305439330543, "grad_norm": 0.23780302703380585, "learning_rate": 1.2206424997141303e-05, "loss": 0.0043, "step": 26658 }, { "epoch": 12.394235239423525, "grad_norm": 0.5275303721427917, "learning_rate": 1.2273964606240679e-05, "loss": 0.0161, "step": 26660 }, { "epoch": 12.395165039516504, "grad_norm": 0.43663549423217773, "learning_rate": 1.2341629816171683e-05, "loss": 0.0039, "step": 26662 }, { "epoch": 12.396094839609484, "grad_norm": 0.18451836705207825, "learning_rate": 1.240941995910601e-05, "loss": 0.0034, "step": 26664 }, { "epoch": 12.397024639702463, "grad_norm": 0.0492858849465847, "learning_rate": 1.2477334365982163e-05, "loss": 0.0031, "step": 26666 }, { "epoch": 12.397954439795445, "grad_norm": 0.25007155537605286, "learning_rate": 1.2545372366512674e-05, "loss": 0.0044, "step": 26668 }, { "epoch": 12.398884239888424, "grad_norm": 0.09221195429563522, "learning_rate": 1.2613533289189792e-05, "loss": 0.0024, "step": 26670 }, { "epoch": 12.399814039981404, "grad_norm": 0.15638424456119537, "learning_rate": 1.268181646129257e-05, "loss": 0.003, "step": 26672 }, { "epoch": 12.400743840074384, "grad_norm": 0.4051799774169922, "learning_rate": 1.2750221208893976e-05, "loss": 0.0088, "step": 26674 }, { "epoch": 12.401673640167363, "grad_norm": 0.04943328723311424, "learning_rate": 1.281874685686662e-05, "loss": 0.0021, "step": 26676 }, { "epoch": 12.402603440260345, "grad_norm": 0.09648661315441132, "learning_rate": 1.2887392728890168e-05, "loss": 0.0037, "step": 26678 }, { "epoch": 12.403533240353324, "grad_norm": 0.15593500435352325, "learning_rate": 1.2956158147457116e-05, "loss": 0.0026, "step": 26680 }, { "epoch": 12.404463040446304, "grad_norm": 0.5885037779808044, "learning_rate": 1.3025042433881009e-05, "loss": 0.0106, "step": 26682 }, { "epoch": 12.405392840539283, "grad_norm": 0.36542466282844543, "learning_rate": 1.3094044908301592e-05, "loss": 0.0037, "step": 26684 }, { "epoch": 12.406322640632265, "grad_norm": 0.35286596417427063, "learning_rate": 1.3163164889692147e-05, "loss": 0.0072, "step": 26686 }, { "epoch": 12.407252440725244, "grad_norm": 0.4366985261440277, "learning_rate": 1.3232401695866675e-05, "loss": 0.004, "step": 26688 }, { "epoch": 12.408182240818224, "grad_norm": 0.22715657949447632, "learning_rate": 1.3301754643485686e-05, "loss": 0.0028, "step": 26690 }, { "epoch": 12.409112040911204, "grad_norm": 0.11643486469984055, "learning_rate": 1.3371223048063443e-05, "loss": 0.0056, "step": 26692 }, { "epoch": 12.410041841004183, "grad_norm": 0.41975149512290955, "learning_rate": 1.3440806223975139e-05, "loss": 0.0038, "step": 26694 }, { "epoch": 12.410971641097165, "grad_norm": 0.3534362316131592, "learning_rate": 1.351050348446277e-05, "loss": 0.0046, "step": 26696 }, { "epoch": 12.411901441190144, "grad_norm": 0.24758471548557281, "learning_rate": 1.3580314141642519e-05, "loss": 0.0033, "step": 26698 }, { "epoch": 12.412831241283124, "grad_norm": 0.06111471727490425, "learning_rate": 1.3650237506511213e-05, "loss": 0.0018, "step": 26700 }, { "epoch": 12.413761041376103, "grad_norm": 0.3082374334335327, "learning_rate": 1.372027288895379e-05, "loss": 0.0039, "step": 26702 }, { "epoch": 12.414690841469085, "grad_norm": 0.24124112725257874, "learning_rate": 1.3790419597749306e-05, "loss": 0.0033, "step": 26704 }, { "epoch": 12.415620641562064, "grad_norm": 0.0667395070195198, "learning_rate": 1.3860676940577579e-05, "loss": 0.0018, "step": 26706 }, { "epoch": 12.416550441655044, "grad_norm": 0.10242623835802078, "learning_rate": 1.3931044224027485e-05, "loss": 0.0034, "step": 26708 }, { "epoch": 12.417480241748024, "grad_norm": 0.23515304923057556, "learning_rate": 1.4001520753602182e-05, "loss": 0.0039, "step": 26710 }, { "epoch": 12.418410041841003, "grad_norm": 0.09129292517900467, "learning_rate": 1.4072105833726777e-05, "loss": 0.0018, "step": 26712 }, { "epoch": 12.419339841933985, "grad_norm": 1.0716335773468018, "learning_rate": 1.414279876775486e-05, "loss": 0.0072, "step": 26714 }, { "epoch": 12.420269642026964, "grad_norm": 0.4906592071056366, "learning_rate": 1.4213598857976034e-05, "loss": 0.0046, "step": 26716 }, { "epoch": 12.421199442119944, "grad_norm": 0.31140971183776855, "learning_rate": 1.428450540562184e-05, "loss": 0.0038, "step": 26718 }, { "epoch": 12.422129242212923, "grad_norm": 0.10124842077493668, "learning_rate": 1.4355517710873107e-05, "loss": 0.0026, "step": 26720 }, { "epoch": 12.423059042305905, "grad_norm": 0.2024921476840973, "learning_rate": 1.4426635072867377e-05, "loss": 0.0026, "step": 26722 }, { "epoch": 12.423988842398884, "grad_norm": 0.06768052279949188, "learning_rate": 1.4497856789704838e-05, "loss": 0.0028, "step": 26724 }, { "epoch": 12.424918642491864, "grad_norm": 0.09776492416858673, "learning_rate": 1.4569182158455734e-05, "loss": 0.0027, "step": 26726 }, { "epoch": 12.425848442584844, "grad_norm": 0.3659822642803192, "learning_rate": 1.46406104751678e-05, "loss": 0.0088, "step": 26728 }, { "epoch": 12.426778242677825, "grad_norm": 0.32578036189079285, "learning_rate": 1.4712141034872221e-05, "loss": 0.0029, "step": 26730 }, { "epoch": 12.427708042770805, "grad_norm": 0.10868095606565475, "learning_rate": 1.4783773131591415e-05, "loss": 0.0029, "step": 26732 }, { "epoch": 12.428637842863784, "grad_norm": 0.3009358048439026, "learning_rate": 1.4855506058345019e-05, "loss": 0.0039, "step": 26734 }, { "epoch": 12.429567642956764, "grad_norm": 1.0320364236831665, "learning_rate": 1.4927339107158489e-05, "loss": 0.0165, "step": 26736 }, { "epoch": 12.430497443049743, "grad_norm": 0.2795008718967438, "learning_rate": 1.4999271569068468e-05, "loss": 0.0037, "step": 26738 }, { "epoch": 12.431427243142725, "grad_norm": 0.513306200504303, "learning_rate": 1.5071302734130446e-05, "loss": 0.01, "step": 26740 }, { "epoch": 12.432357043235704, "grad_norm": 0.34598490595817566, "learning_rate": 1.5143431891426225e-05, "loss": 0.0044, "step": 26742 }, { "epoch": 12.433286843328684, "grad_norm": 0.31747448444366455, "learning_rate": 1.5215658329069978e-05, "loss": 0.0029, "step": 26744 }, { "epoch": 12.434216643421664, "grad_norm": 0.42983299493789673, "learning_rate": 1.528798133421576e-05, "loss": 0.0045, "step": 26746 }, { "epoch": 12.435146443514645, "grad_norm": 0.0664982870221138, "learning_rate": 1.5360400193065032e-05, "loss": 0.0048, "step": 26748 }, { "epoch": 12.436076243607625, "grad_norm": 0.1802682876586914, "learning_rate": 1.5432914190872736e-05, "loss": 0.0031, "step": 26750 }, { "epoch": 12.437006043700604, "grad_norm": 0.2102503478527069, "learning_rate": 1.5505522611954988e-05, "loss": 0.0028, "step": 26752 }, { "epoch": 12.437935843793584, "grad_norm": 0.10417060554027557, "learning_rate": 1.557822473969582e-05, "loss": 0.0252, "step": 26754 }, { "epoch": 12.438865643886565, "grad_norm": 0.1958465278148651, "learning_rate": 1.5651019856554914e-05, "loss": 0.015, "step": 26756 }, { "epoch": 12.439795443979545, "grad_norm": 0.4273376166820526, "learning_rate": 1.572390724407368e-05, "loss": 0.0042, "step": 26758 }, { "epoch": 12.440725244072524, "grad_norm": 0.1763894408941269, "learning_rate": 1.5796886182883046e-05, "loss": 0.005, "step": 26760 }, { "epoch": 12.441655044165504, "grad_norm": 0.22963984310626984, "learning_rate": 1.5869955952710335e-05, "loss": 0.0038, "step": 26762 }, { "epoch": 12.442584844258484, "grad_norm": 0.2511771619319916, "learning_rate": 1.5943115832386424e-05, "loss": 0.0118, "step": 26764 }, { "epoch": 12.443514644351465, "grad_norm": 0.6591562628746033, "learning_rate": 1.6016365099852837e-05, "loss": 0.0196, "step": 26766 }, { "epoch": 12.444444444444445, "grad_norm": 0.4311988353729248, "learning_rate": 1.608970303216871e-05, "loss": 0.0058, "step": 26768 }, { "epoch": 12.445374244537424, "grad_norm": 0.05406005308032036, "learning_rate": 1.6163128905518585e-05, "loss": 0.0013, "step": 26770 }, { "epoch": 12.446304044630404, "grad_norm": 0.1533208042383194, "learning_rate": 1.6236641995218575e-05, "loss": 0.0021, "step": 26772 }, { "epoch": 12.447233844723385, "grad_norm": 0.9899201989173889, "learning_rate": 1.6310241575724e-05, "loss": 0.0062, "step": 26774 }, { "epoch": 12.448163644816365, "grad_norm": 0.05740352347493172, "learning_rate": 1.6383926920637033e-05, "loss": 0.0024, "step": 26776 }, { "epoch": 12.449093444909344, "grad_norm": 0.27922317385673523, "learning_rate": 1.6457697302712914e-05, "loss": 0.0037, "step": 26778 }, { "epoch": 12.450023245002324, "grad_norm": 0.8791851997375488, "learning_rate": 1.653155199386758e-05, "loss": 0.0063, "step": 26780 }, { "epoch": 12.450953045095304, "grad_norm": 0.06485899537801743, "learning_rate": 1.660549026518539e-05, "loss": 0.0022, "step": 26782 }, { "epoch": 12.451882845188285, "grad_norm": 0.9334276914596558, "learning_rate": 1.667951138692528e-05, "loss": 0.0075, "step": 26784 }, { "epoch": 12.452812645281265, "grad_norm": 0.2872985303401947, "learning_rate": 1.675361462852866e-05, "loss": 0.0043, "step": 26786 }, { "epoch": 12.453742445374244, "grad_norm": 0.8466708064079285, "learning_rate": 1.6827799258626456e-05, "loss": 0.0139, "step": 26788 }, { "epoch": 12.454672245467224, "grad_norm": 0.3163716197013855, "learning_rate": 1.690206454504632e-05, "loss": 0.0031, "step": 26790 }, { "epoch": 12.455602045560205, "grad_norm": 0.09157215803861618, "learning_rate": 1.6976409754819855e-05, "loss": 0.0024, "step": 26792 }, { "epoch": 12.456531845653185, "grad_norm": 0.19714504480361938, "learning_rate": 1.7050834154189682e-05, "loss": 0.0026, "step": 26794 }, { "epoch": 12.457461645746164, "grad_norm": 0.39942046999931335, "learning_rate": 1.7125337008617372e-05, "loss": 0.0033, "step": 26796 }, { "epoch": 12.458391445839144, "grad_norm": 1.2238725423812866, "learning_rate": 1.7199917582789657e-05, "loss": 0.0151, "step": 26798 }, { "epoch": 12.459321245932125, "grad_norm": 0.0851428285241127, "learning_rate": 1.727457514062638e-05, "loss": 0.002, "step": 26800 }, { "epoch": 12.460251046025105, "grad_norm": 0.7822845578193665, "learning_rate": 1.7349308945287416e-05, "loss": 0.0089, "step": 26802 }, { "epoch": 12.461180846118085, "grad_norm": 0.948496401309967, "learning_rate": 1.7424118259180624e-05, "loss": 0.0055, "step": 26804 }, { "epoch": 12.462110646211064, "grad_norm": 0.13613393902778625, "learning_rate": 1.7499002343968105e-05, "loss": 0.0034, "step": 26806 }, { "epoch": 12.463040446304044, "grad_norm": 0.14604350924491882, "learning_rate": 1.7573960460574005e-05, "loss": 0.0082, "step": 26808 }, { "epoch": 12.463970246397025, "grad_norm": 0.5338394045829773, "learning_rate": 1.7648991869192314e-05, "loss": 0.0203, "step": 26810 }, { "epoch": 12.464900046490005, "grad_norm": 0.5225127339363098, "learning_rate": 1.772409582929315e-05, "loss": 0.0085, "step": 26812 }, { "epoch": 12.465829846582984, "grad_norm": 0.8845000267028809, "learning_rate": 1.7799271599630586e-05, "loss": 0.0101, "step": 26814 }, { "epoch": 12.466759646675964, "grad_norm": 0.03541187569499016, "learning_rate": 1.787451843825062e-05, "loss": 0.0011, "step": 26816 }, { "epoch": 12.467689446768945, "grad_norm": 0.7673326134681702, "learning_rate": 1.7949835602496827e-05, "loss": 0.0042, "step": 26818 }, { "epoch": 12.468619246861925, "grad_norm": 0.3779965043067932, "learning_rate": 1.802522234901937e-05, "loss": 0.0038, "step": 26820 }, { "epoch": 12.469549046954905, "grad_norm": 0.3186706602573395, "learning_rate": 1.8100677933781323e-05, "loss": 0.0042, "step": 26822 }, { "epoch": 12.470478847047884, "grad_norm": 0.07432088255882263, "learning_rate": 1.8176201612066875e-05, "loss": 0.0016, "step": 26824 }, { "epoch": 12.471408647140866, "grad_norm": 0.1156473457813263, "learning_rate": 1.8251792638487637e-05, "loss": 0.0072, "step": 26826 }, { "epoch": 12.472338447233845, "grad_norm": 0.12841936945915222, "learning_rate": 1.832745026699052e-05, "loss": 0.0065, "step": 26828 }, { "epoch": 12.473268247326825, "grad_norm": 0.1991986781358719, "learning_rate": 1.8403173750865624e-05, "loss": 0.0046, "step": 26830 }, { "epoch": 12.474198047419804, "grad_norm": 0.24266675114631653, "learning_rate": 1.8478962342752563e-05, "loss": 0.0029, "step": 26832 }, { "epoch": 12.475127847512784, "grad_norm": 0.7393808960914612, "learning_rate": 1.855481529464836e-05, "loss": 0.0118, "step": 26834 }, { "epoch": 12.476057647605765, "grad_norm": 0.11014319956302643, "learning_rate": 1.8630731857915344e-05, "loss": 0.0026, "step": 26836 }, { "epoch": 12.476987447698745, "grad_norm": 0.7481403946876526, "learning_rate": 1.8706711283287508e-05, "loss": 0.0094, "step": 26838 }, { "epoch": 12.477917247791725, "grad_norm": 0.13682478666305542, "learning_rate": 1.8782752820878604e-05, "loss": 0.0097, "step": 26840 }, { "epoch": 12.478847047884704, "grad_norm": 0.10025545209646225, "learning_rate": 1.8858855720189354e-05, "loss": 0.0017, "step": 26842 }, { "epoch": 12.479776847977686, "grad_norm": 0.3409735858440399, "learning_rate": 1.8935019230114863e-05, "loss": 0.0046, "step": 26844 }, { "epoch": 12.480706648070665, "grad_norm": 0.2110137790441513, "learning_rate": 1.9011242598952047e-05, "loss": 0.0057, "step": 26846 }, { "epoch": 12.481636448163645, "grad_norm": 1.6529812812805176, "learning_rate": 1.9087525074406838e-05, "loss": 0.0172, "step": 26848 }, { "epoch": 12.482566248256624, "grad_norm": 0.28507405519485474, "learning_rate": 1.916386590360236e-05, "loss": 0.0032, "step": 26850 }, { "epoch": 12.483496048349604, "grad_norm": 0.38472306728363037, "learning_rate": 1.9240264333085272e-05, "loss": 0.0049, "step": 26852 }, { "epoch": 12.484425848442585, "grad_norm": 0.6286142468452454, "learning_rate": 1.9316719608833912e-05, "loss": 0.0155, "step": 26854 }, { "epoch": 12.485355648535565, "grad_norm": 0.20315763354301453, "learning_rate": 1.9393230976265406e-05, "loss": 0.0032, "step": 26856 }, { "epoch": 12.486285448628545, "grad_norm": 0.28713876008987427, "learning_rate": 1.9469797680243794e-05, "loss": 0.0053, "step": 26858 }, { "epoch": 12.487215248721524, "grad_norm": 1.406461477279663, "learning_rate": 1.954641896508645e-05, "loss": 0.0088, "step": 26860 }, { "epoch": 12.488145048814506, "grad_norm": 0.49827495217323303, "learning_rate": 1.962309407457204e-05, "loss": 0.0041, "step": 26862 }, { "epoch": 12.489074848907485, "grad_norm": 1.037796139717102, "learning_rate": 1.9699822251948552e-05, "loss": 0.0067, "step": 26864 }, { "epoch": 12.490004649000465, "grad_norm": 0.08747141063213348, "learning_rate": 1.9776602739939663e-05, "loss": 0.0178, "step": 26866 }, { "epoch": 12.490934449093444, "grad_norm": 2.5776047706604004, "learning_rate": 1.9853434780752787e-05, "loss": 0.0162, "step": 26868 }, { "epoch": 12.491864249186424, "grad_norm": 0.44999876618385315, "learning_rate": 1.993031761608722e-05, "loss": 0.006, "step": 26870 }, { "epoch": 12.492794049279405, "grad_norm": 0.22713802754878998, "learning_rate": 2.0007250487139894e-05, "loss": 0.0042, "step": 26872 }, { "epoch": 12.493723849372385, "grad_norm": 0.7610503435134888, "learning_rate": 2.0084232634614608e-05, "loss": 0.0064, "step": 26874 }, { "epoch": 12.494653649465365, "grad_norm": 0.1640702337026596, "learning_rate": 2.016126329872847e-05, "loss": 0.0049, "step": 26876 }, { "epoch": 12.495583449558344, "grad_norm": 0.659980833530426, "learning_rate": 2.0238341719220268e-05, "loss": 0.0074, "step": 26878 }, { "epoch": 12.496513249651326, "grad_norm": 0.4432814419269562, "learning_rate": 2.031546713535694e-05, "loss": 0.0051, "step": 26880 }, { "epoch": 12.497443049744305, "grad_norm": 0.6409614682197571, "learning_rate": 2.039263878594159e-05, "loss": 0.011, "step": 26882 }, { "epoch": 12.498372849837285, "grad_norm": 0.11403819173574448, "learning_rate": 2.0469855909321526e-05, "loss": 0.0061, "step": 26884 }, { "epoch": 12.499302649930264, "grad_norm": 0.31227439641952515, "learning_rate": 2.0547117743394744e-05, "loss": 0.0066, "step": 26886 }, { "epoch": 12.500232450023246, "grad_norm": 0.599841296672821, "learning_rate": 2.062442352561814e-05, "loss": 0.0143, "step": 26888 }, { "epoch": 12.501162250116225, "grad_norm": 0.14100822806358337, "learning_rate": 2.0701772493014662e-05, "loss": 0.0024, "step": 26890 }, { "epoch": 12.502092050209205, "grad_norm": 0.31081080436706543, "learning_rate": 2.077916388218159e-05, "loss": 0.0042, "step": 26892 }, { "epoch": 12.503021850302185, "grad_norm": 0.3445286154747009, "learning_rate": 2.0856596929296982e-05, "loss": 0.0039, "step": 26894 }, { "epoch": 12.503951650395164, "grad_norm": 0.11323379725217819, "learning_rate": 2.0934070870127753e-05, "loss": 0.0074, "step": 26896 }, { "epoch": 12.504881450488146, "grad_norm": 0.5368624925613403, "learning_rate": 2.10115849400379e-05, "loss": 0.0066, "step": 26898 }, { "epoch": 12.505811250581125, "grad_norm": 0.20806041359901428, "learning_rate": 2.1089138373994328e-05, "loss": 0.0062, "step": 26900 }, { "epoch": 12.506741050674105, "grad_norm": 0.036257557570934296, "learning_rate": 2.1166730406575947e-05, "loss": 0.0043, "step": 26902 }, { "epoch": 12.507670850767084, "grad_norm": 1.1462211608886719, "learning_rate": 2.1244360271981073e-05, "loss": 0.032, "step": 26904 }, { "epoch": 12.508600650860066, "grad_norm": 1.101671576499939, "learning_rate": 2.1322027204034107e-05, "loss": 0.009, "step": 26906 }, { "epoch": 12.509530450953045, "grad_norm": 0.24100258946418762, "learning_rate": 2.139973043619378e-05, "loss": 0.0048, "step": 26908 }, { "epoch": 12.510460251046025, "grad_norm": 0.4931763708591461, "learning_rate": 2.147746920156038e-05, "loss": 0.0093, "step": 26910 }, { "epoch": 12.511390051139005, "grad_norm": 0.9202988743782043, "learning_rate": 2.1555242732884034e-05, "loss": 0.012, "step": 26912 }, { "epoch": 12.512319851231986, "grad_norm": 0.1164809986948967, "learning_rate": 2.1633050262571213e-05, "loss": 0.002, "step": 26914 }, { "epoch": 12.513249651324966, "grad_norm": 0.49857690930366516, "learning_rate": 2.1710891022692827e-05, "loss": 0.0046, "step": 26916 }, { "epoch": 12.514179451417945, "grad_norm": 0.2993846535682678, "learning_rate": 2.178876424499236e-05, "loss": 0.0037, "step": 26918 }, { "epoch": 12.515109251510925, "grad_norm": 0.2918406128883362, "learning_rate": 2.1866669160892367e-05, "loss": 0.003, "step": 26920 }, { "epoch": 12.516039051603904, "grad_norm": 0.4181336760520935, "learning_rate": 2.19446050015026e-05, "loss": 0.0064, "step": 26922 }, { "epoch": 12.516968851696886, "grad_norm": 1.3971047401428223, "learning_rate": 2.2022570997628303e-05, "loss": 0.0307, "step": 26924 }, { "epoch": 12.517898651789865, "grad_norm": 0.10757624357938766, "learning_rate": 2.210056637977605e-05, "loss": 0.0038, "step": 26926 }, { "epoch": 12.518828451882845, "grad_norm": 0.11591702699661255, "learning_rate": 2.2178590378163085e-05, "loss": 0.0045, "step": 26928 }, { "epoch": 12.519758251975825, "grad_norm": 0.2243824005126953, "learning_rate": 2.2256642222723855e-05, "loss": 0.0065, "step": 26930 }, { "epoch": 12.520688052068806, "grad_norm": 0.101276695728302, "learning_rate": 2.233472114311853e-05, "loss": 0.0037, "step": 26932 }, { "epoch": 12.521617852161786, "grad_norm": 0.891777515411377, "learning_rate": 2.2412826368739508e-05, "loss": 0.009, "step": 26934 }, { "epoch": 12.522547652254765, "grad_norm": 0.23097804188728333, "learning_rate": 2.2490957128719563e-05, "loss": 0.0028, "step": 26936 }, { "epoch": 12.523477452347745, "grad_norm": 1.0513845682144165, "learning_rate": 2.2569112651939992e-05, "loss": 0.0089, "step": 26938 }, { "epoch": 12.524407252440724, "grad_norm": 0.06466212868690491, "learning_rate": 2.264729216703716e-05, "loss": 0.0016, "step": 26940 }, { "epoch": 12.525337052533706, "grad_norm": 0.3005838692188263, "learning_rate": 2.2725494902410807e-05, "loss": 0.0045, "step": 26942 }, { "epoch": 12.526266852626685, "grad_norm": 0.4354495704174042, "learning_rate": 2.2803720086231337e-05, "loss": 0.0054, "step": 26944 }, { "epoch": 12.527196652719665, "grad_norm": 0.760443925857544, "learning_rate": 2.288196694644812e-05, "loss": 0.0259, "step": 26946 }, { "epoch": 12.528126452812645, "grad_norm": 0.2712699770927429, "learning_rate": 2.2960234710796057e-05, "loss": 0.0047, "step": 26948 }, { "epoch": 12.529056252905626, "grad_norm": 0.33603060245513916, "learning_rate": 2.3038522606803734e-05, "loss": 0.0036, "step": 26950 }, { "epoch": 12.529986052998606, "grad_norm": 0.45038941502571106, "learning_rate": 2.311682986180176e-05, "loss": 0.0046, "step": 26952 }, { "epoch": 12.530915853091585, "grad_norm": 0.4750058948993683, "learning_rate": 2.3195155702928602e-05, "loss": 0.0053, "step": 26954 }, { "epoch": 12.531845653184565, "grad_norm": 0.14557141065597534, "learning_rate": 2.3273499357139835e-05, "loss": 0.0055, "step": 26956 }, { "epoch": 12.532775453277544, "grad_norm": 0.3266768157482147, "learning_rate": 2.3351860051215576e-05, "loss": 0.0047, "step": 26958 }, { "epoch": 12.533705253370526, "grad_norm": 0.5593791007995605, "learning_rate": 2.3430237011767225e-05, "loss": 0.0068, "step": 26960 }, { "epoch": 12.534635053463505, "grad_norm": 0.32605668902397156, "learning_rate": 2.350862946524583e-05, "loss": 0.0049, "step": 26962 }, { "epoch": 12.535564853556485, "grad_norm": 0.1907048374414444, "learning_rate": 2.358703663794934e-05, "loss": 0.0053, "step": 26964 }, { "epoch": 12.536494653649465, "grad_norm": 0.12208766490221024, "learning_rate": 2.366545775603098e-05, "loss": 0.0041, "step": 26966 }, { "epoch": 12.537424453742446, "grad_norm": 0.11993587762117386, "learning_rate": 2.3743892045505798e-05, "loss": 0.0026, "step": 26968 }, { "epoch": 12.538354253835426, "grad_norm": 0.3224937617778778, "learning_rate": 2.382233873225883e-05, "loss": 0.0057, "step": 26970 }, { "epoch": 12.539284053928405, "grad_norm": 1.164790391921997, "learning_rate": 2.3900797042053307e-05, "loss": 0.015, "step": 26972 }, { "epoch": 12.540213854021385, "grad_norm": 0.06779496371746063, "learning_rate": 2.3979266200537218e-05, "loss": 0.0022, "step": 26974 }, { "epoch": 12.541143654114366, "grad_norm": 0.30273088812828064, "learning_rate": 2.405774543325165e-05, "loss": 0.0094, "step": 26976 }, { "epoch": 12.542073454207346, "grad_norm": 0.3439481854438782, "learning_rate": 2.4136233965638066e-05, "loss": 0.0074, "step": 26978 }, { "epoch": 12.543003254300325, "grad_norm": 0.2551521360874176, "learning_rate": 2.421473102304688e-05, "loss": 0.0086, "step": 26980 }, { "epoch": 12.543933054393305, "grad_norm": 0.08893894404172897, "learning_rate": 2.4293235830743293e-05, "loss": 0.0132, "step": 26982 }, { "epoch": 12.544862854486286, "grad_norm": 1.437912940979004, "learning_rate": 2.4371747613916552e-05, "loss": 0.0125, "step": 26984 }, { "epoch": 12.545792654579266, "grad_norm": 1.2448524236679077, "learning_rate": 2.44502655976874e-05, "loss": 0.0194, "step": 26986 }, { "epoch": 12.546722454672246, "grad_norm": 0.20624667406082153, "learning_rate": 2.4528789007114865e-05, "loss": 0.0118, "step": 26988 }, { "epoch": 12.547652254765225, "grad_norm": 0.6708744764328003, "learning_rate": 2.460731706720441e-05, "loss": 0.0155, "step": 26990 }, { "epoch": 12.548582054858205, "grad_norm": 0.44447851181030273, "learning_rate": 2.4685849002916152e-05, "loss": 0.0042, "step": 26992 }, { "epoch": 12.549511854951186, "grad_norm": 0.25537917017936707, "learning_rate": 2.4764384039171404e-05, "loss": 0.007, "step": 26994 }, { "epoch": 12.550441655044166, "grad_norm": 1.3513526916503906, "learning_rate": 2.4842921400861066e-05, "loss": 0.0285, "step": 26996 }, { "epoch": 12.551371455137145, "grad_norm": 1.209521770477295, "learning_rate": 2.4921460312852912e-05, "loss": 0.0181, "step": 26998 }, { "epoch": 12.552301255230125, "grad_norm": 0.3338717818260193, "learning_rate": 2.4999999999999944e-05, "loss": 0.0081, "step": 27000 }, { "epoch": 12.552301255230125, "eval_cer": 0.12221481684766274, "eval_loss": 0.20480288565158844, "eval_runtime": 401.2627, "eval_samples_per_second": 31.635, "eval_steps_per_second": 0.989, "step": 27000 }, { "epoch": 12.553231055323106, "grad_norm": 0.37611478567123413, "learning_rate": 2.5078539687146978e-05, "loss": 0.0156, "step": 27002 }, { "epoch": 12.554160855416086, "grad_norm": 1.3368234634399414, "learning_rate": 2.515707859913882e-05, "loss": 0.0158, "step": 27004 }, { "epoch": 12.555090655509066, "grad_norm": 1.1218819618225098, "learning_rate": 2.5235615960828666e-05, "loss": 0.0106, "step": 27006 }, { "epoch": 12.556020455602045, "grad_norm": 1.3857694864273071, "learning_rate": 2.531415099708392e-05, "loss": 0.0127, "step": 27008 }, { "epoch": 12.556950255695025, "grad_norm": 0.5107358694076538, "learning_rate": 2.5392682932795483e-05, "loss": 0.0059, "step": 27010 }, { "epoch": 12.557880055788006, "grad_norm": 1.4562562704086304, "learning_rate": 2.5471210992885216e-05, "loss": 0.0154, "step": 27012 }, { "epoch": 12.558809855880986, "grad_norm": 0.40731072425842285, "learning_rate": 2.5549734402312678e-05, "loss": 0.0183, "step": 27014 }, { "epoch": 12.559739655973965, "grad_norm": 0.333427369594574, "learning_rate": 2.562825238608352e-05, "loss": 0.0037, "step": 27016 }, { "epoch": 12.560669456066945, "grad_norm": 0.17130565643310547, "learning_rate": 2.570676416925678e-05, "loss": 0.0042, "step": 27018 }, { "epoch": 12.561599256159926, "grad_norm": 0.26717638969421387, "learning_rate": 2.57852689769532e-05, "loss": 0.0054, "step": 27020 }, { "epoch": 12.562529056252906, "grad_norm": 0.5529980063438416, "learning_rate": 2.586376603436184e-05, "loss": 0.013, "step": 27022 }, { "epoch": 12.563458856345886, "grad_norm": 0.42054006457328796, "learning_rate": 2.594225456674826e-05, "loss": 0.0071, "step": 27024 }, { "epoch": 12.564388656438865, "grad_norm": 0.3220071494579315, "learning_rate": 2.6020733799462686e-05, "loss": 0.0085, "step": 27026 }, { "epoch": 12.565318456531845, "grad_norm": 0.43298089504241943, "learning_rate": 2.6099202957946597e-05, "loss": 0.0135, "step": 27028 }, { "epoch": 12.566248256624826, "grad_norm": 0.7473593950271606, "learning_rate": 2.6177661267741082e-05, "loss": 0.016, "step": 27030 }, { "epoch": 12.567178056717806, "grad_norm": 1.401361346244812, "learning_rate": 2.6256107954494113e-05, "loss": 0.0196, "step": 27032 }, { "epoch": 12.568107856810785, "grad_norm": 0.4086097478866577, "learning_rate": 2.6334542243969106e-05, "loss": 0.0086, "step": 27034 }, { "epoch": 12.569037656903765, "grad_norm": 0.7673318982124329, "learning_rate": 2.641296336205075e-05, "loss": 0.0069, "step": 27036 }, { "epoch": 12.569967456996746, "grad_norm": 1.4990147352218628, "learning_rate": 2.6491370534754268e-05, "loss": 0.0095, "step": 27038 }, { "epoch": 12.570897257089726, "grad_norm": 1.433351993560791, "learning_rate": 2.656976298823287e-05, "loss": 0.0117, "step": 27040 }, { "epoch": 12.571827057182706, "grad_norm": 1.6887589693069458, "learning_rate": 2.664813994878452e-05, "loss": 0.0148, "step": 27042 }, { "epoch": 12.572756857275685, "grad_norm": 0.18364296853542328, "learning_rate": 2.6726500642860086e-05, "loss": 0.0047, "step": 27044 }, { "epoch": 12.573686657368667, "grad_norm": 0.13497452437877655, "learning_rate": 2.6804844297071495e-05, "loss": 0.0078, "step": 27046 }, { "epoch": 12.574616457461646, "grad_norm": 0.1996360719203949, "learning_rate": 2.6883170138198336e-05, "loss": 0.0056, "step": 27048 }, { "epoch": 12.575546257554626, "grad_norm": 0.4490557014942169, "learning_rate": 2.6961477393196187e-05, "loss": 0.021, "step": 27050 }, { "epoch": 12.576476057647605, "grad_norm": 0.8350660800933838, "learning_rate": 2.703976528920386e-05, "loss": 0.0242, "step": 27052 }, { "epoch": 12.577405857740585, "grad_norm": 1.0563334226608276, "learning_rate": 2.7118033053551802e-05, "loss": 0.023, "step": 27054 }, { "epoch": 12.578335657833566, "grad_norm": 1.3566738367080688, "learning_rate": 2.7196279913768584e-05, "loss": 0.0081, "step": 27056 }, { "epoch": 12.579265457926546, "grad_norm": 0.2117971032857895, "learning_rate": 2.7274505097589114e-05, "loss": 0.0223, "step": 27058 }, { "epoch": 12.580195258019526, "grad_norm": 0.27273428440093994, "learning_rate": 2.7352707832962767e-05, "loss": 0.0226, "step": 27060 }, { "epoch": 12.581125058112505, "grad_norm": 0.6383532881736755, "learning_rate": 2.7430887348060115e-05, "loss": 0.0119, "step": 27062 }, { "epoch": 12.582054858205487, "grad_norm": 0.5861068367958069, "learning_rate": 2.7509042871280548e-05, "loss": 0.0086, "step": 27064 }, { "epoch": 12.582984658298466, "grad_norm": 0.4982439875602722, "learning_rate": 2.7587173631260596e-05, "loss": 0.008, "step": 27066 }, { "epoch": 12.583914458391446, "grad_norm": 0.45449623465538025, "learning_rate": 2.7665278856881576e-05, "loss": 0.0177, "step": 27068 }, { "epoch": 12.584844258484425, "grad_norm": 0.28170058131217957, "learning_rate": 2.774335777727625e-05, "loss": 0.0048, "step": 27070 }, { "epoch": 12.585774058577407, "grad_norm": 0.3536652624607086, "learning_rate": 2.782140962183702e-05, "loss": 0.0067, "step": 27072 }, { "epoch": 12.586703858670386, "grad_norm": 0.5755845904350281, "learning_rate": 2.7899433620224053e-05, "loss": 0.0089, "step": 27074 }, { "epoch": 12.587633658763366, "grad_norm": 1.0134705305099487, "learning_rate": 2.79774290023718e-05, "loss": 0.0355, "step": 27076 }, { "epoch": 12.588563458856346, "grad_norm": 0.7467309236526489, "learning_rate": 2.8055394998497162e-05, "loss": 0.0059, "step": 27078 }, { "epoch": 12.589493258949325, "grad_norm": 1.3076494932174683, "learning_rate": 2.8133330839107564e-05, "loss": 0.0377, "step": 27080 }, { "epoch": 12.590423059042307, "grad_norm": 0.44243860244750977, "learning_rate": 2.8211235755007572e-05, "loss": 0.0056, "step": 27082 }, { "epoch": 12.591352859135286, "grad_norm": 1.8206908702850342, "learning_rate": 2.8289108977307097e-05, "loss": 0.0354, "step": 27084 }, { "epoch": 12.592282659228266, "grad_norm": 1.62507164478302, "learning_rate": 2.836694973742872e-05, "loss": 0.0088, "step": 27086 }, { "epoch": 12.593212459321245, "grad_norm": 1.5642449855804443, "learning_rate": 2.8444757267115897e-05, "loss": 0.0277, "step": 27088 }, { "epoch": 12.594142259414227, "grad_norm": 1.7836393117904663, "learning_rate": 2.852253079843973e-05, "loss": 0.0375, "step": 27090 }, { "epoch": 12.595072059507206, "grad_norm": 1.5772013664245605, "learning_rate": 2.860026956380633e-05, "loss": 0.0172, "step": 27092 }, { "epoch": 12.596001859600186, "grad_norm": 0.9352628588676453, "learning_rate": 2.8677972795966e-05, "loss": 0.0102, "step": 27094 }, { "epoch": 12.596931659693166, "grad_norm": 2.193711519241333, "learning_rate": 2.8755639728019034e-05, "loss": 0.0238, "step": 27096 }, { "epoch": 12.597861459786145, "grad_norm": 1.346861720085144, "learning_rate": 2.883326959342399e-05, "loss": 0.014, "step": 27098 }, { "epoch": 12.598791259879127, "grad_norm": 0.6396177411079407, "learning_rate": 2.8910861626005783e-05, "loss": 0.0408, "step": 27100 }, { "epoch": 12.599721059972106, "grad_norm": 0.5026848912239075, "learning_rate": 2.898841505996221e-05, "loss": 0.0167, "step": 27102 }, { "epoch": 12.600650860065086, "grad_norm": 1.7164860963821411, "learning_rate": 2.906592912987218e-05, "loss": 0.0277, "step": 27104 }, { "epoch": 12.601580660158065, "grad_norm": 0.6495733857154846, "learning_rate": 2.914340307070295e-05, "loss": 0.0123, "step": 27106 }, { "epoch": 12.602510460251047, "grad_norm": 0.4617631137371063, "learning_rate": 2.9220836117818344e-05, "loss": 0.0086, "step": 27108 }, { "epoch": 12.603440260344026, "grad_norm": 0.2775193452835083, "learning_rate": 2.9298227506985276e-05, "loss": 0.0074, "step": 27110 }, { "epoch": 12.604370060437006, "grad_norm": 0.9917684197425842, "learning_rate": 2.93755764743818e-05, "loss": 0.0107, "step": 27112 }, { "epoch": 12.605299860529986, "grad_norm": 2.307745933532715, "learning_rate": 2.945288225660519e-05, "loss": 0.0419, "step": 27114 }, { "epoch": 12.606229660622965, "grad_norm": 0.4719885289669037, "learning_rate": 2.953014409067858e-05, "loss": 0.0207, "step": 27116 }, { "epoch": 12.607159460715947, "grad_norm": 1.5915738344192505, "learning_rate": 2.960736121405852e-05, "loss": 0.0177, "step": 27118 }, { "epoch": 12.608089260808926, "grad_norm": 2.151444673538208, "learning_rate": 2.9684532864643166e-05, "loss": 0.0189, "step": 27120 }, { "epoch": 12.609019060901906, "grad_norm": 0.8656976819038391, "learning_rate": 2.9761658280779836e-05, "loss": 0.0102, "step": 27122 }, { "epoch": 12.609948860994885, "grad_norm": 0.35771048069000244, "learning_rate": 2.9838736701271636e-05, "loss": 0.0094, "step": 27124 }, { "epoch": 12.610878661087867, "grad_norm": 0.4210054874420166, "learning_rate": 2.9915767365385496e-05, "loss": 0.0204, "step": 27126 }, { "epoch": 12.611808461180846, "grad_norm": 0.7333981394767761, "learning_rate": 2.9992749512860207e-05, "loss": 0.007, "step": 27128 }, { "epoch": 12.612738261273826, "grad_norm": 2.2474231719970703, "learning_rate": 3.0069682383912888e-05, "loss": 0.0256, "step": 27130 }, { "epoch": 12.613668061366806, "grad_norm": 0.21084412932395935, "learning_rate": 3.014656521924697e-05, "loss": 0.032, "step": 27132 }, { "epoch": 12.614597861459787, "grad_norm": 1.224798560142517, "learning_rate": 3.0223397260060268e-05, "loss": 0.0199, "step": 27134 }, { "epoch": 12.615527661552767, "grad_norm": 0.8384305834770203, "learning_rate": 3.0300177748051376e-05, "loss": 0.0114, "step": 27136 }, { "epoch": 12.616457461645746, "grad_norm": 0.8435202240943909, "learning_rate": 3.037690592542788e-05, "loss": 0.0247, "step": 27138 }, { "epoch": 12.617387261738726, "grad_norm": 0.7729492783546448, "learning_rate": 3.045358103491348e-05, "loss": 0.0074, "step": 27140 }, { "epoch": 12.618317061831707, "grad_norm": 1.681199550628662, "learning_rate": 3.053020231975613e-05, "loss": 0.0355, "step": 27142 }, { "epoch": 12.619246861924687, "grad_norm": 1.7358969449996948, "learning_rate": 3.0606769023734685e-05, "loss": 0.0385, "step": 27144 }, { "epoch": 12.620176662017666, "grad_norm": 1.8897958993911743, "learning_rate": 3.068328039116618e-05, "loss": 0.0276, "step": 27146 }, { "epoch": 12.621106462110646, "grad_norm": 0.6465791463851929, "learning_rate": 3.0759735666914825e-05, "loss": 0.0087, "step": 27148 }, { "epoch": 12.622036262203626, "grad_norm": 1.4790623188018799, "learning_rate": 3.083613409639774e-05, "loss": 0.0341, "step": 27150 }, { "epoch": 12.622966062296607, "grad_norm": 0.7367238998413086, "learning_rate": 3.0912474925593256e-05, "loss": 0.0087, "step": 27152 }, { "epoch": 12.623895862389586, "grad_norm": 0.6395780444145203, "learning_rate": 3.098875740104804e-05, "loss": 0.0097, "step": 27154 }, { "epoch": 12.624825662482566, "grad_norm": 1.9107462167739868, "learning_rate": 3.106498076988522e-05, "loss": 0.0241, "step": 27156 }, { "epoch": 12.625755462575546, "grad_norm": 1.3729132413864136, "learning_rate": 3.1141144279810733e-05, "loss": 0.0294, "step": 27158 }, { "epoch": 12.626685262668527, "grad_norm": 1.7342973947525024, "learning_rate": 3.121724717912132e-05, "loss": 0.0148, "step": 27160 }, { "epoch": 12.627615062761507, "grad_norm": 0.8183881044387817, "learning_rate": 3.129328871671241e-05, "loss": 0.0127, "step": 27162 }, { "epoch": 12.628544862854486, "grad_norm": 0.1916964203119278, "learning_rate": 3.1369268142084577e-05, "loss": 0.0098, "step": 27164 }, { "epoch": 12.629474662947466, "grad_norm": 1.616687297821045, "learning_rate": 3.144518470535138e-05, "loss": 0.0366, "step": 27166 }, { "epoch": 12.630404463040446, "grad_norm": 1.1849896907806396, "learning_rate": 3.152103765724735e-05, "loss": 0.0221, "step": 27168 }, { "epoch": 12.631334263133427, "grad_norm": 1.9878978729248047, "learning_rate": 3.1596826249134284e-05, "loss": 0.025, "step": 27170 }, { "epoch": 12.632264063226406, "grad_norm": 0.26185160875320435, "learning_rate": 3.167254973300955e-05, "loss": 0.0141, "step": 27172 }, { "epoch": 12.633193863319386, "grad_norm": 0.8967354893684387, "learning_rate": 3.1748207361512437e-05, "loss": 0.0186, "step": 27174 }, { "epoch": 12.634123663412366, "grad_norm": 2.002068519592285, "learning_rate": 3.1823798387933195e-05, "loss": 0.055, "step": 27176 }, { "epoch": 12.635053463505347, "grad_norm": 0.46021395921707153, "learning_rate": 3.1899322066218754e-05, "loss": 0.01, "step": 27178 }, { "epoch": 12.635983263598327, "grad_norm": 1.3407965898513794, "learning_rate": 3.197477765098071e-05, "loss": 0.0251, "step": 27180 }, { "epoch": 12.636913063691306, "grad_norm": 0.953614354133606, "learning_rate": 3.2050164397503247e-05, "loss": 0.0255, "step": 27182 }, { "epoch": 12.637842863784286, "grad_norm": 1.2839560508728027, "learning_rate": 3.2125481561749456e-05, "loss": 0.0406, "step": 27184 }, { "epoch": 12.638772663877266, "grad_norm": 1.107517957687378, "learning_rate": 3.220072840036915e-05, "loss": 0.0283, "step": 27186 }, { "epoch": 12.639702463970247, "grad_norm": 1.8281705379486084, "learning_rate": 3.227590417070676e-05, "loss": 0.019, "step": 27188 }, { "epoch": 12.640632264063226, "grad_norm": 0.2755049765110016, "learning_rate": 3.235100813080761e-05, "loss": 0.0192, "step": 27190 }, { "epoch": 12.641562064156206, "grad_norm": 0.29358384013175964, "learning_rate": 3.242603953942592e-05, "loss": 0.0055, "step": 27192 }, { "epoch": 12.642491864249186, "grad_norm": 0.7137882709503174, "learning_rate": 3.2500997656031827e-05, "loss": 0.0362, "step": 27194 }, { "epoch": 12.643421664342167, "grad_norm": 1.038472056388855, "learning_rate": 3.257588174081931e-05, "loss": 0.0153, "step": 27196 }, { "epoch": 12.644351464435147, "grad_norm": 1.3003193140029907, "learning_rate": 3.265069105471268e-05, "loss": 0.0269, "step": 27198 }, { "epoch": 12.645281264528126, "grad_norm": 0.38014933466911316, "learning_rate": 3.272542485937372e-05, "loss": 0.0272, "step": 27200 }, { "epoch": 12.646211064621106, "grad_norm": 1.4581612348556519, "learning_rate": 3.280008241721044e-05, "loss": 0.0228, "step": 27202 }, { "epoch": 12.647140864714087, "grad_norm": 1.199419379234314, "learning_rate": 3.2874662991382725e-05, "loss": 0.0353, "step": 27204 }, { "epoch": 12.648070664807067, "grad_norm": 0.07017502933740616, "learning_rate": 3.294916584581042e-05, "loss": 0.0177, "step": 27206 }, { "epoch": 12.649000464900046, "grad_norm": 2.6887753009796143, "learning_rate": 3.302359024518025e-05, "loss": 0.047, "step": 27208 }, { "epoch": 12.649930264993026, "grad_norm": 0.9538714289665222, "learning_rate": 3.3097935454953786e-05, "loss": 0.0135, "step": 27210 }, { "epoch": 12.650860065086006, "grad_norm": 2.022449254989624, "learning_rate": 3.317220074137365e-05, "loss": 0.0392, "step": 27212 }, { "epoch": 12.651789865178987, "grad_norm": 0.28501299023628235, "learning_rate": 3.3246385371471286e-05, "loss": 0.0045, "step": 27214 }, { "epoch": 12.652719665271967, "grad_norm": 0.43390345573425293, "learning_rate": 3.332048861307466e-05, "loss": 0.0224, "step": 27216 }, { "epoch": 12.653649465364946, "grad_norm": 1.5357438325881958, "learning_rate": 3.339450973481455e-05, "loss": 0.0226, "step": 27218 }, { "epoch": 12.654579265457926, "grad_norm": 1.0375880002975464, "learning_rate": 3.346844800613219e-05, "loss": 0.021, "step": 27220 }, { "epoch": 12.655509065550907, "grad_norm": 2.0253193378448486, "learning_rate": 3.354230269728703e-05, "loss": 0.0294, "step": 27222 }, { "epoch": 12.656438865643887, "grad_norm": 1.2055866718292236, "learning_rate": 3.3616073079362906e-05, "loss": 0.0172, "step": 27224 }, { "epoch": 12.657368665736866, "grad_norm": 1.2971638441085815, "learning_rate": 3.36897584242761e-05, "loss": 0.0357, "step": 27226 }, { "epoch": 12.658298465829846, "grad_norm": 0.8915222883224487, "learning_rate": 3.376335800478153e-05, "loss": 0.0309, "step": 27228 }, { "epoch": 12.659228265922827, "grad_norm": 1.475021481513977, "learning_rate": 3.3836871094481526e-05, "loss": 0.0168, "step": 27230 }, { "epoch": 12.660158066015807, "grad_norm": 1.4171838760375977, "learning_rate": 3.39102969678314e-05, "loss": 0.044, "step": 27232 }, { "epoch": 12.661087866108787, "grad_norm": 1.1912542581558228, "learning_rate": 3.398363490014727e-05, "loss": 0.0134, "step": 27234 }, { "epoch": 12.662017666201766, "grad_norm": 1.653161644935608, "learning_rate": 3.4056884167613684e-05, "loss": 0.0123, "step": 27236 }, { "epoch": 12.662947466294746, "grad_norm": 0.5819841027259827, "learning_rate": 3.4130044047289775e-05, "loss": 0.0139, "step": 27238 }, { "epoch": 12.663877266387727, "grad_norm": 1.165341854095459, "learning_rate": 3.4203113817117065e-05, "loss": 0.0215, "step": 27240 }, { "epoch": 12.664807066480707, "grad_norm": 1.0140388011932373, "learning_rate": 3.4276092755926267e-05, "loss": 0.0229, "step": 27242 }, { "epoch": 12.665736866573686, "grad_norm": 0.5199986696243286, "learning_rate": 3.434898014344504e-05, "loss": 0.0131, "step": 27244 }, { "epoch": 12.666666666666666, "grad_norm": 0.8691113591194153, "learning_rate": 3.442177526030413e-05, "loss": 0.0293, "step": 27246 }, { "epoch": 12.667596466759647, "grad_norm": 1.193203091621399, "learning_rate": 3.449447738804497e-05, "loss": 0.0186, "step": 27248 }, { "epoch": 12.668526266852627, "grad_norm": 0.7255285382270813, "learning_rate": 3.456708580912723e-05, "loss": 0.0159, "step": 27250 }, { "epoch": 12.669456066945607, "grad_norm": 1.088239073753357, "learning_rate": 3.463959980693494e-05, "loss": 0.0217, "step": 27252 }, { "epoch": 12.670385867038586, "grad_norm": 0.36527958512306213, "learning_rate": 3.471201866578422e-05, "loss": 0.0437, "step": 27254 }, { "epoch": 12.671315667131566, "grad_norm": 1.3608731031417847, "learning_rate": 3.478434167093015e-05, "loss": 0.0284, "step": 27256 }, { "epoch": 12.672245467224547, "grad_norm": 1.9051270484924316, "learning_rate": 3.48565681085739e-05, "loss": 0.038, "step": 27258 }, { "epoch": 12.673175267317527, "grad_norm": 0.8611351847648621, "learning_rate": 3.4928697265869685e-05, "loss": 0.0135, "step": 27260 }, { "epoch": 12.674105067410506, "grad_norm": 0.7515602707862854, "learning_rate": 3.500072843093165e-05, "loss": 0.0244, "step": 27262 }, { "epoch": 12.675034867503486, "grad_norm": 1.356108546257019, "learning_rate": 3.507266089284163e-05, "loss": 0.0134, "step": 27264 }, { "epoch": 12.675964667596467, "grad_norm": 1.7929893732070923, "learning_rate": 3.51444939416551e-05, "loss": 0.0445, "step": 27266 }, { "epoch": 12.676894467689447, "grad_norm": 1.7022966146469116, "learning_rate": 3.52162268684087e-05, "loss": 0.0394, "step": 27268 }, { "epoch": 12.677824267782427, "grad_norm": 1.1873538494110107, "learning_rate": 3.528785896512774e-05, "loss": 0.0214, "step": 27270 }, { "epoch": 12.678754067875406, "grad_norm": 0.6168268322944641, "learning_rate": 3.535938952483216e-05, "loss": 0.0087, "step": 27272 }, { "epoch": 12.679683867968386, "grad_norm": 0.8603420257568359, "learning_rate": 3.543081784154406e-05, "loss": 0.0134, "step": 27274 }, { "epoch": 12.680613668061367, "grad_norm": 1.5124132633209229, "learning_rate": 3.550214321029511e-05, "loss": 0.0219, "step": 27276 }, { "epoch": 12.681543468154347, "grad_norm": 0.8445823788642883, "learning_rate": 3.557336492713257e-05, "loss": 0.0225, "step": 27278 }, { "epoch": 12.682473268247326, "grad_norm": 0.6524169445037842, "learning_rate": 3.5644482289127004e-05, "loss": 0.0162, "step": 27280 }, { "epoch": 12.683403068340306, "grad_norm": 0.20320701599121094, "learning_rate": 3.5715494594378277e-05, "loss": 0.0076, "step": 27282 }, { "epoch": 12.684332868433287, "grad_norm": 1.1172572374343872, "learning_rate": 3.5786401142024084e-05, "loss": 0.0289, "step": 27284 }, { "epoch": 12.685262668526267, "grad_norm": 1.796728253364563, "learning_rate": 3.585720123224526e-05, "loss": 0.0402, "step": 27286 }, { "epoch": 12.686192468619247, "grad_norm": 1.14794921875, "learning_rate": 3.592789416627335e-05, "loss": 0.0276, "step": 27288 }, { "epoch": 12.687122268712226, "grad_norm": 1.1858283281326294, "learning_rate": 3.599847924639794e-05, "loss": 0.016, "step": 27290 }, { "epoch": 12.688052068805208, "grad_norm": 0.18419228494167328, "learning_rate": 3.606895577597264e-05, "loss": 0.0171, "step": 27292 }, { "epoch": 12.688981868898187, "grad_norm": 1.0591819286346436, "learning_rate": 3.613932305942254e-05, "loss": 0.0299, "step": 27294 }, { "epoch": 12.689911668991167, "grad_norm": 1.8592069149017334, "learning_rate": 3.620958040225081e-05, "loss": 0.0413, "step": 27296 }, { "epoch": 12.690841469084146, "grad_norm": 1.3343150615692139, "learning_rate": 3.6279727111046167e-05, "loss": 0.0417, "step": 27298 }, { "epoch": 12.691771269177128, "grad_norm": 0.8965228796005249, "learning_rate": 3.6349762493488746e-05, "loss": 0.015, "step": 27300 }, { "epoch": 12.692701069270107, "grad_norm": 0.7708730697631836, "learning_rate": 3.6419685858357444e-05, "loss": 0.013, "step": 27302 }, { "epoch": 12.693630869363087, "grad_norm": 0.8119181394577026, "learning_rate": 3.6489496515537184e-05, "loss": 0.0227, "step": 27304 }, { "epoch": 12.694560669456067, "grad_norm": 1.4231623411178589, "learning_rate": 3.6559193776024815e-05, "loss": 0.0216, "step": 27306 }, { "epoch": 12.695490469549046, "grad_norm": 1.4975860118865967, "learning_rate": 3.662877695193652e-05, "loss": 0.02, "step": 27308 }, { "epoch": 12.696420269642028, "grad_norm": 1.0820938348770142, "learning_rate": 3.669824535651443e-05, "loss": 0.0198, "step": 27310 }, { "epoch": 12.697350069735007, "grad_norm": 1.0596873760223389, "learning_rate": 3.6767598304133446e-05, "loss": 0.0322, "step": 27312 }, { "epoch": 12.698279869827987, "grad_norm": 1.0393450260162354, "learning_rate": 3.683683511030797e-05, "loss": 0.0152, "step": 27314 }, { "epoch": 12.699209669920966, "grad_norm": 1.8781890869140625, "learning_rate": 3.690595509169853e-05, "loss": 0.0205, "step": 27316 }, { "epoch": 12.700139470013948, "grad_norm": 1.2024378776550293, "learning_rate": 3.697495756611911e-05, "loss": 0.0226, "step": 27318 }, { "epoch": 12.701069270106927, "grad_norm": 2.270673990249634, "learning_rate": 3.7043841852543e-05, "loss": 0.0303, "step": 27320 }, { "epoch": 12.701999070199907, "grad_norm": 1.298632025718689, "learning_rate": 3.7112607271109944e-05, "loss": 0.0314, "step": 27322 }, { "epoch": 12.702928870292887, "grad_norm": 1.3124080896377563, "learning_rate": 3.7181253143133336e-05, "loss": 0.0304, "step": 27324 }, { "epoch": 12.703858670385866, "grad_norm": 1.1907761096954346, "learning_rate": 3.724977879110598e-05, "loss": 0.0182, "step": 27326 }, { "epoch": 12.704788470478848, "grad_norm": 0.9753879308700562, "learning_rate": 3.731818353870739e-05, "loss": 0.0195, "step": 27328 }, { "epoch": 12.705718270571827, "grad_norm": 0.4445098340511322, "learning_rate": 3.738646671081017e-05, "loss": 0.0087, "step": 27330 }, { "epoch": 12.706648070664807, "grad_norm": 1.2709330320358276, "learning_rate": 3.745462763348729e-05, "loss": 0.0104, "step": 27332 }, { "epoch": 12.707577870757786, "grad_norm": 0.8887211680412292, "learning_rate": 3.752266563401779e-05, "loss": 0.0322, "step": 27334 }, { "epoch": 12.708507670850768, "grad_norm": 1.061726689338684, "learning_rate": 3.75905800408941e-05, "loss": 0.0195, "step": 27336 }, { "epoch": 12.709437470943747, "grad_norm": 0.7178056836128235, "learning_rate": 3.765837018382842e-05, "loss": 0.0084, "step": 27338 }, { "epoch": 12.710367271036727, "grad_norm": 1.6688988208770752, "learning_rate": 3.772603539375942e-05, "loss": 0.0268, "step": 27340 }, { "epoch": 12.711297071129707, "grad_norm": 0.6873472929000854, "learning_rate": 3.779357500285864e-05, "loss": 0.0192, "step": 27342 }, { "epoch": 12.712226871222686, "grad_norm": 2.5178604125976562, "learning_rate": 3.786098834453771e-05, "loss": 0.018, "step": 27344 }, { "epoch": 12.713156671315668, "grad_norm": 1.5226199626922607, "learning_rate": 3.792827475345401e-05, "loss": 0.0565, "step": 27346 }, { "epoch": 12.714086471408647, "grad_norm": 0.9096677303314209, "learning_rate": 3.799543356551786e-05, "loss": 0.0202, "step": 27348 }, { "epoch": 12.715016271501627, "grad_norm": 0.9902342557907104, "learning_rate": 3.8062464117898724e-05, "loss": 0.0134, "step": 27350 }, { "epoch": 12.715946071594606, "grad_norm": 7.173812389373779, "learning_rate": 3.812936574903243e-05, "loss": 0.0241, "step": 27352 }, { "epoch": 12.716875871687588, "grad_norm": 2.3761396408081055, "learning_rate": 3.819613779862673e-05, "loss": 0.058, "step": 27354 }, { "epoch": 12.717805671780567, "grad_norm": 1.3797005414962769, "learning_rate": 3.82627796076683e-05, "loss": 0.0475, "step": 27356 }, { "epoch": 12.718735471873547, "grad_norm": 0.6454009413719177, "learning_rate": 3.832929051842969e-05, "loss": 0.0079, "step": 27358 }, { "epoch": 12.719665271966527, "grad_norm": 2.199108362197876, "learning_rate": 3.839566987447492e-05, "loss": 0.0698, "step": 27360 }, { "epoch": 12.720595072059508, "grad_norm": 2.0809121131896973, "learning_rate": 3.8461917020666396e-05, "loss": 0.0414, "step": 27362 }, { "epoch": 12.721524872152488, "grad_norm": 0.7347822785377502, "learning_rate": 3.852803130317197e-05, "loss": 0.0235, "step": 27364 }, { "epoch": 12.722454672245467, "grad_norm": 0.3243637681007385, "learning_rate": 3.8594012069469925e-05, "loss": 0.0229, "step": 27366 }, { "epoch": 12.723384472338447, "grad_norm": 0.2849425971508026, "learning_rate": 3.8659858668356874e-05, "loss": 0.0148, "step": 27368 }, { "epoch": 12.724314272431426, "grad_norm": 0.9486817121505737, "learning_rate": 3.8725570449953326e-05, "loss": 0.0194, "step": 27370 }, { "epoch": 12.725244072524408, "grad_norm": 1.04936945438385, "learning_rate": 3.879114676571082e-05, "loss": 0.0241, "step": 27372 }, { "epoch": 12.726173872617387, "grad_norm": 1.282749056816101, "learning_rate": 3.885658696841745e-05, "loss": 0.0378, "step": 27374 }, { "epoch": 12.727103672710367, "grad_norm": 0.8549706339836121, "learning_rate": 3.8921890412204684e-05, "loss": 0.0242, "step": 27376 }, { "epoch": 12.728033472803347, "grad_norm": 0.7849053740501404, "learning_rate": 3.89870564525542e-05, "loss": 0.0182, "step": 27378 }, { "epoch": 12.728963272896328, "grad_norm": 1.7769967317581177, "learning_rate": 3.9052084446303315e-05, "loss": 0.025, "step": 27380 }, { "epoch": 12.729893072989308, "grad_norm": 1.678720235824585, "learning_rate": 3.911697375165201e-05, "loss": 0.0393, "step": 27382 }, { "epoch": 12.730822873082287, "grad_norm": 2.2632040977478027, "learning_rate": 3.918172372816887e-05, "loss": 0.0362, "step": 27384 }, { "epoch": 12.731752673175267, "grad_norm": 1.6281095743179321, "learning_rate": 3.924633373679808e-05, "loss": 0.0175, "step": 27386 }, { "epoch": 12.732682473268248, "grad_norm": 2.679466724395752, "learning_rate": 3.9310803139864795e-05, "loss": 0.0547, "step": 27388 }, { "epoch": 12.733612273361228, "grad_norm": 0.3175066113471985, "learning_rate": 3.937513130108187e-05, "loss": 0.0168, "step": 27390 }, { "epoch": 12.734542073454207, "grad_norm": 2.060793161392212, "learning_rate": 3.9439317585556784e-05, "loss": 0.0766, "step": 27392 }, { "epoch": 12.735471873547187, "grad_norm": 1.2747979164123535, "learning_rate": 3.950336135979635e-05, "loss": 0.0322, "step": 27394 }, { "epoch": 12.736401673640167, "grad_norm": 2.101452589035034, "learning_rate": 3.956726199171441e-05, "loss": 0.0382, "step": 27396 }, { "epoch": 12.737331473733148, "grad_norm": 1.5876123905181885, "learning_rate": 3.963101885063779e-05, "loss": 0.0457, "step": 27398 }, { "epoch": 12.738261273826128, "grad_norm": 1.5030689239501953, "learning_rate": 3.96946313073119e-05, "loss": 0.0345, "step": 27400 }, { "epoch": 12.739191073919107, "grad_norm": 0.8673114776611328, "learning_rate": 3.975809873390747e-05, "loss": 0.0493, "step": 27402 }, { "epoch": 12.740120874012087, "grad_norm": 2.122001886367798, "learning_rate": 3.982142050402649e-05, "loss": 0.077, "step": 27404 }, { "epoch": 12.741050674105068, "grad_norm": 1.9552983045578003, "learning_rate": 3.98845959927089e-05, "loss": 0.0293, "step": 27406 }, { "epoch": 12.741980474198048, "grad_norm": 1.9257895946502686, "learning_rate": 3.9947624576438036e-05, "loss": 0.0281, "step": 27408 }, { "epoch": 12.742910274291027, "grad_norm": 1.2984381914138794, "learning_rate": 4.001050563314705e-05, "loss": 0.0654, "step": 27410 }, { "epoch": 12.743840074384007, "grad_norm": 0.6415730118751526, "learning_rate": 4.00732385422256e-05, "loss": 0.016, "step": 27412 }, { "epoch": 12.744769874476987, "grad_norm": 1.4131461381912231, "learning_rate": 4.013582268452505e-05, "loss": 0.0394, "step": 27414 }, { "epoch": 12.745699674569968, "grad_norm": 2.311501979827881, "learning_rate": 4.019825744236518e-05, "loss": 0.0304, "step": 27416 }, { "epoch": 12.746629474662948, "grad_norm": 0.9530320763587952, "learning_rate": 4.026054219954014e-05, "loss": 0.0335, "step": 27418 }, { "epoch": 12.747559274755927, "grad_norm": 0.8311817049980164, "learning_rate": 4.0322676341324524e-05, "loss": 0.0114, "step": 27420 }, { "epoch": 12.748489074848907, "grad_norm": 1.007142424583435, "learning_rate": 4.038465925447942e-05, "loss": 0.0221, "step": 27422 }, { "epoch": 12.749418874941888, "grad_norm": 1.3150819540023804, "learning_rate": 4.044649032725839e-05, "loss": 0.0157, "step": 27424 }, { "epoch": 12.750348675034868, "grad_norm": 1.3591312170028687, "learning_rate": 4.050816894941396e-05, "loss": 0.0151, "step": 27426 }, { "epoch": 12.751278475127847, "grad_norm": 2.084897756576538, "learning_rate": 4.0569694512202906e-05, "loss": 0.0347, "step": 27428 }, { "epoch": 12.752208275220827, "grad_norm": 1.075244426727295, "learning_rate": 4.063106640839261e-05, "loss": 0.015, "step": 27430 }, { "epoch": 12.753138075313807, "grad_norm": 1.4191460609436035, "learning_rate": 4.069228403226753e-05, "loss": 0.024, "step": 27432 }, { "epoch": 12.754067875406788, "grad_norm": 0.14342521131038666, "learning_rate": 4.075334677963427e-05, "loss": 0.0193, "step": 27434 }, { "epoch": 12.754997675499768, "grad_norm": 1.2597380876541138, "learning_rate": 4.081425404782818e-05, "loss": 0.0303, "step": 27436 }, { "epoch": 12.755927475592747, "grad_norm": 0.2983620762825012, "learning_rate": 4.087500523571898e-05, "loss": 0.0225, "step": 27438 }, { "epoch": 12.756857275685727, "grad_norm": 1.0992529392242432, "learning_rate": 4.093559974371724e-05, "loss": 0.028, "step": 27440 }, { "epoch": 12.757787075778708, "grad_norm": 1.620175838470459, "learning_rate": 4.09960369737795e-05, "loss": 0.0254, "step": 27442 }, { "epoch": 12.758716875871688, "grad_norm": 1.763675570487976, "learning_rate": 4.105631632941455e-05, "loss": 0.0243, "step": 27444 }, { "epoch": 12.759646675964667, "grad_norm": 2.2457146644592285, "learning_rate": 4.1116437215689885e-05, "loss": 0.0419, "step": 27446 }, { "epoch": 12.760576476057647, "grad_norm": 0.9490606784820557, "learning_rate": 4.117639903923623e-05, "loss": 0.0152, "step": 27448 }, { "epoch": 12.761506276150628, "grad_norm": 1.0594090223312378, "learning_rate": 4.12362012082546e-05, "loss": 0.0419, "step": 27450 }, { "epoch": 12.762436076243608, "grad_norm": 1.4532982110977173, "learning_rate": 4.1295843132522014e-05, "loss": 0.0217, "step": 27452 }, { "epoch": 12.763365876336588, "grad_norm": 1.4178736209869385, "learning_rate": 4.13553242233966e-05, "loss": 0.0247, "step": 27454 }, { "epoch": 12.764295676429567, "grad_norm": 1.071010947227478, "learning_rate": 4.1414643893824015e-05, "loss": 0.0442, "step": 27456 }, { "epoch": 12.765225476522549, "grad_norm": 1.0663219690322876, "learning_rate": 4.147380155834292e-05, "loss": 0.0158, "step": 27458 }, { "epoch": 12.766155276615528, "grad_norm": 2.236523151397705, "learning_rate": 4.153279663309132e-05, "loss": 0.0571, "step": 27460 }, { "epoch": 12.767085076708508, "grad_norm": 0.6889045834541321, "learning_rate": 4.159162853581153e-05, "loss": 0.0106, "step": 27462 }, { "epoch": 12.768014876801487, "grad_norm": 1.3563168048858643, "learning_rate": 4.165029668585626e-05, "loss": 0.0253, "step": 27464 }, { "epoch": 12.768944676894467, "grad_norm": 1.0530935525894165, "learning_rate": 4.1708800504194827e-05, "loss": 0.0509, "step": 27466 }, { "epoch": 12.769874476987448, "grad_norm": 2.724468946456909, "learning_rate": 4.176713941341803e-05, "loss": 0.0519, "step": 27468 }, { "epoch": 12.770804277080428, "grad_norm": 1.2580989599227905, "learning_rate": 4.182531283774439e-05, "loss": 0.0348, "step": 27470 }, { "epoch": 12.771734077173408, "grad_norm": 2.1214182376861572, "learning_rate": 4.188332020302556e-05, "loss": 0.0458, "step": 27472 }, { "epoch": 12.772663877266387, "grad_norm": 2.0684120655059814, "learning_rate": 4.1941160936752664e-05, "loss": 0.0339, "step": 27474 }, { "epoch": 12.773593677359369, "grad_norm": 1.4049898386001587, "learning_rate": 4.199883446806062e-05, "loss": 0.0138, "step": 27476 }, { "epoch": 12.774523477452348, "grad_norm": 1.2586886882781982, "learning_rate": 4.2056340227734954e-05, "loss": 0.0205, "step": 27478 }, { "epoch": 12.775453277545328, "grad_norm": 2.5026769638061523, "learning_rate": 4.211367764821729e-05, "loss": 0.0476, "step": 27480 }, { "epoch": 12.776383077638307, "grad_norm": 1.0676406621932983, "learning_rate": 4.2170846163610315e-05, "loss": 0.0185, "step": 27482 }, { "epoch": 12.777312877731287, "grad_norm": 0.8797820210456848, "learning_rate": 4.2227845209683716e-05, "loss": 0.0276, "step": 27484 }, { "epoch": 12.778242677824268, "grad_norm": 2.4237561225891113, "learning_rate": 4.2284674223880194e-05, "loss": 0.04, "step": 27486 }, { "epoch": 12.779172477917248, "grad_norm": 1.949557900428772, "learning_rate": 4.2341332645320184e-05, "loss": 0.0261, "step": 27488 }, { "epoch": 12.780102278010228, "grad_norm": 2.6296274662017822, "learning_rate": 4.239781991480795e-05, "loss": 0.0272, "step": 27490 }, { "epoch": 12.781032078103207, "grad_norm": 2.096654176712036, "learning_rate": 4.2454135474836804e-05, "loss": 0.0286, "step": 27492 }, { "epoch": 12.781961878196189, "grad_norm": 1.0305311679840088, "learning_rate": 4.251027876959517e-05, "loss": 0.024, "step": 27494 }, { "epoch": 12.782891678289168, "grad_norm": 0.5156529545783997, "learning_rate": 4.2566249244971266e-05, "loss": 0.0153, "step": 27496 }, { "epoch": 12.783821478382148, "grad_norm": 0.6731286644935608, "learning_rate": 4.2622046348558976e-05, "loss": 0.0271, "step": 27498 }, { "epoch": 12.784751278475127, "grad_norm": 1.9671269655227661, "learning_rate": 4.2677669529663785e-05, "loss": 0.0773, "step": 27500 }, { "epoch": 12.785681078568107, "grad_norm": 1.9341175556182861, "learning_rate": 4.273311823930696e-05, "loss": 0.0445, "step": 27502 }, { "epoch": 12.786610878661088, "grad_norm": 1.8218181133270264, "learning_rate": 4.278839193023228e-05, "loss": 0.0516, "step": 27504 }, { "epoch": 12.787540678754068, "grad_norm": 0.9875701069831848, "learning_rate": 4.2843490056910575e-05, "loss": 0.0321, "step": 27506 }, { "epoch": 12.788470478847048, "grad_norm": 0.3952265679836273, "learning_rate": 4.289841207554585e-05, "loss": 0.0249, "step": 27508 }, { "epoch": 12.789400278940027, "grad_norm": 2.09900164604187, "learning_rate": 4.295315744407981e-05, "loss": 0.0311, "step": 27510 }, { "epoch": 12.790330079033009, "grad_norm": 0.6599586606025696, "learning_rate": 4.300772562219766e-05, "loss": 0.0161, "step": 27512 }, { "epoch": 12.791259879125988, "grad_norm": 1.49622642993927, "learning_rate": 4.306211607133376e-05, "loss": 0.0696, "step": 27514 }, { "epoch": 12.792189679218968, "grad_norm": 1.413885235786438, "learning_rate": 4.311632825467621e-05, "loss": 0.0265, "step": 27516 }, { "epoch": 12.793119479311947, "grad_norm": 1.5458823442459106, "learning_rate": 4.317036163717252e-05, "loss": 0.0196, "step": 27518 }, { "epoch": 12.794049279404929, "grad_norm": 1.170237421989441, "learning_rate": 4.322421568553526e-05, "loss": 0.0197, "step": 27520 }, { "epoch": 12.794979079497908, "grad_norm": 1.9627810716629028, "learning_rate": 4.32778898682466e-05, "loss": 0.0275, "step": 27522 }, { "epoch": 12.795908879590888, "grad_norm": 2.0224597454071045, "learning_rate": 4.333138365556401e-05, "loss": 0.0484, "step": 27524 }, { "epoch": 12.796838679683868, "grad_norm": 1.985824704170227, "learning_rate": 4.338469651952532e-05, "loss": 0.0401, "step": 27526 }, { "epoch": 12.797768479776847, "grad_norm": 1.8947224617004395, "learning_rate": 4.343782793395442e-05, "loss": 0.0423, "step": 27528 }, { "epoch": 12.798698279869829, "grad_norm": 2.8104805946350098, "learning_rate": 4.3490777374465335e-05, "loss": 0.0778, "step": 27530 }, { "epoch": 12.799628079962808, "grad_norm": 1.7521196603775024, "learning_rate": 4.354354431846847e-05, "loss": 0.0607, "step": 27532 }, { "epoch": 12.800557880055788, "grad_norm": 1.5380083322525024, "learning_rate": 4.359612824517564e-05, "loss": 0.0313, "step": 27534 }, { "epoch": 12.801487680148767, "grad_norm": 2.133007764816284, "learning_rate": 4.364852863560459e-05, "loss": 0.0317, "step": 27536 }, { "epoch": 12.802417480241749, "grad_norm": 1.1874172687530518, "learning_rate": 4.370074497258451e-05, "loss": 0.0162, "step": 27538 }, { "epoch": 12.803347280334728, "grad_norm": 1.3189914226531982, "learning_rate": 4.3752776740761456e-05, "loss": 0.0492, "step": 27540 }, { "epoch": 12.804277080427708, "grad_norm": 2.761821985244751, "learning_rate": 4.3804623426602784e-05, "loss": 0.0479, "step": 27542 }, { "epoch": 12.805206880520688, "grad_norm": 1.7797057628631592, "learning_rate": 4.385628451840262e-05, "loss": 0.0424, "step": 27544 }, { "epoch": 12.806136680613669, "grad_norm": 2.704803228378296, "learning_rate": 4.390775950628674e-05, "loss": 0.054, "step": 27546 }, { "epoch": 12.807066480706649, "grad_norm": 0.7837488055229187, "learning_rate": 4.395904788221801e-05, "loss": 0.0331, "step": 27548 }, { "epoch": 12.807996280799628, "grad_norm": 1.2416906356811523, "learning_rate": 4.401014914000077e-05, "loss": 0.0283, "step": 27550 }, { "epoch": 12.808926080892608, "grad_norm": 0.8818725943565369, "learning_rate": 4.4061062775286106e-05, "loss": 0.0255, "step": 27552 }, { "epoch": 12.809855880985587, "grad_norm": 2.7890467643737793, "learning_rate": 4.411178828557722e-05, "loss": 0.0385, "step": 27554 }, { "epoch": 12.810785681078569, "grad_norm": 0.880877673625946, "learning_rate": 4.416232517023381e-05, "loss": 0.0239, "step": 27556 }, { "epoch": 12.811715481171548, "grad_norm": 0.7926762104034424, "learning_rate": 4.421267293047701e-05, "loss": 0.05, "step": 27558 }, { "epoch": 12.812645281264528, "grad_norm": 1.6009132862091064, "learning_rate": 4.426283106939473e-05, "loss": 0.0459, "step": 27560 }, { "epoch": 12.813575081357508, "grad_norm": 1.4781907796859741, "learning_rate": 4.431279909194664e-05, "loss": 0.0475, "step": 27562 }, { "epoch": 12.814504881450489, "grad_norm": 1.8653793334960938, "learning_rate": 4.436257650496839e-05, "loss": 0.0388, "step": 27564 }, { "epoch": 12.815434681543469, "grad_norm": 1.5886484384536743, "learning_rate": 4.441216281717692e-05, "loss": 0.0296, "step": 27566 }, { "epoch": 12.816364481636448, "grad_norm": 1.198035478591919, "learning_rate": 4.4461557539175574e-05, "loss": 0.0509, "step": 27568 }, { "epoch": 12.817294281729428, "grad_norm": 1.1618129014968872, "learning_rate": 4.451076018345825e-05, "loss": 0.0394, "step": 27570 }, { "epoch": 12.818224081822407, "grad_norm": 1.7122292518615723, "learning_rate": 4.455977026441463e-05, "loss": 0.0491, "step": 27572 }, { "epoch": 12.819153881915389, "grad_norm": 2.8709123134613037, "learning_rate": 4.46085872983352e-05, "loss": 0.0595, "step": 27574 }, { "epoch": 12.820083682008368, "grad_norm": 1.3911067247390747, "learning_rate": 4.4657210803415444e-05, "loss": 0.0358, "step": 27576 }, { "epoch": 12.821013482101348, "grad_norm": 1.4613419771194458, "learning_rate": 4.470564029976099e-05, "loss": 0.0643, "step": 27578 }, { "epoch": 12.821943282194328, "grad_norm": 0.9479755759239197, "learning_rate": 4.475387530939217e-05, "loss": 0.0277, "step": 27580 }, { "epoch": 12.822873082287309, "grad_norm": 0.6890069246292114, "learning_rate": 4.480191535624922e-05, "loss": 0.0368, "step": 27582 }, { "epoch": 12.823802882380289, "grad_norm": 1.5031853914260864, "learning_rate": 4.4849759966195955e-05, "loss": 0.0355, "step": 27584 }, { "epoch": 12.824732682473268, "grad_norm": 1.157843828201294, "learning_rate": 4.489740866702538e-05, "loss": 0.033, "step": 27586 }, { "epoch": 12.825662482566248, "grad_norm": 0.855131208896637, "learning_rate": 4.494486098846428e-05, "loss": 0.0487, "step": 27588 }, { "epoch": 12.826592282659227, "grad_norm": 1.2272040843963623, "learning_rate": 4.499211646217729e-05, "loss": 0.0172, "step": 27590 }, { "epoch": 12.827522082752209, "grad_norm": 2.0886247158050537, "learning_rate": 4.503917462177197e-05, "loss": 0.0483, "step": 27592 }, { "epoch": 12.828451882845188, "grad_norm": 0.8428955674171448, "learning_rate": 4.508603500280316e-05, "loss": 0.0213, "step": 27594 }, { "epoch": 12.829381682938168, "grad_norm": 0.45451733469963074, "learning_rate": 4.513269714277805e-05, "loss": 0.0175, "step": 27596 }, { "epoch": 12.830311483031148, "grad_norm": 2.649380922317505, "learning_rate": 4.5179160581160025e-05, "loss": 0.0419, "step": 27598 }, { "epoch": 12.831241283124129, "grad_norm": 1.151991605758667, "learning_rate": 4.522542485937362e-05, "loss": 0.0404, "step": 27600 }, { "epoch": 12.832171083217109, "grad_norm": 1.9627066850662231, "learning_rate": 4.52714895208093e-05, "loss": 0.0198, "step": 27602 }, { "epoch": 12.833100883310088, "grad_norm": 1.5137217044830322, "learning_rate": 4.531735411082734e-05, "loss": 0.0445, "step": 27604 }, { "epoch": 12.834030683403068, "grad_norm": 2.6942191123962402, "learning_rate": 4.5363018176762645e-05, "loss": 0.0605, "step": 27606 }, { "epoch": 12.83496048349605, "grad_norm": 1.5854650735855103, "learning_rate": 4.540848126792953e-05, "loss": 0.0343, "step": 27608 }, { "epoch": 12.835890283589029, "grad_norm": 1.3953288793563843, "learning_rate": 4.5453742935625644e-05, "loss": 0.0572, "step": 27610 }, { "epoch": 12.836820083682008, "grad_norm": 1.381622314453125, "learning_rate": 4.5498802733136395e-05, "loss": 0.0662, "step": 27612 }, { "epoch": 12.837749883774988, "grad_norm": 2.287569046020508, "learning_rate": 4.5543660215739766e-05, "loss": 0.0427, "step": 27614 }, { "epoch": 12.83867968386797, "grad_norm": 1.2336279153823853, "learning_rate": 4.558831494071072e-05, "loss": 0.0424, "step": 27616 }, { "epoch": 12.839609483960949, "grad_norm": 1.8809975385665894, "learning_rate": 4.563276646732505e-05, "loss": 0.0396, "step": 27618 }, { "epoch": 12.840539284053929, "grad_norm": 0.7303993701934814, "learning_rate": 4.567701435686403e-05, "loss": 0.0393, "step": 27620 }, { "epoch": 12.841469084146908, "grad_norm": 0.8941088318824768, "learning_rate": 4.572105817261906e-05, "loss": 0.0427, "step": 27622 }, { "epoch": 12.842398884239888, "grad_norm": 0.9285408854484558, "learning_rate": 4.576489747989534e-05, "loss": 0.037, "step": 27624 }, { "epoch": 12.84332868433287, "grad_norm": 1.4892704486846924, "learning_rate": 4.580853184601653e-05, "loss": 0.054, "step": 27626 }, { "epoch": 12.844258484425849, "grad_norm": 0.601624071598053, "learning_rate": 4.585196084032925e-05, "loss": 0.0396, "step": 27628 }, { "epoch": 12.845188284518828, "grad_norm": 1.2097715139389038, "learning_rate": 4.5895184034206745e-05, "loss": 0.031, "step": 27630 }, { "epoch": 12.846118084611808, "grad_norm": 0.9179078936576843, "learning_rate": 4.593820100105356e-05, "loss": 0.0372, "step": 27632 }, { "epoch": 12.84704788470479, "grad_norm": 1.6445387601852417, "learning_rate": 4.5981011316309495e-05, "loss": 0.0286, "step": 27634 }, { "epoch": 12.847977684797769, "grad_norm": 1.0175273418426514, "learning_rate": 4.60236145574542e-05, "loss": 0.0272, "step": 27636 }, { "epoch": 12.848907484890749, "grad_norm": 1.3172134160995483, "learning_rate": 4.6066010304010895e-05, "loss": 0.0325, "step": 27638 }, { "epoch": 12.849837284983728, "grad_norm": 1.596915364265442, "learning_rate": 4.61081981375504e-05, "loss": 0.0237, "step": 27640 }, { "epoch": 12.850767085076708, "grad_norm": 1.3789699077606201, "learning_rate": 4.6150177641696095e-05, "loss": 0.03, "step": 27642 }, { "epoch": 12.851696885169689, "grad_norm": 2.1815414428710938, "learning_rate": 4.6191948402127135e-05, "loss": 0.0332, "step": 27644 }, { "epoch": 12.852626685262669, "grad_norm": 0.2810129225254059, "learning_rate": 4.623351000658299e-05, "loss": 0.0123, "step": 27646 }, { "epoch": 12.853556485355648, "grad_norm": 1.0378224849700928, "learning_rate": 4.6274862044867304e-05, "loss": 0.0395, "step": 27648 }, { "epoch": 12.854486285448628, "grad_norm": 1.7658545970916748, "learning_rate": 4.631600410885234e-05, "loss": 0.0357, "step": 27650 }, { "epoch": 12.85541608554161, "grad_norm": 0.9990221261978149, "learning_rate": 4.635693579248243e-05, "loss": 0.026, "step": 27652 }, { "epoch": 12.856345885634589, "grad_norm": 1.847195029258728, "learning_rate": 4.6397656691778304e-05, "loss": 0.046, "step": 27654 }, { "epoch": 12.857275685727569, "grad_norm": 1.9778380393981934, "learning_rate": 4.64381664048413e-05, "loss": 0.0782, "step": 27656 }, { "epoch": 12.858205485820548, "grad_norm": 1.221004605293274, "learning_rate": 4.647846453185682e-05, "loss": 0.0196, "step": 27658 }, { "epoch": 12.859135285913528, "grad_norm": 1.3427026271820068, "learning_rate": 4.651855067509853e-05, "loss": 0.0551, "step": 27660 }, { "epoch": 12.860065086006509, "grad_norm": 2.276804208755493, "learning_rate": 4.655842443893255e-05, "loss": 0.0574, "step": 27662 }, { "epoch": 12.860994886099489, "grad_norm": 1.8403046131134033, "learning_rate": 4.6598085429820854e-05, "loss": 0.0259, "step": 27664 }, { "epoch": 12.861924686192468, "grad_norm": 1.7698243856430054, "learning_rate": 4.663753325632556e-05, "loss": 0.0483, "step": 27666 }, { "epoch": 12.862854486285448, "grad_norm": 1.6451481580734253, "learning_rate": 4.6676767529112266e-05, "loss": 0.0266, "step": 27668 }, { "epoch": 12.86378428637843, "grad_norm": 2.0333456993103027, "learning_rate": 4.67157878609548e-05, "loss": 0.0454, "step": 27670 }, { "epoch": 12.864714086471409, "grad_norm": 1.3627949953079224, "learning_rate": 4.675459386673819e-05, "loss": 0.045, "step": 27672 }, { "epoch": 12.865643886564389, "grad_norm": 1.8342503309249878, "learning_rate": 4.679318516346271e-05, "loss": 0.036, "step": 27674 }, { "epoch": 12.866573686657368, "grad_norm": 1.1446139812469482, "learning_rate": 4.683156137024801e-05, "loss": 0.0277, "step": 27676 }, { "epoch": 12.86750348675035, "grad_norm": 1.0326790809631348, "learning_rate": 4.686972210833635e-05, "loss": 0.0513, "step": 27678 }, { "epoch": 12.868433286843329, "grad_norm": 1.5741374492645264, "learning_rate": 4.6907667001096645e-05, "loss": 0.0383, "step": 27680 }, { "epoch": 12.869363086936309, "grad_norm": 0.940667986869812, "learning_rate": 4.6945395674028025e-05, "loss": 0.0133, "step": 27682 }, { "epoch": 12.870292887029288, "grad_norm": 2.0727922916412354, "learning_rate": 4.6982907754763906e-05, "loss": 0.0523, "step": 27684 }, { "epoch": 12.871222687122268, "grad_norm": 1.5725581645965576, "learning_rate": 4.702020287307511e-05, "loss": 0.0558, "step": 27686 }, { "epoch": 12.87215248721525, "grad_norm": 0.5377066135406494, "learning_rate": 4.705728066087378e-05, "loss": 0.0169, "step": 27688 }, { "epoch": 12.873082287308229, "grad_norm": 2.0767297744750977, "learning_rate": 4.709414075221731e-05, "loss": 0.0484, "step": 27690 }, { "epoch": 12.874012087401209, "grad_norm": 1.4420390129089355, "learning_rate": 4.7130782783311455e-05, "loss": 0.0312, "step": 27692 }, { "epoch": 12.874941887494188, "grad_norm": 1.5021907091140747, "learning_rate": 4.7167206392513924e-05, "loss": 0.0261, "step": 27694 }, { "epoch": 12.87587168758717, "grad_norm": 1.1763793230056763, "learning_rate": 4.7203411220338646e-05, "loss": 0.0232, "step": 27696 }, { "epoch": 12.876801487680149, "grad_norm": 0.13774347305297852, "learning_rate": 4.7239396909458506e-05, "loss": 0.0075, "step": 27698 }, { "epoch": 12.877731287773129, "grad_norm": 1.9898617267608643, "learning_rate": 4.7275163104709274e-05, "loss": 0.0589, "step": 27700 }, { "epoch": 12.878661087866108, "grad_norm": 1.3179001808166504, "learning_rate": 4.731070945309295e-05, "loss": 0.0483, "step": 27702 }, { "epoch": 12.87959088795909, "grad_norm": 1.0695797204971313, "learning_rate": 4.734603560378162e-05, "loss": 0.0106, "step": 27704 }, { "epoch": 12.88052068805207, "grad_norm": 1.5110604763031006, "learning_rate": 4.738114120812033e-05, "loss": 0.0211, "step": 27706 }, { "epoch": 12.881450488145049, "grad_norm": 1.3697208166122437, "learning_rate": 4.741602591963087e-05, "loss": 0.0417, "step": 27708 }, { "epoch": 12.882380288238028, "grad_norm": 1.8590233325958252, "learning_rate": 4.7450689394015386e-05, "loss": 0.0297, "step": 27710 }, { "epoch": 12.883310088331008, "grad_norm": 1.1557633876800537, "learning_rate": 4.748513128915928e-05, "loss": 0.0413, "step": 27712 }, { "epoch": 12.88423988842399, "grad_norm": 1.007444977760315, "learning_rate": 4.751935126513497e-05, "loss": 0.0233, "step": 27714 }, { "epoch": 12.885169688516969, "grad_norm": 2.0215723514556885, "learning_rate": 4.7553348984205036e-05, "loss": 0.0553, "step": 27716 }, { "epoch": 12.886099488609949, "grad_norm": 3.1735663414001465, "learning_rate": 4.758712411082586e-05, "loss": 0.0588, "step": 27718 }, { "epoch": 12.887029288702928, "grad_norm": 1.2956963777542114, "learning_rate": 4.7620676311650585e-05, "loss": 0.0532, "step": 27720 }, { "epoch": 12.88795908879591, "grad_norm": 0.7367342710494995, "learning_rate": 4.765400525553229e-05, "loss": 0.0115, "step": 27722 }, { "epoch": 12.88888888888889, "grad_norm": 1.710911750793457, "learning_rate": 4.7687110613527994e-05, "loss": 0.0319, "step": 27724 }, { "epoch": 12.889818688981869, "grad_norm": 1.7629797458648682, "learning_rate": 4.771999205890109e-05, "loss": 0.0479, "step": 27726 }, { "epoch": 12.890748489074848, "grad_norm": 1.0602115392684937, "learning_rate": 4.7752649267124926e-05, "loss": 0.0433, "step": 27728 }, { "epoch": 12.891678289167828, "grad_norm": 1.4624611139297485, "learning_rate": 4.7785081915886174e-05, "loss": 0.0432, "step": 27730 }, { "epoch": 12.89260808926081, "grad_norm": 1.73330557346344, "learning_rate": 4.781728968508764e-05, "loss": 0.0657, "step": 27732 }, { "epoch": 12.893537889353789, "grad_norm": 0.651055634021759, "learning_rate": 4.784927225685161e-05, "loss": 0.0386, "step": 27734 }, { "epoch": 12.894467689446769, "grad_norm": 0.7374534010887146, "learning_rate": 4.788102931552297e-05, "loss": 0.0186, "step": 27736 }, { "epoch": 12.895397489539748, "grad_norm": 2.091071605682373, "learning_rate": 4.791256054767249e-05, "loss": 0.0486, "step": 27738 }, { "epoch": 12.89632728963273, "grad_norm": 2.924236536026001, "learning_rate": 4.794386564209959e-05, "loss": 0.0383, "step": 27740 }, { "epoch": 12.89725708972571, "grad_norm": 1.589706540107727, "learning_rate": 4.797494428983554e-05, "loss": 0.0483, "step": 27742 }, { "epoch": 12.898186889818689, "grad_norm": 1.074147343635559, "learning_rate": 4.800579618414678e-05, "loss": 0.025, "step": 27744 }, { "epoch": 12.899116689911668, "grad_norm": 2.210986852645874, "learning_rate": 4.803642102053749e-05, "loss": 0.0466, "step": 27746 }, { "epoch": 12.900046490004648, "grad_norm": 1.6130521297454834, "learning_rate": 4.806681849675292e-05, "loss": 0.0257, "step": 27748 }, { "epoch": 12.90097629009763, "grad_norm": 2.0700905323028564, "learning_rate": 4.8096988312782234e-05, "loss": 0.035, "step": 27750 }, { "epoch": 12.901906090190609, "grad_norm": 1.9075061082839966, "learning_rate": 4.812693017086153e-05, "loss": 0.042, "step": 27752 }, { "epoch": 12.902835890283589, "grad_norm": 3.26133131980896, "learning_rate": 4.8156643775476766e-05, "loss": 0.0316, "step": 27754 }, { "epoch": 12.903765690376568, "grad_norm": 1.5006858110427856, "learning_rate": 4.818612883336658e-05, "loss": 0.0212, "step": 27756 }, { "epoch": 12.90469549046955, "grad_norm": 2.5239717960357666, "learning_rate": 4.821538505352551e-05, "loss": 0.0813, "step": 27758 }, { "epoch": 12.90562529056253, "grad_norm": 2.346414804458618, "learning_rate": 4.824441214720638e-05, "loss": 0.0591, "step": 27760 }, { "epoch": 12.906555090655509, "grad_norm": 1.2752505540847778, "learning_rate": 4.8273209827923425e-05, "loss": 0.0254, "step": 27762 }, { "epoch": 12.907484890748488, "grad_norm": 2.3695075511932373, "learning_rate": 4.830177781145532e-05, "loss": 0.0596, "step": 27764 }, { "epoch": 12.90841469084147, "grad_norm": 1.0085936784744263, "learning_rate": 4.833011581584751e-05, "loss": 0.0234, "step": 27766 }, { "epoch": 12.90934449093445, "grad_norm": 1.0653564929962158, "learning_rate": 4.835822356141536e-05, "loss": 0.0353, "step": 27768 }, { "epoch": 12.910274291027429, "grad_norm": 1.1982671022415161, "learning_rate": 4.838610077074669e-05, "loss": 0.0328, "step": 27770 }, { "epoch": 12.911204091120409, "grad_norm": 1.5272434949874878, "learning_rate": 4.841374716870483e-05, "loss": 0.0595, "step": 27772 }, { "epoch": 12.91213389121339, "grad_norm": 2.6742656230926514, "learning_rate": 4.8441162482431e-05, "loss": 0.0803, "step": 27774 }, { "epoch": 12.91306369130637, "grad_norm": 2.6699981689453125, "learning_rate": 4.84683464413469e-05, "loss": 0.0328, "step": 27776 }, { "epoch": 12.91399349139935, "grad_norm": 1.2846503257751465, "learning_rate": 4.849529877715805e-05, "loss": 0.0179, "step": 27778 }, { "epoch": 12.914923291492329, "grad_norm": 1.471457600593567, "learning_rate": 4.852201922385571e-05, "loss": 0.0552, "step": 27780 }, { "epoch": 12.915853091585308, "grad_norm": 2.4994895458221436, "learning_rate": 4.8548507517719795e-05, "loss": 0.0438, "step": 27782 }, { "epoch": 12.91678289167829, "grad_norm": 1.828446626663208, "learning_rate": 4.8574763397321654e-05, "loss": 0.0312, "step": 27784 }, { "epoch": 12.91771269177127, "grad_norm": 2.764120578765869, "learning_rate": 4.86007866035263e-05, "loss": 0.0648, "step": 27786 }, { "epoch": 12.918642491864249, "grad_norm": 0.6085748076438904, "learning_rate": 4.862657687949519e-05, "loss": 0.0275, "step": 27788 }, { "epoch": 12.919572291957229, "grad_norm": 1.6674716472625732, "learning_rate": 4.865213397068864e-05, "loss": 0.0395, "step": 27790 }, { "epoch": 12.92050209205021, "grad_norm": 1.8682351112365723, "learning_rate": 4.867745762486863e-05, "loss": 0.0419, "step": 27792 }, { "epoch": 12.92143189214319, "grad_norm": 2.028362274169922, "learning_rate": 4.8702547592100837e-05, "loss": 0.0579, "step": 27794 }, { "epoch": 12.92236169223617, "grad_norm": 2.3087387084960938, "learning_rate": 4.872740362475736e-05, "loss": 0.0143, "step": 27796 }, { "epoch": 12.923291492329149, "grad_norm": 2.3626368045806885, "learning_rate": 4.875202547751929e-05, "loss": 0.0424, "step": 27798 }, { "epoch": 12.924221292422128, "grad_norm": 1.0846123695373535, "learning_rate": 4.877641290737884e-05, "loss": 0.0323, "step": 27800 }, { "epoch": 12.92515109251511, "grad_norm": 1.5804299116134644, "learning_rate": 4.880056567364198e-05, "loss": 0.0452, "step": 27802 }, { "epoch": 12.92608089260809, "grad_norm": 2.308957099914551, "learning_rate": 4.88244835379305e-05, "loss": 0.032, "step": 27804 }, { "epoch": 12.927010692701069, "grad_norm": 1.5958404541015625, "learning_rate": 4.8848166264184885e-05, "loss": 0.0428, "step": 27806 }, { "epoch": 12.927940492794049, "grad_norm": 1.9926141500473022, "learning_rate": 4.887161361866614e-05, "loss": 0.0563, "step": 27808 }, { "epoch": 12.92887029288703, "grad_norm": 2.0858798027038574, "learning_rate": 4.8894825369958275e-05, "loss": 0.0318, "step": 27810 }, { "epoch": 12.92980009298001, "grad_norm": 1.8194446563720703, "learning_rate": 4.891780128897078e-05, "loss": 0.0267, "step": 27812 }, { "epoch": 12.93072989307299, "grad_norm": 0.7347739934921265, "learning_rate": 4.894054114894059e-05, "loss": 0.0379, "step": 27814 }, { "epoch": 12.931659693165969, "grad_norm": 1.5488927364349365, "learning_rate": 4.89630447254344e-05, "loss": 0.0519, "step": 27816 }, { "epoch": 12.932589493258948, "grad_norm": 2.7807319164276123, "learning_rate": 4.898531179635109e-05, "loss": 0.079, "step": 27818 }, { "epoch": 12.93351929335193, "grad_norm": 1.4961596727371216, "learning_rate": 4.900734214192359e-05, "loss": 0.0561, "step": 27820 }, { "epoch": 12.93444909344491, "grad_norm": 0.9013360142707825, "learning_rate": 4.902913554472132e-05, "loss": 0.0294, "step": 27822 }, { "epoch": 12.935378893537889, "grad_norm": 2.2751920223236084, "learning_rate": 4.905069178965215e-05, "loss": 0.0428, "step": 27824 }, { "epoch": 12.936308693630869, "grad_norm": 0.7656459808349609, "learning_rate": 4.90720106639647e-05, "loss": 0.0208, "step": 27826 }, { "epoch": 12.93723849372385, "grad_norm": 1.1123390197753906, "learning_rate": 4.909309195725026e-05, "loss": 0.0251, "step": 27828 }, { "epoch": 12.93816829381683, "grad_norm": 2.83064866065979, "learning_rate": 4.911393546144498e-05, "loss": 0.0556, "step": 27830 }, { "epoch": 12.93909809390981, "grad_norm": 1.6680562496185303, "learning_rate": 4.9134540970831886e-05, "loss": 0.065, "step": 27832 }, { "epoch": 12.940027894002789, "grad_norm": 1.6691349744796753, "learning_rate": 4.915490828204291e-05, "loss": 0.0394, "step": 27834 }, { "epoch": 12.94095769409577, "grad_norm": 1.1167018413543701, "learning_rate": 4.917503719406089e-05, "loss": 0.027, "step": 27836 }, { "epoch": 12.94188749418875, "grad_norm": 0.7769821286201477, "learning_rate": 4.919492750822166e-05, "loss": 0.0196, "step": 27838 }, { "epoch": 12.94281729428173, "grad_norm": 0.9762880206108093, "learning_rate": 4.921457902821581e-05, "loss": 0.0314, "step": 27840 }, { "epoch": 12.943747094374709, "grad_norm": 1.7199453115463257, "learning_rate": 4.923399156009077e-05, "loss": 0.046, "step": 27842 }, { "epoch": 12.944676894467689, "grad_norm": 1.4714666604995728, "learning_rate": 4.9253164912252654e-05, "loss": 0.0309, "step": 27844 }, { "epoch": 12.94560669456067, "grad_norm": 1.6373026371002197, "learning_rate": 4.927209889546828e-05, "loss": 0.046, "step": 27846 }, { "epoch": 12.94653649465365, "grad_norm": 1.393334150314331, "learning_rate": 4.929079332286687e-05, "loss": 0.0352, "step": 27848 }, { "epoch": 12.94746629474663, "grad_norm": 2.059328556060791, "learning_rate": 4.93092480099419e-05, "loss": 0.045, "step": 27850 }, { "epoch": 12.948396094839609, "grad_norm": 0.6530162692070007, "learning_rate": 4.9327462774553166e-05, "loss": 0.0172, "step": 27852 }, { "epoch": 12.94932589493259, "grad_norm": 1.8516405820846558, "learning_rate": 4.934543743692823e-05, "loss": 0.0273, "step": 27854 }, { "epoch": 12.95025569502557, "grad_norm": 1.017082691192627, "learning_rate": 4.936317181966448e-05, "loss": 0.0178, "step": 27856 }, { "epoch": 12.95118549511855, "grad_norm": 1.8615294694900513, "learning_rate": 4.93806657477306e-05, "loss": 0.0451, "step": 27858 }, { "epoch": 12.952115295211529, "grad_norm": 1.033513069152832, "learning_rate": 4.939791904846871e-05, "loss": 0.027, "step": 27860 }, { "epoch": 12.95304509530451, "grad_norm": 1.5555225610733032, "learning_rate": 4.941493155159565e-05, "loss": 0.0414, "step": 27862 }, { "epoch": 12.95397489539749, "grad_norm": 1.3601830005645752, "learning_rate": 4.943170308920485e-05, "loss": 0.0346, "step": 27864 }, { "epoch": 12.95490469549047, "grad_norm": 1.9884395599365234, "learning_rate": 4.944823349576807e-05, "loss": 0.057, "step": 27866 }, { "epoch": 12.95583449558345, "grad_norm": 2.558333396911621, "learning_rate": 4.946452260813684e-05, "loss": 0.0501, "step": 27868 }, { "epoch": 12.956764295676429, "grad_norm": 2.067122459411621, "learning_rate": 4.948057026554416e-05, "loss": 0.0482, "step": 27870 }, { "epoch": 12.95769409576941, "grad_norm": 1.5883982181549072, "learning_rate": 4.9496376309606206e-05, "loss": 0.0285, "step": 27872 }, { "epoch": 12.95862389586239, "grad_norm": 2.367523431777954, "learning_rate": 4.951194058432365e-05, "loss": 0.0371, "step": 27874 }, { "epoch": 12.95955369595537, "grad_norm": 1.6014004945755005, "learning_rate": 4.952726293608341e-05, "loss": 0.026, "step": 27876 }, { "epoch": 12.960483496048349, "grad_norm": 0.8632510304450989, "learning_rate": 4.954234321366e-05, "loss": 0.0255, "step": 27878 }, { "epoch": 12.96141329614133, "grad_norm": 1.1718134880065918, "learning_rate": 4.955718126821726e-05, "loss": 0.0222, "step": 27880 }, { "epoch": 12.96234309623431, "grad_norm": 1.5339078903198242, "learning_rate": 4.957177695330952e-05, "loss": 0.0196, "step": 27882 }, { "epoch": 12.96327289632729, "grad_norm": 1.556630253791809, "learning_rate": 4.958613012488329e-05, "loss": 0.0412, "step": 27884 }, { "epoch": 12.96420269642027, "grad_norm": 1.4277480840682983, "learning_rate": 4.9600240641278544e-05, "loss": 0.0229, "step": 27886 }, { "epoch": 12.965132496513249, "grad_norm": 1.9224132299423218, "learning_rate": 4.961410836323019e-05, "loss": 0.0295, "step": 27888 }, { "epoch": 12.96606229660623, "grad_norm": 1.2823123931884766, "learning_rate": 4.9627733153869406e-05, "loss": 0.0272, "step": 27890 }, { "epoch": 12.96699209669921, "grad_norm": 1.619602918624878, "learning_rate": 4.964111487872499e-05, "loss": 0.0441, "step": 27892 }, { "epoch": 12.96792189679219, "grad_norm": 2.0918867588043213, "learning_rate": 4.965425340572476e-05, "loss": 0.0728, "step": 27894 }, { "epoch": 12.968851696885169, "grad_norm": 1.6534318923950195, "learning_rate": 4.966714860519675e-05, "loss": 0.0368, "step": 27896 }, { "epoch": 12.96978149697815, "grad_norm": 0.7616715431213379, "learning_rate": 4.967980034987051e-05, "loss": 0.0392, "step": 27898 }, { "epoch": 12.97071129707113, "grad_norm": 0.41309598088264465, "learning_rate": 4.969220851487848e-05, "loss": 0.0136, "step": 27900 }, { "epoch": 12.97164109716411, "grad_norm": 4.082530975341797, "learning_rate": 4.9704372977757055e-05, "loss": 0.1015, "step": 27902 }, { "epoch": 12.97257089725709, "grad_norm": 1.0450676679611206, "learning_rate": 4.971629361844787e-05, "loss": 0.0805, "step": 27904 }, { "epoch": 12.973500697350069, "grad_norm": 2.152336359024048, "learning_rate": 4.972797031929908e-05, "loss": 0.051, "step": 27906 }, { "epoch": 12.97443049744305, "grad_norm": 1.8711637258529663, "learning_rate": 4.97394029650663e-05, "loss": 0.038, "step": 27908 }, { "epoch": 12.97536029753603, "grad_norm": 1.5109829902648926, "learning_rate": 4.975059144291398e-05, "loss": 0.028, "step": 27910 }, { "epoch": 12.97629009762901, "grad_norm": 1.349045991897583, "learning_rate": 4.9761535642416333e-05, "loss": 0.0387, "step": 27912 }, { "epoch": 12.977219897721989, "grad_norm": 0.8895775079727173, "learning_rate": 4.977223545555853e-05, "loss": 0.0547, "step": 27914 }, { "epoch": 12.97814969781497, "grad_norm": 1.225744366645813, "learning_rate": 4.978269077673773e-05, "loss": 0.0228, "step": 27916 }, { "epoch": 12.97907949790795, "grad_norm": 1.3991420269012451, "learning_rate": 4.9792901502764116e-05, "loss": 0.0399, "step": 27918 }, { "epoch": 12.98000929800093, "grad_norm": 2.020252227783203, "learning_rate": 4.980286753286199e-05, "loss": 0.0541, "step": 27920 }, { "epoch": 12.98093909809391, "grad_norm": 1.6604344844818115, "learning_rate": 4.981258876867065e-05, "loss": 0.0393, "step": 27922 }, { "epoch": 12.98186889818689, "grad_norm": 2.3287711143493652, "learning_rate": 4.982206511424538e-05, "loss": 0.0721, "step": 27924 }, { "epoch": 12.98279869827987, "grad_norm": 1.0311757326126099, "learning_rate": 4.9831296476058524e-05, "loss": 0.0214, "step": 27926 }, { "epoch": 12.98372849837285, "grad_norm": 2.4873878955841064, "learning_rate": 4.984028276300025e-05, "loss": 0.0637, "step": 27928 }, { "epoch": 12.98465829846583, "grad_norm": 1.890254259109497, "learning_rate": 4.9849023886379525e-05, "loss": 0.0402, "step": 27930 }, { "epoch": 12.98558809855881, "grad_norm": 3.3358912467956543, "learning_rate": 4.985751975992498e-05, "loss": 0.0632, "step": 27932 }, { "epoch": 12.98651789865179, "grad_norm": 1.575071096420288, "learning_rate": 4.986577029978582e-05, "loss": 0.0264, "step": 27934 }, { "epoch": 12.98744769874477, "grad_norm": 2.297244071960449, "learning_rate": 4.987377542453252e-05, "loss": 0.0845, "step": 27936 }, { "epoch": 12.98837749883775, "grad_norm": 1.048858404159546, "learning_rate": 4.9881535055157704e-05, "loss": 0.0268, "step": 27938 }, { "epoch": 12.98930729893073, "grad_norm": 1.7096360921859741, "learning_rate": 4.9889049115077005e-05, "loss": 0.0296, "step": 27940 }, { "epoch": 12.99023709902371, "grad_norm": 1.445618987083435, "learning_rate": 4.989631753012965e-05, "loss": 0.0298, "step": 27942 }, { "epoch": 12.99116689911669, "grad_norm": 1.0022653341293335, "learning_rate": 4.990334022857932e-05, "loss": 0.0425, "step": 27944 }, { "epoch": 12.99209669920967, "grad_norm": 1.2211614847183228, "learning_rate": 4.99101171411148e-05, "loss": 0.0459, "step": 27946 }, { "epoch": 12.99302649930265, "grad_norm": 2.705132246017456, "learning_rate": 4.991664820085074e-05, "loss": 0.0894, "step": 27948 }, { "epoch": 12.99395629939563, "grad_norm": 1.8971410989761353, "learning_rate": 4.9922933343328196e-05, "loss": 0.0635, "step": 27950 }, { "epoch": 12.99488609948861, "grad_norm": 1.885061264038086, "learning_rate": 4.9928972506515334e-05, "loss": 0.037, "step": 27952 }, { "epoch": 12.99581589958159, "grad_norm": 0.8041380643844604, "learning_rate": 4.9934765630808075e-05, "loss": 0.0477, "step": 27954 }, { "epoch": 12.99674569967457, "grad_norm": 1.911424160003662, "learning_rate": 4.994031265903061e-05, "loss": 0.0346, "step": 27956 }, { "epoch": 12.99767549976755, "grad_norm": 1.0429607629776, "learning_rate": 4.9945613536436e-05, "loss": 0.0287, "step": 27958 }, { "epoch": 12.99860529986053, "grad_norm": 1.1847106218338013, "learning_rate": 4.995066821070675e-05, "loss": 0.027, "step": 27960 }, { "epoch": 12.99953509995351, "grad_norm": 1.5136761665344238, "learning_rate": 4.995547663195526e-05, "loss": 0.046, "step": 27962 }, { "epoch": 13.00046490004649, "grad_norm": 1.201892614364624, "learning_rate": 4.9960038752724346e-05, "loss": 0.057, "step": 27964 }, { "epoch": 13.00139470013947, "grad_norm": 1.391868233680725, "learning_rate": 4.99643545279877e-05, "loss": 0.0505, "step": 27966 }, { "epoch": 13.00232450023245, "grad_norm": 1.5294487476348877, "learning_rate": 4.996842391515041e-05, "loss": 0.0562, "step": 27968 }, { "epoch": 13.00325430032543, "grad_norm": 2.3918910026550293, "learning_rate": 4.9972246874049234e-05, "loss": 0.0452, "step": 27970 }, { "epoch": 13.00418410041841, "grad_norm": 1.644007682800293, "learning_rate": 4.9975823366953104e-05, "loss": 0.0389, "step": 27972 }, { "epoch": 13.00511390051139, "grad_norm": 1.8882653713226318, "learning_rate": 4.9979153358563496e-05, "loss": 0.0342, "step": 27974 }, { "epoch": 13.00604370060437, "grad_norm": 1.9804222583770752, "learning_rate": 4.9982236816014725e-05, "loss": 0.0281, "step": 27976 }, { "epoch": 13.00697350069735, "grad_norm": 1.471830129623413, "learning_rate": 4.998507370887433e-05, "loss": 0.0431, "step": 27978 }, { "epoch": 13.00790330079033, "grad_norm": 1.5176913738250732, "learning_rate": 4.998766400914328e-05, "loss": 0.0316, "step": 27980 }, { "epoch": 13.00883310088331, "grad_norm": 1.59811532497406, "learning_rate": 4.9990007691256405e-05, "loss": 0.0415, "step": 27982 }, { "epoch": 13.00976290097629, "grad_norm": 3.1598424911499023, "learning_rate": 4.9992104732082495e-05, "loss": 0.0621, "step": 27984 }, { "epoch": 13.01069270106927, "grad_norm": 1.813368320465088, "learning_rate": 4.99939551109246e-05, "loss": 0.035, "step": 27986 }, { "epoch": 13.01162250116225, "grad_norm": 2.3723371028900146, "learning_rate": 4.999555880952023e-05, "loss": 0.0385, "step": 27988 }, { "epoch": 13.01255230125523, "grad_norm": 0.6351338624954224, "learning_rate": 4.9996915812041525e-05, "loss": 0.0188, "step": 27990 }, { "epoch": 13.01348210134821, "grad_norm": 1.2814247608184814, "learning_rate": 4.9998026105095405e-05, "loss": 0.0221, "step": 27992 }, { "epoch": 13.014411901441191, "grad_norm": 2.2211952209472656, "learning_rate": 4.999888967772375e-05, "loss": 0.0281, "step": 27994 }, { "epoch": 13.01534170153417, "grad_norm": 1.9944792985916138, "learning_rate": 4.999950652140343e-05, "loss": 0.0375, "step": 27996 }, { "epoch": 13.01627150162715, "grad_norm": 3.3113443851470947, "learning_rate": 4.999987663004647e-05, "loss": 0.0708, "step": 27998 }, { "epoch": 13.01720130172013, "grad_norm": 1.8317290544509888, "learning_rate": 5.000000000000002e-05, "loss": 0.0228, "step": 28000 }, { "epoch": 13.01720130172013, "eval_cer": 0.15083374139841757, "eval_loss": 0.2827129662036896, "eval_runtime": 401.4909, "eval_samples_per_second": 31.617, "eval_steps_per_second": 0.989, "step": 28000 }, { "epoch": 13.01813110181311, "grad_norm": 1.6754473447799683, "learning_rate": 4.999987663004648e-05, "loss": 0.022, "step": 28002 }, { "epoch": 13.01906090190609, "grad_norm": 0.911959707736969, "learning_rate": 4.999950652140345e-05, "loss": 0.0233, "step": 28004 }, { "epoch": 13.01999070199907, "grad_norm": 1.5722310543060303, "learning_rate": 4.999888967772377e-05, "loss": 0.0378, "step": 28006 }, { "epoch": 13.02092050209205, "grad_norm": 1.2359367609024048, "learning_rate": 4.999802610509543e-05, "loss": 0.0313, "step": 28008 }, { "epoch": 13.02185030218503, "grad_norm": 1.1785792112350464, "learning_rate": 4.9996915812041545e-05, "loss": 0.0293, "step": 28010 }, { "epoch": 13.022780102278011, "grad_norm": 1.1961402893066406, "learning_rate": 4.9995558809520255e-05, "loss": 0.0266, "step": 28012 }, { "epoch": 13.02370990237099, "grad_norm": 1.2423338890075684, "learning_rate": 4.9993955110924636e-05, "loss": 0.0263, "step": 28014 }, { "epoch": 13.02463970246397, "grad_norm": 1.4640910625457764, "learning_rate": 4.999210473208253e-05, "loss": 0.0246, "step": 28016 }, { "epoch": 13.02556950255695, "grad_norm": 1.4059631824493408, "learning_rate": 4.999000769125643e-05, "loss": 0.0275, "step": 28018 }, { "epoch": 13.02649930264993, "grad_norm": 1.4367870092391968, "learning_rate": 4.998766400914332e-05, "loss": 0.0469, "step": 28020 }, { "epoch": 13.02742910274291, "grad_norm": 1.7687573432922363, "learning_rate": 4.9985073708874335e-05, "loss": 0.0288, "step": 28022 }, { "epoch": 13.02835890283589, "grad_norm": 2.0969879627227783, "learning_rate": 4.998223681601474e-05, "loss": 0.0449, "step": 28024 }, { "epoch": 13.02928870292887, "grad_norm": 0.9722608923912048, "learning_rate": 4.9979153358563516e-05, "loss": 0.0167, "step": 28026 }, { "epoch": 13.03021850302185, "grad_norm": 2.057262897491455, "learning_rate": 4.997582336695313e-05, "loss": 0.0532, "step": 28028 }, { "epoch": 13.031148303114831, "grad_norm": 0.9914546608924866, "learning_rate": 4.997224687404926e-05, "loss": 0.0202, "step": 28030 }, { "epoch": 13.03207810320781, "grad_norm": 0.9130971431732178, "learning_rate": 4.996842391515045e-05, "loss": 0.0151, "step": 28032 }, { "epoch": 13.03300790330079, "grad_norm": 1.6080942153930664, "learning_rate": 4.996435452798776e-05, "loss": 0.0403, "step": 28034 }, { "epoch": 13.03393770339377, "grad_norm": 2.186680316925049, "learning_rate": 4.9960038752724394e-05, "loss": 0.0585, "step": 28036 }, { "epoch": 13.034867503486751, "grad_norm": 1.8291945457458496, "learning_rate": 4.995547663195531e-05, "loss": 0.0545, "step": 28038 }, { "epoch": 13.03579730357973, "grad_norm": 0.9788724780082703, "learning_rate": 4.9950668210706806e-05, "loss": 0.0204, "step": 28040 }, { "epoch": 13.03672710367271, "grad_norm": 0.4876369833946228, "learning_rate": 4.9945613536436056e-05, "loss": 0.0173, "step": 28042 }, { "epoch": 13.03765690376569, "grad_norm": 1.8826631307601929, "learning_rate": 4.994031265903065e-05, "loss": 0.0562, "step": 28044 }, { "epoch": 13.03858670385867, "grad_norm": 1.3498501777648926, "learning_rate": 4.993476563080812e-05, "loss": 0.0327, "step": 28046 }, { "epoch": 13.039516503951651, "grad_norm": 2.6955630779266357, "learning_rate": 4.992897250651537e-05, "loss": 0.0438, "step": 28048 }, { "epoch": 13.04044630404463, "grad_norm": 1.316648006439209, "learning_rate": 4.992293334332821e-05, "loss": 0.0239, "step": 28050 }, { "epoch": 13.04137610413761, "grad_norm": 2.627760887145996, "learning_rate": 4.991664820085076e-05, "loss": 0.0518, "step": 28052 }, { "epoch": 13.04230590423059, "grad_norm": 0.8880636096000671, "learning_rate": 4.991011714111483e-05, "loss": 0.0206, "step": 28054 }, { "epoch": 13.043235704323571, "grad_norm": 1.5335288047790527, "learning_rate": 4.990334022857933e-05, "loss": 0.0655, "step": 28056 }, { "epoch": 13.04416550441655, "grad_norm": 1.4494279623031616, "learning_rate": 4.989631753012966e-05, "loss": 0.0319, "step": 28058 }, { "epoch": 13.04509530450953, "grad_norm": 1.9990355968475342, "learning_rate": 4.988904911507703e-05, "loss": 0.0421, "step": 28060 }, { "epoch": 13.04602510460251, "grad_norm": 1.038358449935913, "learning_rate": 4.988153505515773e-05, "loss": 0.0302, "step": 28062 }, { "epoch": 13.046954904695491, "grad_norm": 2.2839274406433105, "learning_rate": 4.987377542453252e-05, "loss": 0.0459, "step": 28064 }, { "epoch": 13.047884704788471, "grad_norm": 2.036315679550171, "learning_rate": 4.9865770299785815e-05, "loss": 0.0722, "step": 28066 }, { "epoch": 13.04881450488145, "grad_norm": 1.098332166671753, "learning_rate": 4.9857519759924994e-05, "loss": 0.041, "step": 28068 }, { "epoch": 13.04974430497443, "grad_norm": 2.526604652404785, "learning_rate": 4.984902388637951e-05, "loss": 0.0527, "step": 28070 }, { "epoch": 13.05067410506741, "grad_norm": 0.7682260870933533, "learning_rate": 4.984028276300023e-05, "loss": 0.0131, "step": 28072 }, { "epoch": 13.051603905160391, "grad_norm": 1.2828270196914673, "learning_rate": 4.983129647605852e-05, "loss": 0.0321, "step": 28074 }, { "epoch": 13.05253370525337, "grad_norm": 1.0892393589019775, "learning_rate": 4.9822065114245354e-05, "loss": 0.0219, "step": 28076 }, { "epoch": 13.05346350534635, "grad_norm": 2.292538642883301, "learning_rate": 4.9812588768670594e-05, "loss": 0.0691, "step": 28078 }, { "epoch": 13.05439330543933, "grad_norm": 0.6870150566101074, "learning_rate": 4.980286753286196e-05, "loss": 0.0422, "step": 28080 }, { "epoch": 13.055323105532311, "grad_norm": 1.2807823419570923, "learning_rate": 4.979290150276409e-05, "loss": 0.0242, "step": 28082 }, { "epoch": 13.056252905625291, "grad_norm": 1.2450393438339233, "learning_rate": 4.978269077673767e-05, "loss": 0.0236, "step": 28084 }, { "epoch": 13.05718270571827, "grad_norm": 2.1953020095825195, "learning_rate": 4.977223545555846e-05, "loss": 0.06, "step": 28086 }, { "epoch": 13.05811250581125, "grad_norm": 1.41026771068573, "learning_rate": 4.976153564241629e-05, "loss": 0.0264, "step": 28088 }, { "epoch": 13.05904230590423, "grad_norm": 1.259732961654663, "learning_rate": 4.975059144291395e-05, "loss": 0.0346, "step": 28090 }, { "epoch": 13.059972105997211, "grad_norm": 1.7445030212402344, "learning_rate": 4.9739402965066276e-05, "loss": 0.0414, "step": 28092 }, { "epoch": 13.06090190609019, "grad_norm": 1.022855520248413, "learning_rate": 4.9727970319299065e-05, "loss": 0.0589, "step": 28094 }, { "epoch": 13.06183170618317, "grad_norm": 0.7035691738128662, "learning_rate": 4.9716293618447863e-05, "loss": 0.0162, "step": 28096 }, { "epoch": 13.06276150627615, "grad_norm": 1.1555126905441284, "learning_rate": 4.970437297775702e-05, "loss": 0.0232, "step": 28098 }, { "epoch": 13.063691306369131, "grad_norm": 0.6710932850837708, "learning_rate": 4.969220851487848e-05, "loss": 0.021, "step": 28100 }, { "epoch": 13.064621106462111, "grad_norm": 1.01714026927948, "learning_rate": 4.9679800349870515e-05, "loss": 0.0335, "step": 28102 }, { "epoch": 13.06555090655509, "grad_norm": 1.495952844619751, "learning_rate": 4.96671486051967e-05, "loss": 0.0364, "step": 28104 }, { "epoch": 13.06648070664807, "grad_norm": 1.4414435625076294, "learning_rate": 4.965425340572471e-05, "loss": 0.035, "step": 28106 }, { "epoch": 13.067410506741052, "grad_norm": 1.6323626041412354, "learning_rate": 4.964111487872496e-05, "loss": 0.0245, "step": 28108 }, { "epoch": 13.068340306834031, "grad_norm": 0.4368002712726593, "learning_rate": 4.9627733153869345e-05, "loss": 0.0113, "step": 28110 }, { "epoch": 13.06927010692701, "grad_norm": 2.0852928161621094, "learning_rate": 4.961410836323013e-05, "loss": 0.0374, "step": 28112 }, { "epoch": 13.07019990701999, "grad_norm": 0.9262934923171997, "learning_rate": 4.9600240641278517e-05, "loss": 0.0228, "step": 28114 }, { "epoch": 13.07112970711297, "grad_norm": 2.621856689453125, "learning_rate": 4.9586130124883256e-05, "loss": 0.0506, "step": 28116 }, { "epoch": 13.072059507205951, "grad_norm": 0.6562598347663879, "learning_rate": 4.957177695330949e-05, "loss": 0.0112, "step": 28118 }, { "epoch": 13.072989307298931, "grad_norm": 1.4467699527740479, "learning_rate": 4.955718126821721e-05, "loss": 0.0233, "step": 28120 }, { "epoch": 13.07391910739191, "grad_norm": 1.1704630851745605, "learning_rate": 4.954234321366e-05, "loss": 0.0235, "step": 28122 }, { "epoch": 13.07484890748489, "grad_norm": 1.3858438730239868, "learning_rate": 4.952726293608337e-05, "loss": 0.0194, "step": 28124 }, { "epoch": 13.075778707577872, "grad_norm": 1.6363074779510498, "learning_rate": 4.951194058432362e-05, "loss": 0.0388, "step": 28126 }, { "epoch": 13.076708507670851, "grad_norm": 2.3233649730682373, "learning_rate": 4.9496376309606206e-05, "loss": 0.0321, "step": 28128 }, { "epoch": 13.07763830776383, "grad_norm": 1.0644077062606812, "learning_rate": 4.948057026554417e-05, "loss": 0.0182, "step": 28130 }, { "epoch": 13.07856810785681, "grad_norm": 1.4522144794464111, "learning_rate": 4.946452260813678e-05, "loss": 0.0225, "step": 28132 }, { "epoch": 13.07949790794979, "grad_norm": 1.2382636070251465, "learning_rate": 4.9448233495768054e-05, "loss": 0.0385, "step": 28134 }, { "epoch": 13.080427708042771, "grad_norm": 1.7106035947799683, "learning_rate": 4.943170308920483e-05, "loss": 0.0339, "step": 28136 }, { "epoch": 13.081357508135751, "grad_norm": 1.0044556856155396, "learning_rate": 4.9414931551595606e-05, "loss": 0.0393, "step": 28138 }, { "epoch": 13.08228730822873, "grad_norm": 2.097350597381592, "learning_rate": 4.939791904846867e-05, "loss": 0.0275, "step": 28140 }, { "epoch": 13.08321710832171, "grad_norm": 1.4431825876235962, "learning_rate": 4.9380665747730596e-05, "loss": 0.0466, "step": 28142 }, { "epoch": 13.084146908414692, "grad_norm": 1.3731486797332764, "learning_rate": 4.9363171819664434e-05, "loss": 0.0314, "step": 28144 }, { "epoch": 13.085076708507671, "grad_norm": 0.2664353847503662, "learning_rate": 4.934543743692821e-05, "loss": 0.0133, "step": 28146 }, { "epoch": 13.08600650860065, "grad_norm": 1.742669939994812, "learning_rate": 4.932746277455319e-05, "loss": 0.0869, "step": 28148 }, { "epoch": 13.08693630869363, "grad_norm": 2.4289705753326416, "learning_rate": 4.9309248009941934e-05, "loss": 0.0531, "step": 28150 }, { "epoch": 13.087866108786612, "grad_norm": 1.3689637184143066, "learning_rate": 4.9290793322866855e-05, "loss": 0.0299, "step": 28152 }, { "epoch": 13.088795908879591, "grad_norm": 0.5503124594688416, "learning_rate": 4.927209889546827e-05, "loss": 0.0352, "step": 28154 }, { "epoch": 13.089725708972571, "grad_norm": 0.7023043632507324, "learning_rate": 4.925316491225268e-05, "loss": 0.0132, "step": 28156 }, { "epoch": 13.09065550906555, "grad_norm": 0.8216679692268372, "learning_rate": 4.9233991560090716e-05, "loss": 0.0244, "step": 28158 }, { "epoch": 13.09158530915853, "grad_norm": 1.6544378995895386, "learning_rate": 4.9214579028215756e-05, "loss": 0.025, "step": 28160 }, { "epoch": 13.092515109251512, "grad_norm": 1.242537021636963, "learning_rate": 4.9194927508221645e-05, "loss": 0.0215, "step": 28162 }, { "epoch": 13.093444909344491, "grad_norm": 2.278615713119507, "learning_rate": 4.9175037194060876e-05, "loss": 0.0529, "step": 28164 }, { "epoch": 13.09437470943747, "grad_norm": 0.7244308590888977, "learning_rate": 4.915490828204286e-05, "loss": 0.0212, "step": 28166 }, { "epoch": 13.09530450953045, "grad_norm": 1.45523202419281, "learning_rate": 4.913454097083187e-05, "loss": 0.029, "step": 28168 }, { "epoch": 13.096234309623432, "grad_norm": 1.0071874856948853, "learning_rate": 4.911393546144497e-05, "loss": 0.0225, "step": 28170 }, { "epoch": 13.097164109716411, "grad_norm": 0.6369080543518066, "learning_rate": 4.9093091957250245e-05, "loss": 0.0106, "step": 28172 }, { "epoch": 13.098093909809391, "grad_norm": 2.6247198581695557, "learning_rate": 4.907201066396468e-05, "loss": 0.0275, "step": 28174 }, { "epoch": 13.09902370990237, "grad_norm": 1.463768482208252, "learning_rate": 4.905069178965218e-05, "loss": 0.0243, "step": 28176 }, { "epoch": 13.09995350999535, "grad_norm": 1.7828418016433716, "learning_rate": 4.902913554472132e-05, "loss": 0.0576, "step": 28178 }, { "epoch": 13.100883310088332, "grad_norm": 2.517305374145508, "learning_rate": 4.900734214192359e-05, "loss": 0.0448, "step": 28180 }, { "epoch": 13.101813110181311, "grad_norm": 1.8224928379058838, "learning_rate": 4.898531179635113e-05, "loss": 0.0677, "step": 28182 }, { "epoch": 13.10274291027429, "grad_norm": 2.8303160667419434, "learning_rate": 4.8963044725434435e-05, "loss": 0.0536, "step": 28184 }, { "epoch": 13.10367271036727, "grad_norm": 1.1660228967666626, "learning_rate": 4.894054114894053e-05, "loss": 0.029, "step": 28186 }, { "epoch": 13.104602510460252, "grad_norm": 2.562495470046997, "learning_rate": 4.8917801288970776e-05, "loss": 0.057, "step": 28188 }, { "epoch": 13.105532310553231, "grad_norm": 0.4894638657569885, "learning_rate": 4.8894825369958255e-05, "loss": 0.0195, "step": 28190 }, { "epoch": 13.106462110646211, "grad_norm": 2.317678213119507, "learning_rate": 4.8871613618666065e-05, "loss": 0.0445, "step": 28192 }, { "epoch": 13.10739191073919, "grad_norm": 2.678741931915283, "learning_rate": 4.8848166264184824e-05, "loss": 0.034, "step": 28194 }, { "epoch": 13.108321710832172, "grad_norm": 1.8105913400650024, "learning_rate": 4.882448353793049e-05, "loss": 0.0292, "step": 28196 }, { "epoch": 13.109251510925152, "grad_norm": 0.6583519577980042, "learning_rate": 4.880056567364193e-05, "loss": 0.0139, "step": 28198 }, { "epoch": 13.110181311018131, "grad_norm": 1.2577531337738037, "learning_rate": 4.877641290737884e-05, "loss": 0.0586, "step": 28200 }, { "epoch": 13.11111111111111, "grad_norm": 1.353248953819275, "learning_rate": 4.8752025477519335e-05, "loss": 0.0242, "step": 28202 }, { "epoch": 13.11204091120409, "grad_norm": 0.9431766867637634, "learning_rate": 4.87274036247574e-05, "loss": 0.0408, "step": 28204 }, { "epoch": 13.112970711297072, "grad_norm": 1.7555204629898071, "learning_rate": 4.870254759210083e-05, "loss": 0.0436, "step": 28206 }, { "epoch": 13.113900511390051, "grad_norm": 1.1446481943130493, "learning_rate": 4.8677457624868624e-05, "loss": 0.0255, "step": 28208 }, { "epoch": 13.114830311483031, "grad_norm": 2.0675244331359863, "learning_rate": 4.865213397068869e-05, "loss": 0.0337, "step": 28210 }, { "epoch": 13.11576011157601, "grad_norm": 0.27113401889801025, "learning_rate": 4.862657687949517e-05, "loss": 0.0319, "step": 28212 }, { "epoch": 13.116689911668992, "grad_norm": 0.9040731191635132, "learning_rate": 4.860078660352623e-05, "loss": 0.0324, "step": 28214 }, { "epoch": 13.117619711761972, "grad_norm": 1.2942790985107422, "learning_rate": 4.8574763397321634e-05, "loss": 0.0279, "step": 28216 }, { "epoch": 13.118549511854951, "grad_norm": 0.7827292084693909, "learning_rate": 4.8548507517719775e-05, "loss": 0.0203, "step": 28218 }, { "epoch": 13.11947931194793, "grad_norm": 1.801599144935608, "learning_rate": 4.8522019223855635e-05, "loss": 0.0528, "step": 28220 }, { "epoch": 13.12040911204091, "grad_norm": 1.440850019454956, "learning_rate": 4.849529877715803e-05, "loss": 0.023, "step": 28222 }, { "epoch": 13.121338912133892, "grad_norm": 2.2368240356445312, "learning_rate": 4.846834644134688e-05, "loss": 0.0361, "step": 28224 }, { "epoch": 13.122268712226871, "grad_norm": 0.6069585084915161, "learning_rate": 4.8441162482430904e-05, "loss": 0.0115, "step": 28226 }, { "epoch": 13.123198512319851, "grad_norm": 0.6241308450698853, "learning_rate": 4.841374716870481e-05, "loss": 0.015, "step": 28228 }, { "epoch": 13.12412831241283, "grad_norm": 1.3570349216461182, "learning_rate": 4.838610077074673e-05, "loss": 0.037, "step": 28230 }, { "epoch": 13.125058112505812, "grad_norm": 1.0517051219940186, "learning_rate": 4.835822356141534e-05, "loss": 0.0359, "step": 28232 }, { "epoch": 13.125987912598791, "grad_norm": 0.809690535068512, "learning_rate": 4.8330115815847486e-05, "loss": 0.0117, "step": 28234 }, { "epoch": 13.126917712691771, "grad_norm": 2.300786018371582, "learning_rate": 4.8301777811455344e-05, "loss": 0.0585, "step": 28236 }, { "epoch": 13.12784751278475, "grad_norm": 1.7833172082901, "learning_rate": 4.827320982792346e-05, "loss": 0.0259, "step": 28238 }, { "epoch": 13.128777312877732, "grad_norm": 1.7208619117736816, "learning_rate": 4.824441214720633e-05, "loss": 0.0376, "step": 28240 }, { "epoch": 13.129707112970712, "grad_norm": 1.2286354303359985, "learning_rate": 4.8215385053525406e-05, "loss": 0.025, "step": 28242 }, { "epoch": 13.130636913063691, "grad_norm": 2.7645351886749268, "learning_rate": 4.818612883336655e-05, "loss": 0.0532, "step": 28244 }, { "epoch": 13.131566713156671, "grad_norm": 1.0771358013153076, "learning_rate": 4.815664377547668e-05, "loss": 0.0293, "step": 28246 }, { "epoch": 13.13249651324965, "grad_norm": 2.177525043487549, "learning_rate": 4.8126930170861443e-05, "loss": 0.0592, "step": 28248 }, { "epoch": 13.133426313342632, "grad_norm": 1.583231806755066, "learning_rate": 4.8096988312782214e-05, "loss": 0.0297, "step": 28250 }, { "epoch": 13.134356113435611, "grad_norm": 3.916452407836914, "learning_rate": 4.806681849675291e-05, "loss": 0.0295, "step": 28252 }, { "epoch": 13.135285913528591, "grad_norm": 1.9814494848251343, "learning_rate": 4.803642102053748e-05, "loss": 0.0457, "step": 28254 }, { "epoch": 13.13621571362157, "grad_norm": 1.942050576210022, "learning_rate": 4.8005796184146836e-05, "loss": 0.0251, "step": 28256 }, { "epoch": 13.137145513714552, "grad_norm": 0.7088987231254578, "learning_rate": 4.79749442898356e-05, "loss": 0.0392, "step": 28258 }, { "epoch": 13.138075313807532, "grad_norm": 1.3264354467391968, "learning_rate": 4.794386564209958e-05, "loss": 0.033, "step": 28260 }, { "epoch": 13.139005113900511, "grad_norm": 0.7938783168792725, "learning_rate": 4.7912560547672486e-05, "loss": 0.0311, "step": 28262 }, { "epoch": 13.139934913993491, "grad_norm": 1.5342248678207397, "learning_rate": 4.7881029315523036e-05, "loss": 0.0346, "step": 28264 }, { "epoch": 13.140864714086472, "grad_norm": 1.1944528818130493, "learning_rate": 4.784927225685161e-05, "loss": 0.0181, "step": 28266 }, { "epoch": 13.141794514179452, "grad_norm": 1.3462913036346436, "learning_rate": 4.7817289685087556e-05, "loss": 0.0334, "step": 28268 }, { "epoch": 13.142724314272431, "grad_norm": 1.1614781618118286, "learning_rate": 4.7785081915886174e-05, "loss": 0.0315, "step": 28270 }, { "epoch": 13.143654114365411, "grad_norm": 0.9691663980484009, "learning_rate": 4.775264926712492e-05, "loss": 0.02, "step": 28272 }, { "epoch": 13.14458391445839, "grad_norm": 0.17885534465312958, "learning_rate": 4.771999205890101e-05, "loss": 0.0121, "step": 28274 }, { "epoch": 13.145513714551372, "grad_norm": 0.8657050132751465, "learning_rate": 4.768711061352799e-05, "loss": 0.028, "step": 28276 }, { "epoch": 13.146443514644352, "grad_norm": 0.5693922638893127, "learning_rate": 4.76540052555323e-05, "loss": 0.0181, "step": 28278 }, { "epoch": 13.147373314737331, "grad_norm": 1.2383832931518555, "learning_rate": 4.7620676311650524e-05, "loss": 0.0296, "step": 28280 }, { "epoch": 13.148303114830311, "grad_norm": 1.3651295900344849, "learning_rate": 4.7587124110825895e-05, "loss": 0.033, "step": 28282 }, { "epoch": 13.149232914923292, "grad_norm": 1.4323452711105347, "learning_rate": 4.755334898420516e-05, "loss": 0.0566, "step": 28284 }, { "epoch": 13.150162715016272, "grad_norm": 0.5273038744926453, "learning_rate": 4.751935126513502e-05, "loss": 0.0212, "step": 28286 }, { "epoch": 13.151092515109251, "grad_norm": 0.7452933192253113, "learning_rate": 4.7485131289159323e-05, "loss": 0.0312, "step": 28288 }, { "epoch": 13.152022315202231, "grad_norm": 1.5796297788619995, "learning_rate": 4.74506893940155e-05, "loss": 0.0441, "step": 28290 }, { "epoch": 13.15295211529521, "grad_norm": 1.4142428636550903, "learning_rate": 4.7416025919630986e-05, "loss": 0.0254, "step": 28292 }, { "epoch": 13.153881915388192, "grad_norm": 1.5216785669326782, "learning_rate": 4.7381141208120364e-05, "loss": 0.0333, "step": 28294 }, { "epoch": 13.154811715481172, "grad_norm": 2.0280845165252686, "learning_rate": 4.7346035603781564e-05, "loss": 0.0696, "step": 28296 }, { "epoch": 13.155741515574151, "grad_norm": 1.19780695438385, "learning_rate": 4.7310709453092986e-05, "loss": 0.0252, "step": 28298 }, { "epoch": 13.15667131566713, "grad_norm": 2.614802360534668, "learning_rate": 4.727516310470921e-05, "loss": 0.0432, "step": 28300 }, { "epoch": 13.157601115760112, "grad_norm": 1.983277678489685, "learning_rate": 4.7239396909458445e-05, "loss": 0.0428, "step": 28302 }, { "epoch": 13.158530915853092, "grad_norm": 1.045896291732788, "learning_rate": 4.720341122033867e-05, "loss": 0.0439, "step": 28304 }, { "epoch": 13.159460715946071, "grad_norm": 1.1214221715927124, "learning_rate": 4.716720639251395e-05, "loss": 0.024, "step": 28306 }, { "epoch": 13.160390516039051, "grad_norm": 1.9355310201644897, "learning_rate": 4.71307827833114e-05, "loss": 0.0317, "step": 28308 }, { "epoch": 13.161320316132032, "grad_norm": 1.1694002151489258, "learning_rate": 4.7094140752217424e-05, "loss": 0.0223, "step": 28310 }, { "epoch": 13.162250116225012, "grad_norm": 1.4121465682983398, "learning_rate": 4.70572806608739e-05, "loss": 0.0492, "step": 28312 }, { "epoch": 13.163179916317992, "grad_norm": 1.1827924251556396, "learning_rate": 4.702020287307514e-05, "loss": 0.0394, "step": 28314 }, { "epoch": 13.164109716410971, "grad_norm": 1.2009787559509277, "learning_rate": 4.6982907754763933e-05, "loss": 0.028, "step": 28316 }, { "epoch": 13.16503951650395, "grad_norm": 0.470450222492218, "learning_rate": 4.6945395674028134e-05, "loss": 0.0319, "step": 28318 }, { "epoch": 13.165969316596932, "grad_norm": 2.143095016479492, "learning_rate": 4.690766700109666e-05, "loss": 0.0283, "step": 28320 }, { "epoch": 13.166899116689912, "grad_norm": 0.7298703789710999, "learning_rate": 4.686972210833637e-05, "loss": 0.0302, "step": 28322 }, { "epoch": 13.167828916782891, "grad_norm": 1.287933349609375, "learning_rate": 4.6831561370248036e-05, "loss": 0.0386, "step": 28324 }, { "epoch": 13.168758716875871, "grad_norm": 1.8882341384887695, "learning_rate": 4.679318516346273e-05, "loss": 0.0807, "step": 28326 }, { "epoch": 13.169688516968852, "grad_norm": 0.8642510771751404, "learning_rate": 4.675459386673814e-05, "loss": 0.0221, "step": 28328 }, { "epoch": 13.170618317061832, "grad_norm": 1.0275688171386719, "learning_rate": 4.671578786095476e-05, "loss": 0.0275, "step": 28330 }, { "epoch": 13.171548117154812, "grad_norm": 0.8665088415145874, "learning_rate": 4.66767675291123e-05, "loss": 0.0223, "step": 28332 }, { "epoch": 13.172477917247791, "grad_norm": 1.4918118715286255, "learning_rate": 4.6637533256325504e-05, "loss": 0.0349, "step": 28334 }, { "epoch": 13.17340771734077, "grad_norm": 2.924532413482666, "learning_rate": 4.659808542982089e-05, "loss": 0.0369, "step": 28336 }, { "epoch": 13.174337517433752, "grad_norm": 2.6010279655456543, "learning_rate": 4.655842443893268e-05, "loss": 0.0692, "step": 28338 }, { "epoch": 13.175267317526732, "grad_norm": 1.915380597114563, "learning_rate": 4.6518550675098664e-05, "loss": 0.035, "step": 28340 }, { "epoch": 13.176197117619711, "grad_norm": 1.6321284770965576, "learning_rate": 4.647846453185687e-05, "loss": 0.0358, "step": 28342 }, { "epoch": 13.177126917712691, "grad_norm": 1.4698429107666016, "learning_rate": 4.6438166404841436e-05, "loss": 0.0437, "step": 28344 }, { "epoch": 13.178056717805672, "grad_norm": 1.4920108318328857, "learning_rate": 4.639765669177844e-05, "loss": 0.0341, "step": 28346 }, { "epoch": 13.178986517898652, "grad_norm": 2.55100154876709, "learning_rate": 4.635693579248248e-05, "loss": 0.0629, "step": 28348 }, { "epoch": 13.179916317991632, "grad_norm": 1.745980978012085, "learning_rate": 4.6316004108852285e-05, "loss": 0.0367, "step": 28350 }, { "epoch": 13.180846118084611, "grad_norm": 0.6093172430992126, "learning_rate": 4.627486204486735e-05, "loss": 0.0269, "step": 28352 }, { "epoch": 13.181775918177593, "grad_norm": 1.5283265113830566, "learning_rate": 4.623351000658295e-05, "loss": 0.0441, "step": 28354 }, { "epoch": 13.182705718270572, "grad_norm": 1.7853063344955444, "learning_rate": 4.6191948402127095e-05, "loss": 0.0403, "step": 28356 }, { "epoch": 13.183635518363552, "grad_norm": 2.5549535751342773, "learning_rate": 4.615017764169614e-05, "loss": 0.076, "step": 28358 }, { "epoch": 13.184565318456531, "grad_norm": 2.1579785346984863, "learning_rate": 4.610819813755045e-05, "loss": 0.058, "step": 28360 }, { "epoch": 13.185495118549511, "grad_norm": 1.0295765399932861, "learning_rate": 4.606601030401087e-05, "loss": 0.0293, "step": 28362 }, { "epoch": 13.186424918642492, "grad_norm": 1.398485779762268, "learning_rate": 4.6023614557454354e-05, "loss": 0.0449, "step": 28364 }, { "epoch": 13.187354718735472, "grad_norm": 0.8385180830955505, "learning_rate": 4.598101131630966e-05, "loss": 0.038, "step": 28366 }, { "epoch": 13.188284518828452, "grad_norm": 2.039170980453491, "learning_rate": 4.593820100105363e-05, "loss": 0.052, "step": 28368 }, { "epoch": 13.189214318921431, "grad_norm": 1.8873794078826904, "learning_rate": 4.589518403420683e-05, "loss": 0.0379, "step": 28370 }, { "epoch": 13.190144119014413, "grad_norm": 1.247903823852539, "learning_rate": 4.5851960840329425e-05, "loss": 0.0212, "step": 28372 }, { "epoch": 13.191073919107392, "grad_norm": 1.617157220840454, "learning_rate": 4.580853184601671e-05, "loss": 0.0311, "step": 28374 }, { "epoch": 13.192003719200372, "grad_norm": 1.6324819326400757, "learning_rate": 4.5764897479895425e-05, "loss": 0.0453, "step": 28376 }, { "epoch": 13.192933519293351, "grad_norm": 0.7310593724250793, "learning_rate": 4.572105817261913e-05, "loss": 0.0157, "step": 28378 }, { "epoch": 13.193863319386331, "grad_norm": 3.67167067527771, "learning_rate": 4.56770143568641e-05, "loss": 0.0374, "step": 28380 }, { "epoch": 13.194793119479312, "grad_norm": 2.6223556995391846, "learning_rate": 4.563276646732503e-05, "loss": 0.0577, "step": 28382 }, { "epoch": 13.195722919572292, "grad_norm": 0.27594414353370667, "learning_rate": 4.55883149407107e-05, "loss": 0.0161, "step": 28384 }, { "epoch": 13.196652719665272, "grad_norm": 0.387621134519577, "learning_rate": 4.5543660215739854e-05, "loss": 0.0179, "step": 28386 }, { "epoch": 13.197582519758251, "grad_norm": 1.366117000579834, "learning_rate": 4.549880273313638e-05, "loss": 0.0343, "step": 28388 }, { "epoch": 13.198512319851233, "grad_norm": 1.0740790367126465, "learning_rate": 4.545374293562564e-05, "loss": 0.0474, "step": 28390 }, { "epoch": 13.199442119944212, "grad_norm": 0.41405683755874634, "learning_rate": 4.540848126792973e-05, "loss": 0.0092, "step": 28392 }, { "epoch": 13.200371920037192, "grad_norm": 0.9464915990829468, "learning_rate": 4.5363018176762835e-05, "loss": 0.0581, "step": 28394 }, { "epoch": 13.201301720130171, "grad_norm": 1.3049108982086182, "learning_rate": 4.5317354110827426e-05, "loss": 0.0374, "step": 28396 }, { "epoch": 13.202231520223153, "grad_norm": 1.4080642461776733, "learning_rate": 4.5271489520809495e-05, "loss": 0.0405, "step": 28398 }, { "epoch": 13.203161320316132, "grad_norm": 2.0733208656311035, "learning_rate": 4.522542485937382e-05, "loss": 0.0349, "step": 28400 }, { "epoch": 13.204091120409112, "grad_norm": 1.1623409986495972, "learning_rate": 4.517916058116012e-05, "loss": 0.0535, "step": 28402 }, { "epoch": 13.205020920502092, "grad_norm": 1.3174062967300415, "learning_rate": 4.513269714277814e-05, "loss": 0.0471, "step": 28404 }, { "epoch": 13.205950720595071, "grad_norm": 0.8614821434020996, "learning_rate": 4.508603500280326e-05, "loss": 0.0225, "step": 28406 }, { "epoch": 13.206880520688053, "grad_norm": 1.208021879196167, "learning_rate": 4.503917462177196e-05, "loss": 0.0282, "step": 28408 }, { "epoch": 13.207810320781032, "grad_norm": 0.4064894914627075, "learning_rate": 4.499211646217728e-05, "loss": 0.0069, "step": 28410 }, { "epoch": 13.208740120874012, "grad_norm": 1.521828532218933, "learning_rate": 4.494486098846438e-05, "loss": 0.0347, "step": 28412 }, { "epoch": 13.209669920966991, "grad_norm": 1.3881784677505493, "learning_rate": 4.489740866702547e-05, "loss": 0.0426, "step": 28414 }, { "epoch": 13.210599721059973, "grad_norm": 2.178941011428833, "learning_rate": 4.4849759966195934e-05, "loss": 0.0386, "step": 28416 }, { "epoch": 13.211529521152952, "grad_norm": 1.9145073890686035, "learning_rate": 4.4801915356249206e-05, "loss": 0.0311, "step": 28418 }, { "epoch": 13.212459321245932, "grad_norm": 2.3100132942199707, "learning_rate": 4.4753875309392374e-05, "loss": 0.0314, "step": 28420 }, { "epoch": 13.213389121338912, "grad_norm": 1.002049207687378, "learning_rate": 4.470564029976108e-05, "loss": 0.0133, "step": 28422 }, { "epoch": 13.214318921431893, "grad_norm": 1.7956435680389404, "learning_rate": 4.465721080341553e-05, "loss": 0.0342, "step": 28424 }, { "epoch": 13.215248721524873, "grad_norm": 1.4257798194885254, "learning_rate": 4.4608587298335395e-05, "loss": 0.0425, "step": 28426 }, { "epoch": 13.216178521617852, "grad_norm": 1.1628934144973755, "learning_rate": 4.4559770264414824e-05, "loss": 0.0115, "step": 28428 }, { "epoch": 13.217108321710832, "grad_norm": 1.263693928718567, "learning_rate": 4.451076018345834e-05, "loss": 0.0278, "step": 28430 }, { "epoch": 13.218038121803811, "grad_norm": 0.5995467305183411, "learning_rate": 4.446155753917577e-05, "loss": 0.0084, "step": 28432 }, { "epoch": 13.218967921896793, "grad_norm": 1.3913391828536987, "learning_rate": 4.441216281717701e-05, "loss": 0.0254, "step": 28434 }, { "epoch": 13.219897721989772, "grad_norm": 0.24696418642997742, "learning_rate": 4.436257650496836e-05, "loss": 0.0293, "step": 28436 }, { "epoch": 13.220827522082752, "grad_norm": 2.2186434268951416, "learning_rate": 4.43127990919466e-05, "loss": 0.0677, "step": 28438 }, { "epoch": 13.221757322175732, "grad_norm": 0.5587247014045715, "learning_rate": 4.426283106939482e-05, "loss": 0.0303, "step": 28440 }, { "epoch": 13.222687122268713, "grad_norm": 0.9713272452354431, "learning_rate": 4.4212672930476976e-05, "loss": 0.0365, "step": 28442 }, { "epoch": 13.223616922361693, "grad_norm": 1.9061697721481323, "learning_rate": 4.416232517023378e-05, "loss": 0.0242, "step": 28444 }, { "epoch": 13.224546722454672, "grad_norm": 1.1406879425048828, "learning_rate": 4.41117882855774e-05, "loss": 0.0208, "step": 28446 }, { "epoch": 13.225476522547652, "grad_norm": 1.2694624662399292, "learning_rate": 4.406106277528629e-05, "loss": 0.0257, "step": 28448 }, { "epoch": 13.226406322640631, "grad_norm": 0.7208225131034851, "learning_rate": 4.4010149140000846e-05, "loss": 0.0133, "step": 28450 }, { "epoch": 13.227336122733613, "grad_norm": 1.3093088865280151, "learning_rate": 4.395904788221821e-05, "loss": 0.0551, "step": 28452 }, { "epoch": 13.228265922826592, "grad_norm": 1.4284751415252686, "learning_rate": 4.390775950628693e-05, "loss": 0.0344, "step": 28454 }, { "epoch": 13.229195722919572, "grad_norm": 0.9152253270149231, "learning_rate": 4.38562845184027e-05, "loss": 0.0399, "step": 28456 }, { "epoch": 13.230125523012552, "grad_norm": 0.8745582103729248, "learning_rate": 4.3804623426602865e-05, "loss": 0.0204, "step": 28458 }, { "epoch": 13.231055323105533, "grad_norm": 1.0694000720977783, "learning_rate": 4.3752776740761544e-05, "loss": 0.0446, "step": 28460 }, { "epoch": 13.231985123198513, "grad_norm": 1.0154755115509033, "learning_rate": 4.370074497258459e-05, "loss": 0.0296, "step": 28462 }, { "epoch": 13.232914923291492, "grad_norm": 0.2979365885257721, "learning_rate": 4.364852863560455e-05, "loss": 0.0223, "step": 28464 }, { "epoch": 13.233844723384472, "grad_norm": 0.6873626708984375, "learning_rate": 4.359612824517571e-05, "loss": 0.0094, "step": 28466 }, { "epoch": 13.234774523477453, "grad_norm": 0.6009352803230286, "learning_rate": 4.354354431846854e-05, "loss": 0.0201, "step": 28468 }, { "epoch": 13.235704323570433, "grad_norm": 1.4536051750183105, "learning_rate": 4.3490777374465274e-05, "loss": 0.0154, "step": 28470 }, { "epoch": 13.236634123663412, "grad_norm": 1.6113824844360352, "learning_rate": 4.343782793395435e-05, "loss": 0.0263, "step": 28472 }, { "epoch": 13.237563923756392, "grad_norm": 1.425447940826416, "learning_rate": 4.338469651952549e-05, "loss": 0.0353, "step": 28474 }, { "epoch": 13.238493723849372, "grad_norm": 0.5548790693283081, "learning_rate": 4.3331383655564074e-05, "loss": 0.0105, "step": 28476 }, { "epoch": 13.239423523942353, "grad_norm": 1.1305242776870728, "learning_rate": 4.327788986824665e-05, "loss": 0.031, "step": 28478 }, { "epoch": 13.240353324035333, "grad_norm": 0.6908866763114929, "learning_rate": 4.3224215685535436e-05, "loss": 0.025, "step": 28480 }, { "epoch": 13.241283124128312, "grad_norm": 0.469576895236969, "learning_rate": 4.317036163717269e-05, "loss": 0.0236, "step": 28482 }, { "epoch": 13.242212924221292, "grad_norm": 1.9639720916748047, "learning_rate": 4.311632825467627e-05, "loss": 0.0281, "step": 28484 }, { "epoch": 13.243142724314273, "grad_norm": 1.9210290908813477, "learning_rate": 4.3062116071333936e-05, "loss": 0.0378, "step": 28486 }, { "epoch": 13.244072524407253, "grad_norm": 3.8278324604034424, "learning_rate": 4.3007725622197714e-05, "loss": 0.0626, "step": 28488 }, { "epoch": 13.245002324500232, "grad_norm": 2.2937963008880615, "learning_rate": 4.295315744407973e-05, "loss": 0.028, "step": 28490 }, { "epoch": 13.245932124593212, "grad_norm": 1.3695926666259766, "learning_rate": 4.289841207554576e-05, "loss": 0.0231, "step": 28492 }, { "epoch": 13.246861924686192, "grad_norm": 1.0526777505874634, "learning_rate": 4.28434900569106e-05, "loss": 0.0215, "step": 28494 }, { "epoch": 13.247791724779173, "grad_norm": 1.7912402153015137, "learning_rate": 4.278839193023217e-05, "loss": 0.0611, "step": 28496 }, { "epoch": 13.248721524872153, "grad_norm": 1.8627336025238037, "learning_rate": 4.273311823930686e-05, "loss": 0.037, "step": 28498 }, { "epoch": 13.249651324965132, "grad_norm": 1.5308911800384521, "learning_rate": 4.2677669529663805e-05, "loss": 0.0363, "step": 28500 }, { "epoch": 13.250581125058112, "grad_norm": 1.0622851848602295, "learning_rate": 4.2622046348559125e-05, "loss": 0.0198, "step": 28502 }, { "epoch": 13.251510925151093, "grad_norm": 1.4057213068008423, "learning_rate": 4.256624924497129e-05, "loss": 0.0315, "step": 28504 }, { "epoch": 13.252440725244073, "grad_norm": 1.325223684310913, "learning_rate": 4.2510278769595195e-05, "loss": 0.0248, "step": 28506 }, { "epoch": 13.253370525337052, "grad_norm": 1.7947626113891602, "learning_rate": 4.2454135474836946e-05, "loss": 0.0223, "step": 28508 }, { "epoch": 13.254300325430032, "grad_norm": 1.7754276990890503, "learning_rate": 4.2397819914807964e-05, "loss": 0.0446, "step": 28510 }, { "epoch": 13.255230125523013, "grad_norm": 1.2599196434020996, "learning_rate": 4.23413326453202e-05, "loss": 0.0159, "step": 28512 }, { "epoch": 13.256159925615993, "grad_norm": 0.6452869772911072, "learning_rate": 4.228467422388033e-05, "loss": 0.0095, "step": 28514 }, { "epoch": 13.257089725708973, "grad_norm": 1.7819682359695435, "learning_rate": 4.222784520968372e-05, "loss": 0.0531, "step": 28516 }, { "epoch": 13.258019525801952, "grad_norm": 1.2918686866760254, "learning_rate": 4.2170846163610186e-05, "loss": 0.047, "step": 28518 }, { "epoch": 13.258949325894932, "grad_norm": 1.1563154458999634, "learning_rate": 4.21136776482173e-05, "loss": 0.0268, "step": 28520 }, { "epoch": 13.259879125987913, "grad_norm": 1.0841971635818481, "learning_rate": 4.205634022773497e-05, "loss": 0.0224, "step": 28522 }, { "epoch": 13.260808926080893, "grad_norm": 0.49426448345184326, "learning_rate": 4.19988344680605e-05, "loss": 0.0151, "step": 28524 }, { "epoch": 13.261738726173872, "grad_norm": 1.3815761804580688, "learning_rate": 4.194116093675254e-05, "loss": 0.0257, "step": 28526 }, { "epoch": 13.262668526266852, "grad_norm": 0.8850527405738831, "learning_rate": 4.1883320203025694e-05, "loss": 0.0198, "step": 28528 }, { "epoch": 13.263598326359833, "grad_norm": 1.1030312776565552, "learning_rate": 4.182531283774439e-05, "loss": 0.032, "step": 28530 }, { "epoch": 13.264528126452813, "grad_norm": 0.9031252861022949, "learning_rate": 4.1767139413418034e-05, "loss": 0.0191, "step": 28532 }, { "epoch": 13.265457926545793, "grad_norm": 1.5524160861968994, "learning_rate": 4.170880050419496e-05, "loss": 0.0586, "step": 28534 }, { "epoch": 13.266387726638772, "grad_norm": 1.0630674362182617, "learning_rate": 4.165029668585639e-05, "loss": 0.0169, "step": 28536 }, { "epoch": 13.267317526731752, "grad_norm": 1.1070311069488525, "learning_rate": 4.159162853581154e-05, "loss": 0.0259, "step": 28538 }, { "epoch": 13.268247326824733, "grad_norm": 1.6662591695785522, "learning_rate": 4.153279663309148e-05, "loss": 0.0265, "step": 28540 }, { "epoch": 13.269177126917713, "grad_norm": 1.6654406785964966, "learning_rate": 4.147380155834308e-05, "loss": 0.0312, "step": 28542 }, { "epoch": 13.270106927010692, "grad_norm": 0.9056544303894043, "learning_rate": 4.141464389382391e-05, "loss": 0.0326, "step": 28544 }, { "epoch": 13.271036727103672, "grad_norm": 0.9958344101905823, "learning_rate": 4.135532422339649e-05, "loss": 0.0255, "step": 28546 }, { "epoch": 13.271966527196653, "grad_norm": 1.3832992315292358, "learning_rate": 4.129584313252203e-05, "loss": 0.0219, "step": 28548 }, { "epoch": 13.272896327289633, "grad_norm": 0.501774787902832, "learning_rate": 4.123620120825462e-05, "loss": 0.0135, "step": 28550 }, { "epoch": 13.273826127382613, "grad_norm": 0.4416573941707611, "learning_rate": 4.1176399039236116e-05, "loss": 0.0207, "step": 28552 }, { "epoch": 13.274755927475592, "grad_norm": 1.3457047939300537, "learning_rate": 4.111643721568989e-05, "loss": 0.0161, "step": 28554 }, { "epoch": 13.275685727568574, "grad_norm": 1.1487854719161987, "learning_rate": 4.105631632941469e-05, "loss": 0.0252, "step": 28556 }, { "epoch": 13.276615527661553, "grad_norm": 0.6337630152702332, "learning_rate": 4.0996036973779505e-05, "loss": 0.0243, "step": 28558 }, { "epoch": 13.277545327754533, "grad_norm": 0.4780656099319458, "learning_rate": 4.093559974371726e-05, "loss": 0.0142, "step": 28560 }, { "epoch": 13.278475127847512, "grad_norm": 1.238659381866455, "learning_rate": 4.087500523571914e-05, "loss": 0.0224, "step": 28562 }, { "epoch": 13.279404927940492, "grad_norm": 0.7941290140151978, "learning_rate": 4.0814254047828204e-05, "loss": 0.0413, "step": 28564 }, { "epoch": 13.280334728033473, "grad_norm": 0.944268524646759, "learning_rate": 4.075334677963429e-05, "loss": 0.024, "step": 28566 }, { "epoch": 13.281264528126453, "grad_norm": 1.7147703170776367, "learning_rate": 4.069228403226768e-05, "loss": 0.0529, "step": 28568 }, { "epoch": 13.282194328219433, "grad_norm": 1.6222038269042969, "learning_rate": 4.063106640839263e-05, "loss": 0.0375, "step": 28570 }, { "epoch": 13.283124128312412, "grad_norm": 1.5370274782180786, "learning_rate": 4.0569694512202784e-05, "loss": 0.0146, "step": 28572 }, { "epoch": 13.284053928405394, "grad_norm": 1.1344306468963623, "learning_rate": 4.050816894941399e-05, "loss": 0.0373, "step": 28574 }, { "epoch": 13.284983728498373, "grad_norm": 0.7161028385162354, "learning_rate": 4.0446490327258415e-05, "loss": 0.0124, "step": 28576 }, { "epoch": 13.285913528591353, "grad_norm": 0.833814263343811, "learning_rate": 4.0384659254479313e-05, "loss": 0.0166, "step": 28578 }, { "epoch": 13.286843328684332, "grad_norm": 1.027050256729126, "learning_rate": 4.0322676341324395e-05, "loss": 0.0265, "step": 28580 }, { "epoch": 13.287773128777314, "grad_norm": 1.2009831666946411, "learning_rate": 4.026054219954016e-05, "loss": 0.016, "step": 28582 }, { "epoch": 13.288702928870293, "grad_norm": 1.3070244789123535, "learning_rate": 4.01982574423652e-05, "loss": 0.0147, "step": 28584 }, { "epoch": 13.289632728963273, "grad_norm": 0.763701319694519, "learning_rate": 4.013582268452508e-05, "loss": 0.0173, "step": 28586 }, { "epoch": 13.290562529056253, "grad_norm": 0.1507529467344284, "learning_rate": 4.007323854222577e-05, "loss": 0.0071, "step": 28588 }, { "epoch": 13.291492329149232, "grad_norm": 1.2292118072509766, "learning_rate": 4.0010505633147214e-05, "loss": 0.0158, "step": 28590 }, { "epoch": 13.292422129242214, "grad_norm": 1.2589291334152222, "learning_rate": 3.994762457643805e-05, "loss": 0.0206, "step": 28592 }, { "epoch": 13.293351929335193, "grad_norm": 1.8403266668319702, "learning_rate": 3.988459599270892e-05, "loss": 0.0201, "step": 28594 }, { "epoch": 13.294281729428173, "grad_norm": 0.6550893783569336, "learning_rate": 3.982142050402664e-05, "loss": 0.0095, "step": 28596 }, { "epoch": 13.295211529521152, "grad_norm": 0.8751586079597473, "learning_rate": 3.975809873390735e-05, "loss": 0.0209, "step": 28598 }, { "epoch": 13.296141329614134, "grad_norm": 1.0659223794937134, "learning_rate": 3.969463130731178e-05, "loss": 0.0239, "step": 28600 }, { "epoch": 13.297071129707113, "grad_norm": 2.018841505050659, "learning_rate": 3.9631018850637814e-05, "loss": 0.0398, "step": 28602 }, { "epoch": 13.298000929800093, "grad_norm": 1.323974609375, "learning_rate": 3.956726199171443e-05, "loss": 0.0438, "step": 28604 }, { "epoch": 13.298930729893073, "grad_norm": 2.148573160171509, "learning_rate": 3.950336135979624e-05, "loss": 0.0271, "step": 28606 }, { "epoch": 13.299860529986052, "grad_norm": 1.1439826488494873, "learning_rate": 3.9439317585556804e-05, "loss": 0.016, "step": 28608 }, { "epoch": 13.300790330079034, "grad_norm": 1.0280513763427734, "learning_rate": 3.937513130108205e-05, "loss": 0.0359, "step": 28610 }, { "epoch": 13.301720130172013, "grad_norm": 1.322713851928711, "learning_rate": 3.931080313986482e-05, "loss": 0.0122, "step": 28612 }, { "epoch": 13.302649930264993, "grad_norm": 1.087637186050415, "learning_rate": 3.924633373679812e-05, "loss": 0.0188, "step": 28614 }, { "epoch": 13.303579730357972, "grad_norm": 1.2023544311523438, "learning_rate": 3.9181723728169045e-05, "loss": 0.0185, "step": 28616 }, { "epoch": 13.304509530450954, "grad_norm": 1.4661226272583008, "learning_rate": 3.911697375165203e-05, "loss": 0.0368, "step": 28618 }, { "epoch": 13.305439330543933, "grad_norm": 1.4114811420440674, "learning_rate": 3.9052084446303335e-05, "loss": 0.0445, "step": 28620 }, { "epoch": 13.306369130636913, "grad_norm": 0.6603555083274841, "learning_rate": 3.898705645255436e-05, "loss": 0.017, "step": 28622 }, { "epoch": 13.307298930729893, "grad_norm": 2.5898525714874268, "learning_rate": 3.8921890412204854e-05, "loss": 0.0325, "step": 28624 }, { "epoch": 13.308228730822874, "grad_norm": 0.43291276693344116, "learning_rate": 3.8856586968417334e-05, "loss": 0.0146, "step": 28626 }, { "epoch": 13.309158530915854, "grad_norm": 2.125725746154785, "learning_rate": 3.879114676571086e-05, "loss": 0.0391, "step": 28628 }, { "epoch": 13.310088331008833, "grad_norm": 1.4564613103866577, "learning_rate": 3.872557044995337e-05, "loss": 0.0182, "step": 28630 }, { "epoch": 13.311018131101813, "grad_norm": 1.7068843841552734, "learning_rate": 3.865985866835677e-05, "loss": 0.0266, "step": 28632 }, { "epoch": 13.311947931194792, "grad_norm": 1.4724172353744507, "learning_rate": 3.8594012069469824e-05, "loss": 0.0281, "step": 28634 }, { "epoch": 13.312877731287774, "grad_norm": 1.2439900636672974, "learning_rate": 3.8528031303172024e-05, "loss": 0.0184, "step": 28636 }, { "epoch": 13.313807531380753, "grad_norm": 1.5460926294326782, "learning_rate": 3.846191702066659e-05, "loss": 0.0191, "step": 28638 }, { "epoch": 13.314737331473733, "grad_norm": 0.9304525852203369, "learning_rate": 3.839566987447496e-05, "loss": 0.0135, "step": 28640 }, { "epoch": 13.315667131566713, "grad_norm": 1.4860820770263672, "learning_rate": 3.832929051842987e-05, "loss": 0.0422, "step": 28642 }, { "epoch": 13.316596931659694, "grad_norm": 1.8250094652175903, "learning_rate": 3.8262779607668475e-05, "loss": 0.0383, "step": 28644 }, { "epoch": 13.317526731752674, "grad_norm": 1.3145076036453247, "learning_rate": 3.819613779862675e-05, "loss": 0.0175, "step": 28646 }, { "epoch": 13.318456531845653, "grad_norm": 1.4426995515823364, "learning_rate": 3.812936574903245e-05, "loss": 0.0161, "step": 28648 }, { "epoch": 13.319386331938633, "grad_norm": 1.9492071866989136, "learning_rate": 3.8062464117898894e-05, "loss": 0.0204, "step": 28650 }, { "epoch": 13.320316132031612, "grad_norm": 1.6277034282684326, "learning_rate": 3.799543356551772e-05, "loss": 0.0177, "step": 28652 }, { "epoch": 13.321245932124594, "grad_norm": 0.7490392923355103, "learning_rate": 3.7928274753453875e-05, "loss": 0.0383, "step": 28654 }, { "epoch": 13.322175732217573, "grad_norm": 0.7681199312210083, "learning_rate": 3.786098834453773e-05, "loss": 0.0278, "step": 28656 }, { "epoch": 13.323105532310553, "grad_norm": 1.1567542552947998, "learning_rate": 3.779357500285866e-05, "loss": 0.0226, "step": 28658 }, { "epoch": 13.324035332403533, "grad_norm": 0.9708775877952576, "learning_rate": 3.7726035393759285e-05, "loss": 0.0167, "step": 28660 }, { "epoch": 13.324965132496514, "grad_norm": 1.3290356397628784, "learning_rate": 3.765837018382843e-05, "loss": 0.0177, "step": 28662 }, { "epoch": 13.325894932589494, "grad_norm": 1.112943172454834, "learning_rate": 3.759058004089411e-05, "loss": 0.0212, "step": 28664 }, { "epoch": 13.326824732682473, "grad_norm": 1.1945037841796875, "learning_rate": 3.7522665634017805e-05, "loss": 0.0267, "step": 28666 }, { "epoch": 13.327754532775453, "grad_norm": 1.8979953527450562, "learning_rate": 3.7454627633487294e-05, "loss": 0.0562, "step": 28668 }, { "epoch": 13.328684332868434, "grad_norm": 0.7012708187103271, "learning_rate": 3.738646671081033e-05, "loss": 0.0152, "step": 28670 }, { "epoch": 13.329614132961414, "grad_norm": 1.4832103252410889, "learning_rate": 3.7318183538707396e-05, "loss": 0.0209, "step": 28672 }, { "epoch": 13.330543933054393, "grad_norm": 1.7877315282821655, "learning_rate": 3.724977879110599e-05, "loss": 0.052, "step": 28674 }, { "epoch": 13.331473733147373, "grad_norm": 0.9467776417732239, "learning_rate": 3.71812531431335e-05, "loss": 0.0209, "step": 28676 }, { "epoch": 13.332403533240353, "grad_norm": 1.66828453540802, "learning_rate": 3.711260727111011e-05, "loss": 0.0202, "step": 28678 }, { "epoch": 13.333333333333334, "grad_norm": 1.8249893188476562, "learning_rate": 3.7043841852542854e-05, "loss": 0.056, "step": 28680 }, { "epoch": 13.334263133426314, "grad_norm": 2.859614849090576, "learning_rate": 3.697495756611896e-05, "loss": 0.0626, "step": 28682 }, { "epoch": 13.335192933519293, "grad_norm": 1.2658823728561401, "learning_rate": 3.690595509169854e-05, "loss": 0.0282, "step": 28684 }, { "epoch": 13.336122733612273, "grad_norm": 1.0029315948486328, "learning_rate": 3.6836835110307835e-05, "loss": 0.0292, "step": 28686 }, { "epoch": 13.337052533705254, "grad_norm": 0.7964431643486023, "learning_rate": 3.676759830413332e-05, "loss": 0.0177, "step": 28688 }, { "epoch": 13.337982333798234, "grad_norm": 1.2899706363677979, "learning_rate": 3.6698245356514463e-05, "loss": 0.0181, "step": 28690 }, { "epoch": 13.338912133891213, "grad_norm": 1.1344355344772339, "learning_rate": 3.662877695193655e-05, "loss": 0.02, "step": 28692 }, { "epoch": 13.339841933984193, "grad_norm": 0.5050945281982422, "learning_rate": 3.6559193776024855e-05, "loss": 0.0146, "step": 28694 }, { "epoch": 13.340771734077173, "grad_norm": 1.3982828855514526, "learning_rate": 3.648949651553737e-05, "loss": 0.0122, "step": 28696 }, { "epoch": 13.341701534170154, "grad_norm": 2.193732261657715, "learning_rate": 3.6419685858357634e-05, "loss": 0.0315, "step": 28698 }, { "epoch": 13.342631334263134, "grad_norm": 0.5261371731758118, "learning_rate": 3.634976249348878e-05, "loss": 0.019, "step": 28700 }, { "epoch": 13.343561134356113, "grad_norm": 1.1333472728729248, "learning_rate": 3.62797271110462e-05, "loss": 0.0129, "step": 28702 }, { "epoch": 13.344490934449093, "grad_norm": 0.7347383499145508, "learning_rate": 3.6209580402251e-05, "loss": 0.0107, "step": 28704 }, { "epoch": 13.345420734542074, "grad_norm": 0.6263189911842346, "learning_rate": 3.613932305942257e-05, "loss": 0.0248, "step": 28706 }, { "epoch": 13.346350534635054, "grad_norm": 0.8777843117713928, "learning_rate": 3.606895577597251e-05, "loss": 0.0135, "step": 28708 }, { "epoch": 13.347280334728033, "grad_norm": 2.8058791160583496, "learning_rate": 3.5998479246397975e-05, "loss": 0.0628, "step": 28710 }, { "epoch": 13.348210134821013, "grad_norm": 1.4379692077636719, "learning_rate": 3.592789416627338e-05, "loss": 0.0285, "step": 28712 }, { "epoch": 13.349139934913994, "grad_norm": 2.2327542304992676, "learning_rate": 3.585720123224514e-05, "loss": 0.0368, "step": 28714 }, { "epoch": 13.350069735006974, "grad_norm": 0.5142868757247925, "learning_rate": 3.578640114202396e-05, "loss": 0.0281, "step": 28716 }, { "epoch": 13.350999535099954, "grad_norm": 0.8770007491111755, "learning_rate": 3.571549459437832e-05, "loss": 0.0182, "step": 28718 }, { "epoch": 13.351929335192933, "grad_norm": 1.3979798555374146, "learning_rate": 3.564448228912689e-05, "loss": 0.0518, "step": 28720 }, { "epoch": 13.352859135285913, "grad_norm": 1.589596152305603, "learning_rate": 3.557336492713261e-05, "loss": 0.0286, "step": 28722 }, { "epoch": 13.353788935378894, "grad_norm": 1.2188206911087036, "learning_rate": 3.5502143210295314e-05, "loss": 0.0352, "step": 28724 }, { "epoch": 13.354718735471874, "grad_norm": 0.5999637842178345, "learning_rate": 3.5430817841544255e-05, "loss": 0.012, "step": 28726 }, { "epoch": 13.355648535564853, "grad_norm": 1.5107572078704834, "learning_rate": 3.535938952483219e-05, "loss": 0.0157, "step": 28728 }, { "epoch": 13.356578335657833, "grad_norm": 1.6209979057312012, "learning_rate": 3.528785896512793e-05, "loss": 0.0356, "step": 28730 }, { "epoch": 13.357508135750814, "grad_norm": 0.7432815432548523, "learning_rate": 3.5216226868408896e-05, "loss": 0.0099, "step": 28732 }, { "epoch": 13.358437935843794, "grad_norm": 1.7636672258377075, "learning_rate": 3.514449394165497e-05, "loss": 0.0184, "step": 28734 }, { "epoch": 13.359367735936774, "grad_norm": 1.3732150793075562, "learning_rate": 3.50726608928415e-05, "loss": 0.0233, "step": 28736 }, { "epoch": 13.360297536029753, "grad_norm": 1.351360559463501, "learning_rate": 3.5000728430931686e-05, "loss": 0.027, "step": 28738 }, { "epoch": 13.361227336122734, "grad_norm": 1.4937901496887207, "learning_rate": 3.492869726586954e-05, "loss": 0.0282, "step": 28740 }, { "epoch": 13.362157136215714, "grad_norm": 0.42304763197898865, "learning_rate": 3.485656810857376e-05, "loss": 0.0249, "step": 28742 }, { "epoch": 13.363086936308694, "grad_norm": 0.9254206418991089, "learning_rate": 3.478434167093017e-05, "loss": 0.0358, "step": 28744 }, { "epoch": 13.364016736401673, "grad_norm": 1.5376955270767212, "learning_rate": 3.4712018665784226e-05, "loss": 0.0324, "step": 28746 }, { "epoch": 13.364946536494653, "grad_norm": 1.859100103378296, "learning_rate": 3.4639599806934946e-05, "loss": 0.0212, "step": 28748 }, { "epoch": 13.365876336587634, "grad_norm": 1.4734588861465454, "learning_rate": 3.4567085809127405e-05, "loss": 0.0186, "step": 28750 }, { "epoch": 13.366806136680614, "grad_norm": 0.8268744349479675, "learning_rate": 3.449447738804515e-05, "loss": 0.0262, "step": 28752 }, { "epoch": 13.367735936773594, "grad_norm": 2.141690969467163, "learning_rate": 3.4421775260304156e-05, "loss": 0.0402, "step": 28754 }, { "epoch": 13.368665736866573, "grad_norm": 0.8630391359329224, "learning_rate": 3.434898014344506e-05, "loss": 0.0081, "step": 28756 }, { "epoch": 13.369595536959554, "grad_norm": 1.3103481531143188, "learning_rate": 3.427609275592645e-05, "loss": 0.0287, "step": 28758 }, { "epoch": 13.370525337052534, "grad_norm": 1.5563948154449463, "learning_rate": 3.420311381711709e-05, "loss": 0.0488, "step": 28760 }, { "epoch": 13.371455137145514, "grad_norm": 1.3931231498718262, "learning_rate": 3.413004404728963e-05, "loss": 0.0199, "step": 28762 }, { "epoch": 13.372384937238493, "grad_norm": 1.9597349166870117, "learning_rate": 3.4056884167613704e-05, "loss": 0.0411, "step": 28764 }, { "epoch": 13.373314737331473, "grad_norm": 0.6747094988822937, "learning_rate": 3.39836349001473e-05, "loss": 0.0444, "step": 28766 }, { "epoch": 13.374244537424454, "grad_norm": 2.2612574100494385, "learning_rate": 3.391029696783126e-05, "loss": 0.0267, "step": 28768 }, { "epoch": 13.375174337517434, "grad_norm": 1.6247224807739258, "learning_rate": 3.3836871094481384e-05, "loss": 0.019, "step": 28770 }, { "epoch": 13.376104137610414, "grad_norm": 1.7714686393737793, "learning_rate": 3.3763358004781556e-05, "loss": 0.0239, "step": 28772 }, { "epoch": 13.377033937703393, "grad_norm": 2.078740358352661, "learning_rate": 3.368975842427597e-05, "loss": 0.0263, "step": 28774 }, { "epoch": 13.377963737796374, "grad_norm": 1.5459586381912231, "learning_rate": 3.361607307936294e-05, "loss": 0.0323, "step": 28776 }, { "epoch": 13.378893537889354, "grad_norm": 0.4254922866821289, "learning_rate": 3.354230269728723e-05, "loss": 0.0085, "step": 28778 }, { "epoch": 13.379823337982334, "grad_norm": 1.9745445251464844, "learning_rate": 3.3468448006132394e-05, "loss": 0.0297, "step": 28780 }, { "epoch": 13.380753138075313, "grad_norm": 2.044740915298462, "learning_rate": 3.339450973481459e-05, "loss": 0.0354, "step": 28782 }, { "epoch": 13.381682938168293, "grad_norm": 0.8913601040840149, "learning_rate": 3.332048861307487e-05, "loss": 0.0327, "step": 28784 }, { "epoch": 13.382612738261274, "grad_norm": 0.9199577569961548, "learning_rate": 3.324638537147149e-05, "loss": 0.0149, "step": 28786 }, { "epoch": 13.383542538354254, "grad_norm": 1.8181757926940918, "learning_rate": 3.31722007413737e-05, "loss": 0.0342, "step": 28788 }, { "epoch": 13.384472338447233, "grad_norm": 0.4959680736064911, "learning_rate": 3.309793545495367e-05, "loss": 0.0088, "step": 28790 }, { "epoch": 13.385402138540213, "grad_norm": 0.3395017683506012, "learning_rate": 3.30235902451803e-05, "loss": 0.0063, "step": 28792 }, { "epoch": 13.386331938633194, "grad_norm": 0.9412838220596313, "learning_rate": 3.2949165845810296e-05, "loss": 0.0547, "step": 28794 }, { "epoch": 13.387261738726174, "grad_norm": 1.2013052701950073, "learning_rate": 3.287466299138261e-05, "loss": 0.0201, "step": 28796 }, { "epoch": 13.388191538819154, "grad_norm": 2.326307773590088, "learning_rate": 3.280008241721049e-05, "loss": 0.0203, "step": 28798 }, { "epoch": 13.389121338912133, "grad_norm": 1.3636853694915771, "learning_rate": 3.272542485937377e-05, "loss": 0.0244, "step": 28800 }, { "epoch": 13.390051139005115, "grad_norm": 0.5156038403511047, "learning_rate": 3.2650691054712566e-05, "loss": 0.0131, "step": 28802 }, { "epoch": 13.390980939098094, "grad_norm": 1.2430191040039062, "learning_rate": 3.257588174081936e-05, "loss": 0.0147, "step": 28804 }, { "epoch": 13.391910739191074, "grad_norm": 1.0609290599822998, "learning_rate": 3.250099765603205e-05, "loss": 0.0257, "step": 28806 }, { "epoch": 13.392840539284053, "grad_norm": 1.0905146598815918, "learning_rate": 3.242603953942598e-05, "loss": 0.0138, "step": 28808 }, { "epoch": 13.393770339377033, "grad_norm": 1.2610511779785156, "learning_rate": 3.235100813080767e-05, "loss": 0.0431, "step": 28810 }, { "epoch": 13.394700139470014, "grad_norm": 1.3641035556793213, "learning_rate": 3.227590417070699e-05, "loss": 0.0316, "step": 28812 }, { "epoch": 13.395629939562994, "grad_norm": 0.6922093629837036, "learning_rate": 3.2200728400369396e-05, "loss": 0.0143, "step": 28814 }, { "epoch": 13.396559739655974, "grad_norm": 1.0499417781829834, "learning_rate": 3.2125481561749524e-05, "loss": 0.0149, "step": 28816 }, { "epoch": 13.397489539748953, "grad_norm": 0.2589520514011383, "learning_rate": 3.205016439750332e-05, "loss": 0.0264, "step": 28818 }, { "epoch": 13.398419339841935, "grad_norm": 0.8774664998054504, "learning_rate": 3.197477765098078e-05, "loss": 0.0187, "step": 28820 }, { "epoch": 13.399349139934914, "grad_norm": 0.7709702253341675, "learning_rate": 3.189932206621866e-05, "loss": 0.0158, "step": 28822 }, { "epoch": 13.400278940027894, "grad_norm": 1.0181868076324463, "learning_rate": 3.182379838793311e-05, "loss": 0.0476, "step": 28824 }, { "epoch": 13.401208740120873, "grad_norm": 1.097733974456787, "learning_rate": 3.1748207361512525e-05, "loss": 0.021, "step": 28826 }, { "epoch": 13.402138540213855, "grad_norm": 0.4476917088031769, "learning_rate": 3.167254973300947e-05, "loss": 0.0154, "step": 28828 }, { "epoch": 13.403068340306834, "grad_norm": 1.753361701965332, "learning_rate": 3.1596826249134365e-05, "loss": 0.0316, "step": 28830 }, { "epoch": 13.403998140399814, "grad_norm": 1.012663722038269, "learning_rate": 3.15210376572476e-05, "loss": 0.0164, "step": 28832 }, { "epoch": 13.404927940492794, "grad_norm": 1.1643627882003784, "learning_rate": 3.144518470535163e-05, "loss": 0.02, "step": 28834 }, { "epoch": 13.405857740585773, "grad_norm": 2.1721482276916504, "learning_rate": 3.136926814208465e-05, "loss": 0.0182, "step": 28836 }, { "epoch": 13.406787540678755, "grad_norm": 1.2260465621948242, "learning_rate": 3.1293288716712654e-05, "loss": 0.0092, "step": 28838 }, { "epoch": 13.407717340771734, "grad_norm": 1.5100046396255493, "learning_rate": 3.121724717912156e-05, "loss": 0.023, "step": 28840 }, { "epoch": 13.408647140864714, "grad_norm": 1.0788800716400146, "learning_rate": 3.11411442798108e-05, "loss": 0.0204, "step": 28842 }, { "epoch": 13.409576940957693, "grad_norm": 0.46545782685279846, "learning_rate": 3.106498076988512e-05, "loss": 0.0221, "step": 28844 }, { "epoch": 13.410506741050675, "grad_norm": 0.6367983818054199, "learning_rate": 3.098875740104811e-05, "loss": 0.0143, "step": 28846 }, { "epoch": 13.411436541143654, "grad_norm": 1.490410327911377, "learning_rate": 3.091247492559315e-05, "loss": 0.0218, "step": 28848 }, { "epoch": 13.412366341236634, "grad_norm": 1.5749934911727905, "learning_rate": 3.083613409639763e-05, "loss": 0.0458, "step": 28850 }, { "epoch": 13.413296141329614, "grad_norm": 0.2317245900630951, "learning_rate": 3.075973566691489e-05, "loss": 0.0054, "step": 28852 }, { "epoch": 13.414225941422593, "grad_norm": 1.6144307851791382, "learning_rate": 3.068328039116625e-05, "loss": 0.0147, "step": 28854 }, { "epoch": 13.415155741515575, "grad_norm": 0.5324499607086182, "learning_rate": 3.0606769023734576e-05, "loss": 0.0165, "step": 28856 }, { "epoch": 13.416085541608554, "grad_norm": 1.6931562423706055, "learning_rate": 3.053020231975619e-05, "loss": 0.017, "step": 28858 }, { "epoch": 13.417015341701534, "grad_norm": 1.1107054948806763, "learning_rate": 3.045358103491371e-05, "loss": 0.0146, "step": 28860 }, { "epoch": 13.417945141794513, "grad_norm": 0.9826430678367615, "learning_rate": 3.0376905925427938e-05, "loss": 0.0174, "step": 28862 }, { "epoch": 13.418874941887495, "grad_norm": 0.8741912245750427, "learning_rate": 3.030017774805143e-05, "loss": 0.0076, "step": 28864 }, { "epoch": 13.419804741980474, "grad_norm": 0.2803342640399933, "learning_rate": 3.022339726006049e-05, "loss": 0.0057, "step": 28866 }, { "epoch": 13.420734542073454, "grad_norm": 0.3371357321739197, "learning_rate": 3.0146565219247195e-05, "loss": 0.007, "step": 28868 }, { "epoch": 13.421664342166434, "grad_norm": 0.4521391987800598, "learning_rate": 3.006968238391294e-05, "loss": 0.0071, "step": 28870 }, { "epoch": 13.422594142259415, "grad_norm": 0.41936323046684265, "learning_rate": 2.999274951286026e-05, "loss": 0.0231, "step": 28872 }, { "epoch": 13.423523942352395, "grad_norm": 0.9316744208335876, "learning_rate": 2.991576736538555e-05, "loss": 0.0182, "step": 28874 }, { "epoch": 13.424453742445374, "grad_norm": 0.9787613749504089, "learning_rate": 2.983873670127152e-05, "loss": 0.018, "step": 28876 }, { "epoch": 13.425383542538354, "grad_norm": 0.2174871265888214, "learning_rate": 2.976165828077972e-05, "loss": 0.0191, "step": 28878 }, { "epoch": 13.426313342631333, "grad_norm": 1.4311145544052124, "learning_rate": 2.9684532864643224e-05, "loss": 0.0203, "step": 28880 }, { "epoch": 13.427243142724315, "grad_norm": 0.3805941343307495, "learning_rate": 2.96073612140584e-05, "loss": 0.0072, "step": 28882 }, { "epoch": 13.428172942817294, "grad_norm": 0.8265999555587769, "learning_rate": 2.9530144090678466e-05, "loss": 0.0118, "step": 28884 }, { "epoch": 13.429102742910274, "grad_norm": 0.7859352827072144, "learning_rate": 2.945288225660542e-05, "loss": 0.0169, "step": 28886 }, { "epoch": 13.430032543003254, "grad_norm": 0.23668941855430603, "learning_rate": 2.937557647438203e-05, "loss": 0.0086, "step": 28888 }, { "epoch": 13.430962343096235, "grad_norm": 1.1630574464797974, "learning_rate": 2.929822750698533e-05, "loss": 0.0179, "step": 28890 }, { "epoch": 13.431892143189215, "grad_norm": 1.9185024499893188, "learning_rate": 2.92208361178184e-05, "loss": 0.0299, "step": 28892 }, { "epoch": 13.432821943282194, "grad_norm": 2.3958663940429688, "learning_rate": 2.9143403070703183e-05, "loss": 0.0137, "step": 28894 }, { "epoch": 13.433751743375174, "grad_norm": 1.4308011531829834, "learning_rate": 2.9065929129872243e-05, "loss": 0.0206, "step": 28896 }, { "epoch": 13.434681543468155, "grad_norm": 0.3117310702800751, "learning_rate": 2.898841505996227e-05, "loss": 0.0097, "step": 28898 }, { "epoch": 13.435611343561135, "grad_norm": 0.8834417462348938, "learning_rate": 2.8910861626005844e-05, "loss": 0.0088, "step": 28900 }, { "epoch": 13.436541143654114, "grad_norm": 0.399169385433197, "learning_rate": 2.883326959342405e-05, "loss": 0.008, "step": 28902 }, { "epoch": 13.437470943747094, "grad_norm": 0.13936764001846313, "learning_rate": 2.8755639728018916e-05, "loss": 0.0048, "step": 28904 }, { "epoch": 13.438400743840074, "grad_norm": 0.39971086382865906, "learning_rate": 2.8677972795966065e-05, "loss": 0.0097, "step": 28906 }, { "epoch": 13.439330543933055, "grad_norm": 1.3416928052902222, "learning_rate": 2.860026956380639e-05, "loss": 0.0259, "step": 28908 }, { "epoch": 13.440260344026035, "grad_norm": 1.0210813283920288, "learning_rate": 2.8522530798439615e-05, "loss": 0.0207, "step": 28910 }, { "epoch": 13.441190144119014, "grad_norm": 1.2731555700302124, "learning_rate": 2.844475726711596e-05, "loss": 0.0208, "step": 28912 }, { "epoch": 13.442119944211994, "grad_norm": 0.5452859997749329, "learning_rate": 2.836694973742896e-05, "loss": 0.0065, "step": 28914 }, { "epoch": 13.443049744304975, "grad_norm": 1.0092401504516602, "learning_rate": 2.828910897730717e-05, "loss": 0.0086, "step": 28916 }, { "epoch": 13.443979544397955, "grad_norm": 2.1833341121673584, "learning_rate": 2.821123575500764e-05, "loss": 0.0118, "step": 28918 }, { "epoch": 13.444909344490934, "grad_norm": 2.8387420177459717, "learning_rate": 2.813333083910781e-05, "loss": 0.0239, "step": 28920 }, { "epoch": 13.445839144583914, "grad_norm": 0.9190188050270081, "learning_rate": 2.8055394998497403e-05, "loss": 0.0123, "step": 28922 }, { "epoch": 13.446768944676894, "grad_norm": 0.3356773853302002, "learning_rate": 2.797742900237187e-05, "loss": 0.0138, "step": 28924 }, { "epoch": 13.447698744769875, "grad_norm": 2.108137369155884, "learning_rate": 2.7899433620224293e-05, "loss": 0.0211, "step": 28926 }, { "epoch": 13.448628544862855, "grad_norm": 0.6559754610061646, "learning_rate": 2.7821409621837084e-05, "loss": 0.0091, "step": 28928 }, { "epoch": 13.449558344955834, "grad_norm": 0.4398682117462158, "learning_rate": 2.7743357777276133e-05, "loss": 0.0208, "step": 28930 }, { "epoch": 13.450488145048814, "grad_norm": 0.4657003879547119, "learning_rate": 2.7665278856881458e-05, "loss": 0.0123, "step": 28932 }, { "epoch": 13.451417945141795, "grad_norm": 2.3486714363098145, "learning_rate": 2.758717363126066e-05, "loss": 0.023, "step": 28934 }, { "epoch": 13.452347745234775, "grad_norm": 1.0173640251159668, "learning_rate": 2.7509042871280426e-05, "loss": 0.0154, "step": 28936 }, { "epoch": 13.453277545327754, "grad_norm": 0.8106443881988525, "learning_rate": 2.7430887348059993e-05, "loss": 0.0123, "step": 28938 }, { "epoch": 13.454207345420734, "grad_norm": 0.33375677466392517, "learning_rate": 2.7352707832963007e-05, "loss": 0.0187, "step": 28940 }, { "epoch": 13.455137145513714, "grad_norm": 1.3253477811813354, "learning_rate": 2.7274505097589364e-05, "loss": 0.0106, "step": 28942 }, { "epoch": 13.456066945606695, "grad_norm": 2.208808660507202, "learning_rate": 2.7196279913768652e-05, "loss": 0.0164, "step": 28944 }, { "epoch": 13.456996745699675, "grad_norm": 1.214354395866394, "learning_rate": 2.7118033053551876e-05, "loss": 0.0198, "step": 28946 }, { "epoch": 13.457926545792654, "grad_norm": 0.3580605685710907, "learning_rate": 2.7039765289204115e-05, "loss": 0.0082, "step": 28948 }, { "epoch": 13.458856345885634, "grad_norm": 0.6692527532577515, "learning_rate": 2.6961477393196258e-05, "loss": 0.0178, "step": 28950 }, { "epoch": 13.459786145978615, "grad_norm": 1.5830192565917969, "learning_rate": 2.688317013819841e-05, "loss": 0.0272, "step": 28952 }, { "epoch": 13.460715946071595, "grad_norm": 0.3084111213684082, "learning_rate": 2.6804844297071566e-05, "loss": 0.0048, "step": 28954 }, { "epoch": 13.461645746164574, "grad_norm": 1.0726892948150635, "learning_rate": 2.6726500642860157e-05, "loss": 0.0219, "step": 28956 }, { "epoch": 13.462575546257554, "grad_norm": 0.2916136384010315, "learning_rate": 2.6648139948784423e-05, "loss": 0.0065, "step": 28958 }, { "epoch": 13.463505346350535, "grad_norm": 0.6728818416595459, "learning_rate": 2.656976298823295e-05, "loss": 0.0119, "step": 28960 }, { "epoch": 13.464435146443515, "grad_norm": 0.11103416979312897, "learning_rate": 2.649137053475435e-05, "loss": 0.0032, "step": 28962 }, { "epoch": 13.465364946536495, "grad_norm": 2.1554455757141113, "learning_rate": 2.6412963362050652e-05, "loss": 0.0278, "step": 28964 }, { "epoch": 13.466294746629474, "grad_norm": 1.054848074913025, "learning_rate": 2.6334542243969014e-05, "loss": 0.0146, "step": 28966 }, { "epoch": 13.467224546722454, "grad_norm": 0.22999776899814606, "learning_rate": 2.625610795449438e-05, "loss": 0.0049, "step": 28968 }, { "epoch": 13.468154346815435, "grad_norm": 1.361410140991211, "learning_rate": 2.6177661267741163e-05, "loss": 0.0338, "step": 28970 }, { "epoch": 13.469084146908415, "grad_norm": 1.2449584007263184, "learning_rate": 2.609920295794668e-05, "loss": 0.0166, "step": 28972 }, { "epoch": 13.470013947001394, "grad_norm": 1.0283437967300415, "learning_rate": 2.6020733799462947e-05, "loss": 0.03, "step": 28974 }, { "epoch": 13.470943747094374, "grad_norm": 0.17390702664852142, "learning_rate": 2.5942254566748523e-05, "loss": 0.0039, "step": 28976 }, { "epoch": 13.471873547187355, "grad_norm": 0.7203593254089355, "learning_rate": 2.5863766034361923e-05, "loss": 0.0167, "step": 28978 }, { "epoch": 13.472803347280335, "grad_norm": 0.2860070466995239, "learning_rate": 2.5785268976953284e-05, "loss": 0.0075, "step": 28980 }, { "epoch": 13.473733147373315, "grad_norm": 0.11741219460964203, "learning_rate": 2.5706764169256865e-05, "loss": 0.0049, "step": 28982 }, { "epoch": 13.474662947466294, "grad_norm": 0.626125156879425, "learning_rate": 2.5628252386083437e-05, "loss": 0.019, "step": 28984 }, { "epoch": 13.475592747559276, "grad_norm": 0.7654721140861511, "learning_rate": 2.554973440231259e-05, "loss": 0.0124, "step": 28986 }, { "epoch": 13.476522547652255, "grad_norm": 0.5587970614433289, "learning_rate": 2.5471210992885297e-05, "loss": 0.0052, "step": 28988 }, { "epoch": 13.477452347745235, "grad_norm": 0.26232433319091797, "learning_rate": 2.539268293279557e-05, "loss": 0.0269, "step": 28990 }, { "epoch": 13.478382147838214, "grad_norm": 0.8558154702186584, "learning_rate": 2.531415099708383e-05, "loss": 0.0103, "step": 28992 }, { "epoch": 13.479311947931194, "grad_norm": 0.9577729105949402, "learning_rate": 2.523561596082875e-05, "loss": 0.0132, "step": 28994 }, { "epoch": 13.480241748024175, "grad_norm": 0.8975548148155212, "learning_rate": 2.515707859913909e-05, "loss": 0.0285, "step": 28996 }, { "epoch": 13.481171548117155, "grad_norm": 0.6063873171806335, "learning_rate": 2.507853968714706e-05, "loss": 0.0064, "step": 28998 }, { "epoch": 13.482101348210135, "grad_norm": 1.3410053253173828, "learning_rate": 2.500000000000003e-05, "loss": 0.0171, "step": 29000 }, { "epoch": 13.482101348210135, "eval_cer": 0.12153529311453046, "eval_loss": 0.21058471500873566, "eval_runtime": 399.2075, "eval_samples_per_second": 31.798, "eval_steps_per_second": 0.994, "step": 29000 }, { "epoch": 13.483031148303114, "grad_norm": 1.18586266040802, "learning_rate": 2.492146031285318e-05, "loss": 0.0209, "step": 29002 }, { "epoch": 13.483960948396096, "grad_norm": 0.9205421209335327, "learning_rate": 2.4842921400861154e-05, "loss": 0.016, "step": 29004 }, { "epoch": 13.484890748489075, "grad_norm": 1.1382454633712769, "learning_rate": 2.4764384039171485e-05, "loss": 0.0157, "step": 29006 }, { "epoch": 13.485820548582055, "grad_norm": 0.8762359619140625, "learning_rate": 2.468584900291641e-05, "loss": 0.0114, "step": 29008 }, { "epoch": 13.486750348675034, "grad_norm": 0.5870047807693481, "learning_rate": 2.460731706720449e-05, "loss": 0.0121, "step": 29010 }, { "epoch": 13.487680148768014, "grad_norm": 0.8696269392967224, "learning_rate": 2.4528789007114763e-05, "loss": 0.0076, "step": 29012 }, { "epoch": 13.488609948860995, "grad_norm": 0.5585898756980896, "learning_rate": 2.4450265597687477e-05, "loss": 0.0155, "step": 29014 }, { "epoch": 13.489539748953975, "grad_norm": 0.9717873334884644, "learning_rate": 2.437174761391663e-05, "loss": 0.0185, "step": 29016 }, { "epoch": 13.490469549046955, "grad_norm": 0.45084649324417114, "learning_rate": 2.4293235830743198e-05, "loss": 0.0091, "step": 29018 }, { "epoch": 13.491399349139934, "grad_norm": 0.22196461260318756, "learning_rate": 2.4214731023046782e-05, "loss": 0.0102, "step": 29020 }, { "epoch": 13.492329149232916, "grad_norm": 0.6057983040809631, "learning_rate": 2.413623396563832e-05, "loss": 0.0106, "step": 29022 }, { "epoch": 13.493258949325895, "grad_norm": 1.4608383178710938, "learning_rate": 2.4057745433251726e-05, "loss": 0.0164, "step": 29024 }, { "epoch": 13.494188749418875, "grad_norm": 1.075777530670166, "learning_rate": 2.39792662005373e-05, "loss": 0.0125, "step": 29026 }, { "epoch": 13.495118549511854, "grad_norm": 1.7792439460754395, "learning_rate": 2.390079704205356e-05, "loss": 0.0515, "step": 29028 }, { "epoch": 13.496048349604836, "grad_norm": 0.15019118785858154, "learning_rate": 2.382233873225908e-05, "loss": 0.0073, "step": 29030 }, { "epoch": 13.496978149697815, "grad_norm": 0.8063752055168152, "learning_rate": 2.3743892045505865e-05, "loss": 0.0176, "step": 29032 }, { "epoch": 13.497907949790795, "grad_norm": 0.3004462420940399, "learning_rate": 2.3665457756031056e-05, "loss": 0.0054, "step": 29034 }, { "epoch": 13.498837749883775, "grad_norm": 0.3057268261909485, "learning_rate": 2.3587036637949415e-05, "loss": 0.0083, "step": 29036 }, { "epoch": 13.499767549976754, "grad_norm": 0.7127088904380798, "learning_rate": 2.3508629465245724e-05, "loss": 0.0037, "step": 29038 }, { "epoch": 13.500697350069736, "grad_norm": 0.14464928209781647, "learning_rate": 2.3430237011767116e-05, "loss": 0.004, "step": 29040 }, { "epoch": 13.501627150162715, "grad_norm": 0.2791987955570221, "learning_rate": 2.3351860051215647e-05, "loss": 0.0067, "step": 29042 }, { "epoch": 13.502556950255695, "grad_norm": 0.6756880879402161, "learning_rate": 2.3273499357139906e-05, "loss": 0.0111, "step": 29044 }, { "epoch": 13.503486750348674, "grad_norm": 0.47061672806739807, "learning_rate": 2.3195155702928497e-05, "loss": 0.0073, "step": 29046 }, { "epoch": 13.504416550441656, "grad_norm": 0.25314876437187195, "learning_rate": 2.3116829861801835e-05, "loss": 0.0049, "step": 29048 }, { "epoch": 13.505346350534635, "grad_norm": 1.2965387105941772, "learning_rate": 2.303852260680399e-05, "loss": 0.0111, "step": 29050 }, { "epoch": 13.506276150627615, "grad_norm": 0.5916604995727539, "learning_rate": 2.2960234710796135e-05, "loss": 0.0098, "step": 29052 }, { "epoch": 13.507205950720595, "grad_norm": 0.5346304774284363, "learning_rate": 2.2881966946448197e-05, "loss": 0.0123, "step": 29054 }, { "epoch": 13.508135750813576, "grad_norm": 0.3439299762248993, "learning_rate": 2.280372008623159e-05, "loss": 0.0045, "step": 29056 }, { "epoch": 13.509065550906556, "grad_norm": 1.691726803779602, "learning_rate": 2.2725494902410882e-05, "loss": 0.028, "step": 29058 }, { "epoch": 13.509995350999535, "grad_norm": 0.36920756101608276, "learning_rate": 2.2647292167037236e-05, "loss": 0.014, "step": 29060 }, { "epoch": 13.510925151092515, "grad_norm": 0.34322530031204224, "learning_rate": 2.2569112651940246e-05, "loss": 0.0153, "step": 29062 }, { "epoch": 13.511854951185494, "grad_norm": 0.9708769917488098, "learning_rate": 2.2490957128719644e-05, "loss": 0.0099, "step": 29064 }, { "epoch": 13.512784751278476, "grad_norm": 1.3164907693862915, "learning_rate": 2.2412826368739413e-05, "loss": 0.0116, "step": 29066 }, { "epoch": 13.513714551371455, "grad_norm": 0.5765326023101807, "learning_rate": 2.2334721143118436e-05, "loss": 0.014, "step": 29068 }, { "epoch": 13.514644351464435, "grad_norm": 0.7403779029846191, "learning_rate": 2.225664222272394e-05, "loss": 0.009, "step": 29070 }, { "epoch": 13.515574151557415, "grad_norm": 0.5418375134468079, "learning_rate": 2.2178590378162993e-05, "loss": 0.0128, "step": 29072 }, { "epoch": 13.516503951650396, "grad_norm": 1.036850094795227, "learning_rate": 2.210056637977596e-05, "loss": 0.0265, "step": 29074 }, { "epoch": 13.517433751743376, "grad_norm": 1.3441529273986816, "learning_rate": 2.2022570997628388e-05, "loss": 0.0243, "step": 29076 }, { "epoch": 13.518363551836355, "grad_norm": 0.9027857780456543, "learning_rate": 2.1944605001502854e-05, "loss": 0.0114, "step": 29078 }, { "epoch": 13.519293351929335, "grad_norm": 1.3972704410552979, "learning_rate": 2.186666916089244e-05, "loss": 0.0267, "step": 29080 }, { "epoch": 13.520223152022314, "grad_norm": 1.2585105895996094, "learning_rate": 2.1788764244992617e-05, "loss": 0.0244, "step": 29082 }, { "epoch": 13.521152952115296, "grad_norm": 0.43072420358657837, "learning_rate": 2.1710891022693088e-05, "loss": 0.0153, "step": 29084 }, { "epoch": 13.522082752208275, "grad_norm": 0.5592052340507507, "learning_rate": 2.1633050262571297e-05, "loss": 0.0058, "step": 29086 }, { "epoch": 13.523012552301255, "grad_norm": 0.5899016857147217, "learning_rate": 2.155524273288412e-05, "loss": 0.0051, "step": 29088 }, { "epoch": 13.523942352394235, "grad_norm": 0.4663795232772827, "learning_rate": 2.147746920156064e-05, "loss": 0.006, "step": 29090 }, { "epoch": 13.524872152487216, "grad_norm": 0.6845855116844177, "learning_rate": 2.1399730436193687e-05, "loss": 0.0142, "step": 29092 }, { "epoch": 13.525801952580196, "grad_norm": 1.1735353469848633, "learning_rate": 2.1322027204034022e-05, "loss": 0.0109, "step": 29094 }, { "epoch": 13.526731752673175, "grad_norm": 0.8143046498298645, "learning_rate": 2.1244360271981168e-05, "loss": 0.0089, "step": 29096 }, { "epoch": 13.527661552766155, "grad_norm": 0.6980871558189392, "learning_rate": 2.116673040657604e-05, "loss": 0.0091, "step": 29098 }, { "epoch": 13.528591352859134, "grad_norm": 0.9588138461112976, "learning_rate": 2.1089138373994247e-05, "loss": 0.0156, "step": 29100 }, { "epoch": 13.529521152952116, "grad_norm": 0.4858517646789551, "learning_rate": 2.1011584940038e-05, "loss": 0.0153, "step": 29102 }, { "epoch": 13.530450953045095, "grad_norm": 0.7860823273658752, "learning_rate": 2.0934070870128027e-05, "loss": 0.0125, "step": 29104 }, { "epoch": 13.531380753138075, "grad_norm": 1.5287097692489624, "learning_rate": 2.0856596929297087e-05, "loss": 0.0191, "step": 29106 }, { "epoch": 13.532310553231055, "grad_norm": 0.46552321314811707, "learning_rate": 2.0779163882181695e-05, "loss": 0.0072, "step": 29108 }, { "epoch": 13.533240353324036, "grad_norm": 0.7519544959068298, "learning_rate": 2.070177249301494e-05, "loss": 0.0123, "step": 29110 }, { "epoch": 13.534170153417016, "grad_norm": 0.290689080953598, "learning_rate": 2.0624423525618237e-05, "loss": 0.007, "step": 29112 }, { "epoch": 13.535099953509995, "grad_norm": 0.10823774337768555, "learning_rate": 2.0547117743394846e-05, "loss": 0.0071, "step": 29114 }, { "epoch": 13.536029753602975, "grad_norm": 0.715453028678894, "learning_rate": 2.0469855909321804e-05, "loss": 0.0081, "step": 29116 }, { "epoch": 13.536959553695956, "grad_norm": 0.9807211756706238, "learning_rate": 2.0392638785941867e-05, "loss": 0.0071, "step": 29118 }, { "epoch": 13.537889353788936, "grad_norm": 0.3426057696342468, "learning_rate": 2.0315467135356873e-05, "loss": 0.0108, "step": 29120 }, { "epoch": 13.538819153881915, "grad_norm": 0.20442703366279602, "learning_rate": 2.0238341719220207e-05, "loss": 0.0023, "step": 29122 }, { "epoch": 13.539748953974895, "grad_norm": 0.6020929217338562, "learning_rate": 2.016126329872858e-05, "loss": 0.0098, "step": 29124 }, { "epoch": 13.540678754067875, "grad_norm": 0.6097304224967957, "learning_rate": 2.0084232634614544e-05, "loss": 0.0265, "step": 29126 }, { "epoch": 13.541608554160856, "grad_norm": 0.2644115388393402, "learning_rate": 2.0007250487139833e-05, "loss": 0.0072, "step": 29128 }, { "epoch": 13.542538354253836, "grad_norm": 0.5347984433174133, "learning_rate": 1.993031761608733e-05, "loss": 0.0067, "step": 29130 }, { "epoch": 13.543468154346815, "grad_norm": 2.4409217834472656, "learning_rate": 1.985343478075307e-05, "loss": 0.0299, "step": 29132 }, { "epoch": 13.544397954439795, "grad_norm": 0.5243724584579468, "learning_rate": 1.9776602739939775e-05, "loss": 0.0061, "step": 29134 }, { "epoch": 13.545327754532776, "grad_norm": 0.11967363208532333, "learning_rate": 1.9699822251948833e-05, "loss": 0.0092, "step": 29136 }, { "epoch": 13.546257554625756, "grad_norm": 0.3665925860404968, "learning_rate": 1.9623094074572325e-05, "loss": 0.0046, "step": 29138 }, { "epoch": 13.547187354718735, "grad_norm": 1.2431329488754272, "learning_rate": 1.9546418965086554e-05, "loss": 0.0104, "step": 29140 }, { "epoch": 13.548117154811715, "grad_norm": 0.3482242524623871, "learning_rate": 1.94697976802439e-05, "loss": 0.0062, "step": 29142 }, { "epoch": 13.549046954904696, "grad_norm": 0.25381916761398315, "learning_rate": 1.9393230976265684e-05, "loss": 0.0106, "step": 29144 }, { "epoch": 13.549976754997676, "grad_norm": 1.5914210081100464, "learning_rate": 1.931671960883384e-05, "loss": 0.0231, "step": 29146 }, { "epoch": 13.550906555090656, "grad_norm": 1.0595403909683228, "learning_rate": 1.9240264333085194e-05, "loss": 0.0305, "step": 29148 }, { "epoch": 13.551836355183635, "grad_norm": 0.408502995967865, "learning_rate": 1.9163865903602458e-05, "loss": 0.0287, "step": 29150 }, { "epoch": 13.552766155276615, "grad_norm": 0.029648201540112495, "learning_rate": 1.908752507440694e-05, "loss": 0.0082, "step": 29152 }, { "epoch": 13.553695955369596, "grad_norm": 0.7967826128005981, "learning_rate": 1.9011242598951976e-05, "loss": 0.0081, "step": 29154 }, { "epoch": 13.554625755462576, "grad_norm": 0.8548628091812134, "learning_rate": 1.8935019230114795e-05, "loss": 0.0088, "step": 29156 }, { "epoch": 13.555555555555555, "grad_norm": 0.17997193336486816, "learning_rate": 1.885885572018946e-05, "loss": 0.0072, "step": 29158 }, { "epoch": 13.556485355648535, "grad_norm": 1.0153274536132812, "learning_rate": 1.878275282087871e-05, "loss": 0.0199, "step": 29160 }, { "epoch": 13.557415155741516, "grad_norm": 0.8605949878692627, "learning_rate": 1.8706711283287613e-05, "loss": 0.0063, "step": 29162 }, { "epoch": 13.558344955834496, "grad_norm": 1.0462040901184082, "learning_rate": 1.863073185791562e-05, "loss": 0.0214, "step": 29164 }, { "epoch": 13.559274755927476, "grad_norm": 0.9710755944252014, "learning_rate": 1.855481529464864e-05, "loss": 0.0145, "step": 29166 }, { "epoch": 13.560204556020455, "grad_norm": 0.6462039351463318, "learning_rate": 1.847896234275267e-05, "loss": 0.0085, "step": 29168 }, { "epoch": 13.561134356113435, "grad_norm": 0.3362285792827606, "learning_rate": 1.8403173750865905e-05, "loss": 0.006, "step": 29170 }, { "epoch": 13.562064156206416, "grad_norm": 0.24311921000480652, "learning_rate": 1.83274502669908e-05, "loss": 0.012, "step": 29172 }, { "epoch": 13.562993956299396, "grad_norm": 0.9390515685081482, "learning_rate": 1.8251792638487573e-05, "loss": 0.0171, "step": 29174 }, { "epoch": 13.563923756392375, "grad_norm": 0.278586745262146, "learning_rate": 1.8176201612066814e-05, "loss": 0.0085, "step": 29176 }, { "epoch": 13.564853556485355, "grad_norm": 0.8715469241142273, "learning_rate": 1.8100677933781435e-05, "loss": 0.01, "step": 29178 }, { "epoch": 13.565783356578336, "grad_norm": 0.7968894839286804, "learning_rate": 1.8025222349019308e-05, "loss": 0.0114, "step": 29180 }, { "epoch": 13.566713156671316, "grad_norm": 1.185371994972229, "learning_rate": 1.7949835602496766e-05, "loss": 0.0157, "step": 29182 }, { "epoch": 13.567642956764296, "grad_norm": 1.1633278131484985, "learning_rate": 1.7874518438250733e-05, "loss": 0.0171, "step": 29184 }, { "epoch": 13.568572756857275, "grad_norm": 0.823597252368927, "learning_rate": 1.7799271599630868e-05, "loss": 0.0184, "step": 29186 }, { "epoch": 13.569502556950255, "grad_norm": 0.0691278800368309, "learning_rate": 1.772409582929326e-05, "loss": 0.0024, "step": 29188 }, { "epoch": 13.570432357043236, "grad_norm": 0.4481280744075775, "learning_rate": 1.7648991869192588e-05, "loss": 0.0079, "step": 29190 }, { "epoch": 13.571362157136216, "grad_norm": 0.30245593190193176, "learning_rate": 1.757396046057428e-05, "loss": 0.0086, "step": 29192 }, { "epoch": 13.572291957229195, "grad_norm": 0.39976024627685547, "learning_rate": 1.7499002343968206e-05, "loss": 0.0056, "step": 29194 }, { "epoch": 13.573221757322175, "grad_norm": 0.2255149781703949, "learning_rate": 1.7424118259180726e-05, "loss": 0.0041, "step": 29196 }, { "epoch": 13.574151557415156, "grad_norm": 0.12416409701108932, "learning_rate": 1.7349308945287687e-05, "loss": 0.0034, "step": 29198 }, { "epoch": 13.575081357508136, "grad_norm": 0.2545473873615265, "learning_rate": 1.727457514062648e-05, "loss": 0.0041, "step": 29200 }, { "epoch": 13.576011157601116, "grad_norm": 0.6238731741905212, "learning_rate": 1.719991758278959e-05, "loss": 0.0105, "step": 29202 }, { "epoch": 13.576940957694095, "grad_norm": 1.6281068325042725, "learning_rate": 1.7125337008617474e-05, "loss": 0.0104, "step": 29204 }, { "epoch": 13.577870757787077, "grad_norm": 0.5687057971954346, "learning_rate": 1.7050834154189787e-05, "loss": 0.0117, "step": 29206 }, { "epoch": 13.578800557880056, "grad_norm": 0.25084230303764343, "learning_rate": 1.697640975481978e-05, "loss": 0.0067, "step": 29208 }, { "epoch": 13.579730357973036, "grad_norm": 1.557847023010254, "learning_rate": 1.6902064545046244e-05, "loss": 0.0128, "step": 29210 }, { "epoch": 13.580660158066015, "grad_norm": 0.11714388430118561, "learning_rate": 1.6827799258626547e-05, "loss": 0.0039, "step": 29212 }, { "epoch": 13.581589958158997, "grad_norm": 1.1363279819488525, "learning_rate": 1.675361462852875e-05, "loss": 0.0249, "step": 29214 }, { "epoch": 13.582519758251976, "grad_norm": 0.21653041243553162, "learning_rate": 1.6679511386925364e-05, "loss": 0.0156, "step": 29216 }, { "epoch": 13.583449558344956, "grad_norm": 1.1836518049240112, "learning_rate": 1.660549026518564e-05, "loss": 0.0088, "step": 29218 }, { "epoch": 13.584379358437936, "grad_norm": 1.4086633920669556, "learning_rate": 1.653155199386784e-05, "loss": 0.0168, "step": 29220 }, { "epoch": 13.585309158530915, "grad_norm": 0.29979753494262695, "learning_rate": 1.6457697302713002e-05, "loss": 0.0065, "step": 29222 }, { "epoch": 13.586238958623897, "grad_norm": 0.23410116136074066, "learning_rate": 1.6383926920637287e-05, "loss": 0.0026, "step": 29224 }, { "epoch": 13.587168758716876, "grad_norm": 1.8041807413101196, "learning_rate": 1.631024157572425e-05, "loss": 0.0185, "step": 29226 }, { "epoch": 13.588098558809856, "grad_norm": 1.0310394763946533, "learning_rate": 1.62366419952185e-05, "loss": 0.0113, "step": 29228 }, { "epoch": 13.589028358902835, "grad_norm": 0.2247619330883026, "learning_rate": 1.616312890551851e-05, "loss": 0.0051, "step": 29230 }, { "epoch": 13.589958158995817, "grad_norm": 0.9390997886657715, "learning_rate": 1.6089703032168798e-05, "loss": 0.0168, "step": 29232 }, { "epoch": 13.590887959088796, "grad_norm": 0.35476160049438477, "learning_rate": 1.601636509985276e-05, "loss": 0.0075, "step": 29234 }, { "epoch": 13.591817759181776, "grad_norm": 0.47545310854911804, "learning_rate": 1.594311583238635e-05, "loss": 0.0083, "step": 29236 }, { "epoch": 13.592747559274756, "grad_norm": 0.2615659534931183, "learning_rate": 1.5869955952710423e-05, "loss": 0.0079, "step": 29238 }, { "epoch": 13.593677359367735, "grad_norm": 0.736853301525116, "learning_rate": 1.5796886182883134e-05, "loss": 0.0145, "step": 29240 }, { "epoch": 13.594607159460717, "grad_norm": 0.39833271503448486, "learning_rate": 1.572390724407377e-05, "loss": 0.0046, "step": 29242 }, { "epoch": 13.595536959553696, "grad_norm": 0.16236525774002075, "learning_rate": 1.5651019856554995e-05, "loss": 0.0035, "step": 29244 }, { "epoch": 13.596466759646676, "grad_norm": 0.740256667137146, "learning_rate": 1.557822473969607e-05, "loss": 0.0125, "step": 29246 }, { "epoch": 13.597396559739655, "grad_norm": 0.9854142069816589, "learning_rate": 1.5505522611955073e-05, "loss": 0.0286, "step": 29248 }, { "epoch": 13.598326359832637, "grad_norm": 0.15164843201637268, "learning_rate": 1.5432914190872818e-05, "loss": 0.003, "step": 29250 }, { "epoch": 13.599256159925616, "grad_norm": 0.44317448139190674, "learning_rate": 1.5360400193065273e-05, "loss": 0.0048, "step": 29252 }, { "epoch": 13.600185960018596, "grad_norm": 1.1987768411636353, "learning_rate": 1.5287981334216e-05, "loss": 0.0164, "step": 29254 }, { "epoch": 13.601115760111576, "grad_norm": 1.5136407613754272, "learning_rate": 1.5215658329069892e-05, "loss": 0.0145, "step": 29256 }, { "epoch": 13.602045560204555, "grad_norm": 0.7549495100975037, "learning_rate": 1.5143431891426303e-05, "loss": 0.0182, "step": 29258 }, { "epoch": 13.602975360297537, "grad_norm": 1.0807156562805176, "learning_rate": 1.5071302734130528e-05, "loss": 0.0066, "step": 29260 }, { "epoch": 13.603905160390516, "grad_norm": 0.6972198486328125, "learning_rate": 1.4999271569068387e-05, "loss": 0.0143, "step": 29262 }, { "epoch": 13.604834960483496, "grad_norm": 1.0767494440078735, "learning_rate": 1.4927339107158406e-05, "loss": 0.0181, "step": 29264 }, { "epoch": 13.605764760576475, "grad_norm": 0.7192476391792297, "learning_rate": 1.4855506058345098e-05, "loss": 0.008, "step": 29266 }, { "epoch": 13.606694560669457, "grad_norm": 1.0864481925964355, "learning_rate": 1.4783773131591336e-05, "loss": 0.0067, "step": 29268 }, { "epoch": 13.607624360762436, "grad_norm": 0.048028357326984406, "learning_rate": 1.4712141034872306e-05, "loss": 0.006, "step": 29270 }, { "epoch": 13.608554160855416, "grad_norm": 0.1704261600971222, "learning_rate": 1.4640610475168047e-05, "loss": 0.0037, "step": 29272 }, { "epoch": 13.609483960948396, "grad_norm": 0.9292804002761841, "learning_rate": 1.456918215845598e-05, "loss": 0.0126, "step": 29274 }, { "epoch": 13.610413761041377, "grad_norm": 1.2927113771438599, "learning_rate": 1.4497856789704917e-05, "loss": 0.0167, "step": 29276 }, { "epoch": 13.611343561134357, "grad_norm": 0.20015117526054382, "learning_rate": 1.4426635072867616e-05, "loss": 0.0044, "step": 29278 }, { "epoch": 13.612273361227336, "grad_norm": 0.1916186511516571, "learning_rate": 1.4355517710873348e-05, "loss": 0.0085, "step": 29280 }, { "epoch": 13.613203161320316, "grad_norm": 0.28942203521728516, "learning_rate": 1.4284505405621919e-05, "loss": 0.0043, "step": 29282 }, { "epoch": 13.614132961413295, "grad_norm": 0.9791666865348816, "learning_rate": 1.4213598857975954e-05, "loss": 0.0089, "step": 29284 }, { "epoch": 13.615062761506277, "grad_norm": 0.5465717911720276, "learning_rate": 1.4142798767754942e-05, "loss": 0.0057, "step": 29286 }, { "epoch": 13.615992561599256, "grad_norm": 0.7027687430381775, "learning_rate": 1.4072105833726699e-05, "loss": 0.0051, "step": 29288 }, { "epoch": 13.616922361692236, "grad_norm": 0.4721783697605133, "learning_rate": 1.4001520753602107e-05, "loss": 0.004, "step": 29290 }, { "epoch": 13.617852161785216, "grad_norm": 0.39452436566352844, "learning_rate": 1.393104422402757e-05, "loss": 0.0101, "step": 29292 }, { "epoch": 13.618781961878197, "grad_norm": 0.2932630181312561, "learning_rate": 1.3860676940577665e-05, "loss": 0.0041, "step": 29294 }, { "epoch": 13.619711761971177, "grad_norm": 0.11920154094696045, "learning_rate": 1.3790419597749231e-05, "loss": 0.0256, "step": 29296 }, { "epoch": 13.620641562064156, "grad_norm": 0.6061639785766602, "learning_rate": 1.3720272888953876e-05, "loss": 0.012, "step": 29298 }, { "epoch": 13.621571362157136, "grad_norm": 0.126048281788826, "learning_rate": 1.3650237506511458e-05, "loss": 0.0027, "step": 29300 }, { "epoch": 13.622501162250117, "grad_norm": 1.0428494215011597, "learning_rate": 1.3580314141642604e-05, "loss": 0.0252, "step": 29302 }, { "epoch": 13.623430962343097, "grad_norm": 0.5462584495544434, "learning_rate": 1.351050348446286e-05, "loss": 0.0068, "step": 29304 }, { "epoch": 13.624360762436076, "grad_norm": 1.253392219543457, "learning_rate": 1.3440806223975384e-05, "loss": 0.0087, "step": 29306 }, { "epoch": 13.625290562529056, "grad_norm": 0.32292142510414124, "learning_rate": 1.3371223048063683e-05, "loss": 0.0069, "step": 29308 }, { "epoch": 13.626220362622036, "grad_norm": 0.10729625821113586, "learning_rate": 1.3301754643485615e-05, "loss": 0.0022, "step": 29310 }, { "epoch": 13.627150162715017, "grad_norm": 0.24942122399806976, "learning_rate": 1.3232401695866755e-05, "loss": 0.0041, "step": 29312 }, { "epoch": 13.628079962807996, "grad_norm": 1.3993836641311646, "learning_rate": 1.3163164889692233e-05, "loss": 0.0169, "step": 29314 }, { "epoch": 13.629009762900976, "grad_norm": 0.35080769658088684, "learning_rate": 1.3094044908301523e-05, "loss": 0.0087, "step": 29316 }, { "epoch": 13.629939562993956, "grad_norm": 0.628913164138794, "learning_rate": 1.3025042433880941e-05, "loss": 0.0069, "step": 29318 }, { "epoch": 13.630869363086937, "grad_norm": 1.1845357418060303, "learning_rate": 1.2956158147457204e-05, "loss": 0.0079, "step": 29320 }, { "epoch": 13.631799163179917, "grad_norm": 0.3863305449485779, "learning_rate": 1.28873927288901e-05, "loss": 0.0044, "step": 29322 }, { "epoch": 13.632728963272896, "grad_norm": 0.3460441529750824, "learning_rate": 1.2818746856866707e-05, "loss": 0.0033, "step": 29324 }, { "epoch": 13.633658763365876, "grad_norm": 0.5132070779800415, "learning_rate": 1.275022120889422e-05, "loss": 0.0191, "step": 29326 }, { "epoch": 13.634588563458856, "grad_norm": 0.6693975925445557, "learning_rate": 1.2681816461292808e-05, "loss": 0.005, "step": 29328 }, { "epoch": 13.635518363551837, "grad_norm": 0.39764025807380676, "learning_rate": 1.2613533289189875e-05, "loss": 0.004, "step": 29330 }, { "epoch": 13.636448163644816, "grad_norm": 0.7501876354217529, "learning_rate": 1.2545372366512764e-05, "loss": 0.0059, "step": 29332 }, { "epoch": 13.637377963737796, "grad_norm": 0.6071747541427612, "learning_rate": 1.2477334365982404e-05, "loss": 0.0052, "step": 29334 }, { "epoch": 13.638307763830776, "grad_norm": 0.7685719132423401, "learning_rate": 1.2409419959106098e-05, "loss": 0.0172, "step": 29336 }, { "epoch": 13.639237563923757, "grad_norm": 0.6393289566040039, "learning_rate": 1.234162981617162e-05, "loss": 0.0088, "step": 29338 }, { "epoch": 13.640167364016737, "grad_norm": 0.38478603959083557, "learning_rate": 1.227396460624077e-05, "loss": 0.0257, "step": 29340 }, { "epoch": 13.641097164109716, "grad_norm": 0.45043182373046875, "learning_rate": 1.2206424997141395e-05, "loss": 0.0174, "step": 29342 }, { "epoch": 13.642026964202696, "grad_norm": 1.3396894931793213, "learning_rate": 1.2139011655462323e-05, "loss": 0.0101, "step": 29344 }, { "epoch": 13.642956764295675, "grad_norm": 0.1352231651544571, "learning_rate": 1.2071725246546173e-05, "loss": 0.0022, "step": 29346 }, { "epoch": 13.643886564388657, "grad_norm": 0.2481890618801117, "learning_rate": 1.200456643448233e-05, "loss": 0.0058, "step": 29348 }, { "epoch": 13.644816364481636, "grad_norm": 0.7259321212768555, "learning_rate": 1.1937535882101314e-05, "loss": 0.0089, "step": 29350 }, { "epoch": 13.645746164574616, "grad_norm": 0.591925323009491, "learning_rate": 1.1870634250967611e-05, "loss": 0.0069, "step": 29352 }, { "epoch": 13.646675964667596, "grad_norm": 1.0001513957977295, "learning_rate": 1.1803862201373459e-05, "loss": 0.006, "step": 29354 }, { "epoch": 13.647605764760577, "grad_norm": 0.4480149745941162, "learning_rate": 1.1737220392331737e-05, "loss": 0.0059, "step": 29356 }, { "epoch": 13.648535564853557, "grad_norm": 0.31781744956970215, "learning_rate": 1.1670709481570336e-05, "loss": 0.0093, "step": 29358 }, { "epoch": 13.649465364946536, "grad_norm": 1.3889635801315308, "learning_rate": 1.1604330125525255e-05, "loss": 0.0277, "step": 29360 }, { "epoch": 13.650395165039516, "grad_norm": 0.24514761567115784, "learning_rate": 1.1538082979333632e-05, "loss": 0.0087, "step": 29362 }, { "epoch": 13.651324965132497, "grad_norm": 0.1538873165845871, "learning_rate": 1.1471968696828205e-05, "loss": 0.0021, "step": 29364 }, { "epoch": 13.652254765225477, "grad_norm": 0.13688983023166656, "learning_rate": 1.1405987930530257e-05, "loss": 0.0041, "step": 29366 }, { "epoch": 13.653184565318456, "grad_norm": 0.6303370594978333, "learning_rate": 1.134014133164331e-05, "loss": 0.0041, "step": 29368 }, { "epoch": 13.654114365411436, "grad_norm": 1.0405986309051514, "learning_rate": 1.1274429550046709e-05, "loss": 0.0057, "step": 29370 }, { "epoch": 13.655044165504417, "grad_norm": 0.11715668439865112, "learning_rate": 1.1208853234289212e-05, "loss": 0.0068, "step": 29372 }, { "epoch": 13.655973965597397, "grad_norm": 0.55275559425354, "learning_rate": 1.1143413031582731e-05, "loss": 0.0073, "step": 29374 }, { "epoch": 13.656903765690377, "grad_norm": 0.5169792175292969, "learning_rate": 1.107810958779535e-05, "loss": 0.0083, "step": 29376 }, { "epoch": 13.657833565783356, "grad_norm": 0.6851186752319336, "learning_rate": 1.1012943547445842e-05, "loss": 0.0095, "step": 29378 }, { "epoch": 13.658763365876336, "grad_norm": 0.07217807322740555, "learning_rate": 1.0947915553696864e-05, "loss": 0.0034, "step": 29380 }, { "epoch": 13.659693165969317, "grad_norm": 0.9897016882896423, "learning_rate": 1.0883026248348171e-05, "loss": 0.0162, "step": 29382 }, { "epoch": 13.660622966062297, "grad_norm": 0.4747740924358368, "learning_rate": 1.0818276271831152e-05, "loss": 0.0044, "step": 29384 }, { "epoch": 13.661552766155276, "grad_norm": 0.34262752532958984, "learning_rate": 1.0753666263201938e-05, "loss": 0.0049, "step": 29386 }, { "epoch": 13.662482566248256, "grad_norm": 0.4655458331108093, "learning_rate": 1.0689196860135375e-05, "loss": 0.0037, "step": 29388 }, { "epoch": 13.663412366341237, "grad_norm": 0.35814690589904785, "learning_rate": 1.062486869891815e-05, "loss": 0.0043, "step": 29390 }, { "epoch": 13.664342166434217, "grad_norm": 0.23190271854400635, "learning_rate": 1.0560682414443393e-05, "loss": 0.0076, "step": 29392 }, { "epoch": 13.665271966527197, "grad_norm": 0.09888993203639984, "learning_rate": 1.0496638640203816e-05, "loss": 0.0052, "step": 29394 }, { "epoch": 13.666201766620176, "grad_norm": 0.23227453231811523, "learning_rate": 1.0432738008285619e-05, "loss": 0.0041, "step": 29396 }, { "epoch": 13.667131566713156, "grad_norm": 0.7488930225372314, "learning_rate": 1.0368981149362234e-05, "loss": 0.0104, "step": 29398 }, { "epoch": 13.668061366806137, "grad_norm": 0.05080199986696243, "learning_rate": 1.030536869268827e-05, "loss": 0.0061, "step": 29400 }, { "epoch": 13.668991166899117, "grad_norm": 1.6634522676467896, "learning_rate": 1.02419012660927e-05, "loss": 0.0125, "step": 29402 }, { "epoch": 13.669920966992096, "grad_norm": 0.6922221779823303, "learning_rate": 1.0178579495973551e-05, "loss": 0.0186, "step": 29404 }, { "epoch": 13.670850767085076, "grad_norm": 0.136464923620224, "learning_rate": 1.0115404007291126e-05, "loss": 0.0058, "step": 29406 }, { "epoch": 13.671780567178057, "grad_norm": 1.8842679262161255, "learning_rate": 1.0052375423562138e-05, "loss": 0.0069, "step": 29408 }, { "epoch": 13.672710367271037, "grad_norm": 0.7471148371696472, "learning_rate": 9.989494366852976e-06, "loss": 0.0092, "step": 29410 }, { "epoch": 13.673640167364017, "grad_norm": 0.27008312940597534, "learning_rate": 9.926761457774428e-06, "loss": 0.0062, "step": 29412 }, { "epoch": 13.674569967456996, "grad_norm": 0.1280176192522049, "learning_rate": 9.864177315475119e-06, "loss": 0.0108, "step": 29414 }, { "epoch": 13.675499767549976, "grad_norm": 0.6330601572990417, "learning_rate": 9.801742557634989e-06, "loss": 0.0054, "step": 29416 }, { "epoch": 13.676429567642957, "grad_norm": 1.220424771308899, "learning_rate": 9.739457800460028e-06, "loss": 0.0063, "step": 29418 }, { "epoch": 13.677359367735937, "grad_norm": 1.2661771774291992, "learning_rate": 9.677323658675504e-06, "loss": 0.0064, "step": 29420 }, { "epoch": 13.678289167828916, "grad_norm": 0.9525020718574524, "learning_rate": 9.61534074552074e-06, "loss": 0.0112, "step": 29422 }, { "epoch": 13.679218967921896, "grad_norm": 0.11777015775442123, "learning_rate": 9.553509672741638e-06, "loss": 0.002, "step": 29424 }, { "epoch": 13.680148768014877, "grad_norm": 0.36081111431121826, "learning_rate": 9.491831050586064e-06, "loss": 0.0084, "step": 29426 }, { "epoch": 13.681078568107857, "grad_norm": 0.25286799669265747, "learning_rate": 9.43030548779726e-06, "loss": 0.0035, "step": 29428 }, { "epoch": 13.682008368200837, "grad_norm": 0.3474348485469818, "learning_rate": 9.36893359160741e-06, "loss": 0.0073, "step": 29430 }, { "epoch": 13.682938168293816, "grad_norm": 1.8872946500778198, "learning_rate": 9.307715967732498e-06, "loss": 0.0132, "step": 29432 }, { "epoch": 13.683867968386798, "grad_norm": 0.4163765609264374, "learning_rate": 9.24665322036589e-06, "loss": 0.0206, "step": 29434 }, { "epoch": 13.684797768479777, "grad_norm": 0.07327129691839218, "learning_rate": 9.185745952171973e-06, "loss": 0.0061, "step": 29436 }, { "epoch": 13.685727568572757, "grad_norm": 0.3205026388168335, "learning_rate": 9.124994764281039e-06, "loss": 0.0086, "step": 29438 }, { "epoch": 13.686657368665736, "grad_norm": 1.0401850938796997, "learning_rate": 9.064400256282782e-06, "loss": 0.0096, "step": 29440 }, { "epoch": 13.687587168758716, "grad_norm": 0.1749458611011505, "learning_rate": 9.00396302622067e-06, "loss": 0.0034, "step": 29442 }, { "epoch": 13.688516968851697, "grad_norm": 0.43166959285736084, "learning_rate": 8.943683670585492e-06, "loss": 0.0053, "step": 29444 }, { "epoch": 13.689446768944677, "grad_norm": 0.48743340373039246, "learning_rate": 8.88356278431029e-06, "loss": 0.0041, "step": 29446 }, { "epoch": 13.690376569037657, "grad_norm": 0.8796719908714294, "learning_rate": 8.823600960763934e-06, "loss": 0.0135, "step": 29448 }, { "epoch": 13.691306369130636, "grad_norm": 0.2687015235424042, "learning_rate": 8.76379879174542e-06, "loss": 0.0103, "step": 29450 }, { "epoch": 13.692236169223618, "grad_norm": 0.8452580571174622, "learning_rate": 8.704156867478014e-06, "loss": 0.0134, "step": 29452 }, { "epoch": 13.693165969316597, "grad_norm": 0.30235913395881653, "learning_rate": 8.644675776603557e-06, "loss": 0.0036, "step": 29454 }, { "epoch": 13.694095769409577, "grad_norm": 1.048554539680481, "learning_rate": 8.585356106176145e-06, "loss": 0.013, "step": 29456 }, { "epoch": 13.695025569502556, "grad_norm": 0.2948647141456604, "learning_rate": 8.526198441657103e-06, "loss": 0.0038, "step": 29458 }, { "epoch": 13.695955369595538, "grad_norm": 0.6926432251930237, "learning_rate": 8.467203366908697e-06, "loss": 0.0092, "step": 29460 }, { "epoch": 13.696885169688517, "grad_norm": 0.7341447472572327, "learning_rate": 8.408371464188636e-06, "loss": 0.0048, "step": 29462 }, { "epoch": 13.697814969781497, "grad_norm": 0.2591087818145752, "learning_rate": 8.349703314143779e-06, "loss": 0.0037, "step": 29464 }, { "epoch": 13.698744769874477, "grad_norm": 0.36810290813446045, "learning_rate": 8.291199495805213e-06, "loss": 0.0044, "step": 29466 }, { "epoch": 13.699674569967456, "grad_norm": 1.0047273635864258, "learning_rate": 8.232860586582138e-06, "loss": 0.0106, "step": 29468 }, { "epoch": 13.700604370060438, "grad_norm": 0.42796677350997925, "learning_rate": 8.174687162255777e-06, "loss": 0.0032, "step": 29470 }, { "epoch": 13.701534170153417, "grad_norm": 0.24739445745944977, "learning_rate": 8.116679796974471e-06, "loss": 0.005, "step": 29472 }, { "epoch": 13.702463970246397, "grad_norm": 0.49427828192710876, "learning_rate": 8.058839063247494e-06, "loss": 0.0057, "step": 29474 }, { "epoch": 13.703393770339376, "grad_norm": 1.1455434560775757, "learning_rate": 8.001165531939545e-06, "loss": 0.0253, "step": 29476 }, { "epoch": 13.704323570432358, "grad_norm": 0.12967467308044434, "learning_rate": 7.943659772265084e-06, "loss": 0.01, "step": 29478 }, { "epoch": 13.705253370525337, "grad_norm": 0.05261269956827164, "learning_rate": 7.886322351782749e-06, "loss": 0.0025, "step": 29480 }, { "epoch": 13.706183170618317, "grad_norm": 0.8565409183502197, "learning_rate": 7.82915383638986e-06, "loss": 0.0114, "step": 29482 }, { "epoch": 13.707112970711297, "grad_norm": 0.43816012144088745, "learning_rate": 7.772154790316325e-06, "loss": 0.0039, "step": 29484 }, { "epoch": 13.708042770804276, "grad_norm": 0.17943333089351654, "learning_rate": 7.71532577611985e-06, "loss": 0.0084, "step": 29486 }, { "epoch": 13.708972570897258, "grad_norm": 0.21809729933738708, "learning_rate": 7.658667354679984e-06, "loss": 0.0039, "step": 29488 }, { "epoch": 13.709902370990237, "grad_norm": 0.10648613423109055, "learning_rate": 7.602180085192219e-06, "loss": 0.0087, "step": 29490 }, { "epoch": 13.710832171083217, "grad_norm": 0.7242735028266907, "learning_rate": 7.545864525163233e-06, "loss": 0.0068, "step": 29492 }, { "epoch": 13.711761971176196, "grad_norm": 0.21373994648456573, "learning_rate": 7.489721230404863e-06, "loss": 0.0039, "step": 29494 }, { "epoch": 13.712691771269178, "grad_norm": 0.3283582627773285, "learning_rate": 7.433750755028888e-06, "loss": 0.0052, "step": 29496 }, { "epoch": 13.713621571362157, "grad_norm": 0.05788745358586311, "learning_rate": 7.377953651441057e-06, "loss": 0.0122, "step": 29498 }, { "epoch": 13.714551371455137, "grad_norm": 0.27820542454719543, "learning_rate": 7.322330470336376e-06, "loss": 0.0034, "step": 29500 }, { "epoch": 13.715481171548117, "grad_norm": 0.06437166035175323, "learning_rate": 7.266881760693312e-06, "loss": 0.0066, "step": 29502 }, { "epoch": 13.716410971641096, "grad_norm": 0.4195483922958374, "learning_rate": 7.211608069767873e-06, "loss": 0.0035, "step": 29504 }, { "epoch": 13.717340771734078, "grad_norm": 1.0038329362869263, "learning_rate": 7.156509943089446e-06, "loss": 0.0096, "step": 29506 }, { "epoch": 13.718270571827057, "grad_norm": 0.06667687743902206, "learning_rate": 7.101587924454173e-06, "loss": 0.0022, "step": 29508 }, { "epoch": 13.719200371920037, "grad_norm": 0.12443629652261734, "learning_rate": 7.046842555920326e-06, "loss": 0.0029, "step": 29510 }, { "epoch": 13.720130172013016, "grad_norm": 0.7546462416648865, "learning_rate": 6.992274377802345e-06, "loss": 0.0147, "step": 29512 }, { "epoch": 13.721059972105998, "grad_norm": 0.39250025153160095, "learning_rate": 6.937883928666246e-06, "loss": 0.013, "step": 29514 }, { "epoch": 13.721989772198977, "grad_norm": 1.0340954065322876, "learning_rate": 6.883671745323921e-06, "loss": 0.0038, "step": 29516 }, { "epoch": 13.722919572291957, "grad_norm": 0.1172432154417038, "learning_rate": 6.8296383628274875e-06, "loss": 0.0091, "step": 29518 }, { "epoch": 13.723849372384937, "grad_norm": 0.5673695802688599, "learning_rate": 6.7757843144647425e-06, "loss": 0.0053, "step": 29520 }, { "epoch": 13.724779172477918, "grad_norm": 0.806138277053833, "learning_rate": 6.722110131753524e-06, "loss": 0.0202, "step": 29522 }, { "epoch": 13.725708972570898, "grad_norm": 0.28461918234825134, "learning_rate": 6.668616344436097e-06, "loss": 0.007, "step": 29524 }, { "epoch": 13.726638772663877, "grad_norm": 0.5609574913978577, "learning_rate": 6.6153034804746755e-06, "loss": 0.0051, "step": 29526 }, { "epoch": 13.727568572756857, "grad_norm": 0.07007644325494766, "learning_rate": 6.562172066045697e-06, "loss": 0.0026, "step": 29528 }, { "epoch": 13.728498372849838, "grad_norm": 0.38172778487205505, "learning_rate": 6.509222625534776e-06, "loss": 0.0035, "step": 29530 }, { "epoch": 13.729428172942818, "grad_norm": 0.5262783765792847, "learning_rate": 6.456455681531509e-06, "loss": 0.0119, "step": 29532 }, { "epoch": 13.730357973035797, "grad_norm": 0.15859873592853546, "learning_rate": 6.403871754824336e-06, "loss": 0.0022, "step": 29534 }, { "epoch": 13.731287773128777, "grad_norm": 0.07936687022447586, "learning_rate": 6.3514713643955025e-06, "loss": 0.0018, "step": 29536 }, { "epoch": 13.732217573221757, "grad_norm": 0.12343598902225494, "learning_rate": 6.299255027415465e-06, "loss": 0.0135, "step": 29538 }, { "epoch": 13.733147373314738, "grad_norm": 0.4508042335510254, "learning_rate": 6.247223259238513e-06, "loss": 0.0077, "step": 29540 }, { "epoch": 13.734077173407718, "grad_norm": 0.6827161312103271, "learning_rate": 6.19537657339731e-06, "loss": 0.007, "step": 29542 }, { "epoch": 13.735006973500697, "grad_norm": 2.033438205718994, "learning_rate": 6.143715481597472e-06, "loss": 0.0082, "step": 29544 }, { "epoch": 13.735936773593677, "grad_norm": 0.12313330918550491, "learning_rate": 6.092240493713243e-06, "loss": 0.0107, "step": 29546 }, { "epoch": 13.736866573686658, "grad_norm": 0.4841700494289398, "learning_rate": 6.040952117781969e-06, "loss": 0.0042, "step": 29548 }, { "epoch": 13.737796373779638, "grad_norm": 1.561564326286316, "learning_rate": 5.989850859999332e-06, "loss": 0.0102, "step": 29550 }, { "epoch": 13.738726173872617, "grad_norm": 0.2921311557292938, "learning_rate": 5.938937224713882e-06, "loss": 0.0048, "step": 29552 }, { "epoch": 13.739655973965597, "grad_norm": 0.5204646587371826, "learning_rate": 5.888211714422763e-06, "loss": 0.0055, "step": 29554 }, { "epoch": 13.740585774058577, "grad_norm": 1.1930667161941528, "learning_rate": 5.837674829766395e-06, "loss": 0.0223, "step": 29556 }, { "epoch": 13.741515574151558, "grad_norm": 1.2552995681762695, "learning_rate": 5.787327069523086e-06, "loss": 0.0184, "step": 29558 }, { "epoch": 13.742445374244538, "grad_norm": 0.30463770031929016, "learning_rate": 5.7371689306052445e-06, "loss": 0.0035, "step": 29560 }, { "epoch": 13.743375174337517, "grad_norm": 0.10688271373510361, "learning_rate": 5.687200908053338e-06, "loss": 0.0021, "step": 29562 }, { "epoch": 13.744304974430497, "grad_norm": 0.31333673000335693, "learning_rate": 5.637423495031692e-06, "loss": 0.0032, "step": 29564 }, { "epoch": 13.745234774523478, "grad_norm": 0.24861173331737518, "learning_rate": 5.5878371828230455e-06, "loss": 0.0032, "step": 29566 }, { "epoch": 13.746164574616458, "grad_norm": 0.5117030739784241, "learning_rate": 5.5384424608244016e-06, "loss": 0.0037, "step": 29568 }, { "epoch": 13.747094374709437, "grad_norm": 0.19541889429092407, "learning_rate": 5.4892398165418295e-06, "loss": 0.0037, "step": 29570 }, { "epoch": 13.748024174802417, "grad_norm": 0.715913712978363, "learning_rate": 5.440229735585348e-06, "loss": 0.0045, "step": 29572 }, { "epoch": 13.748953974895397, "grad_norm": 0.9570848345756531, "learning_rate": 5.391412701664773e-06, "loss": 0.0088, "step": 29574 }, { "epoch": 13.749883774988378, "grad_norm": 0.09756757318973541, "learning_rate": 5.342789196584639e-06, "loss": 0.0044, "step": 29576 }, { "epoch": 13.750813575081358, "grad_norm": 0.5210034251213074, "learning_rate": 5.294359700239086e-06, "loss": 0.0026, "step": 29578 }, { "epoch": 13.751743375174337, "grad_norm": 0.924758791923523, "learning_rate": 5.246124690607802e-06, "loss": 0.01, "step": 29580 }, { "epoch": 13.752673175267317, "grad_norm": 1.33892023563385, "learning_rate": 5.198084643750862e-06, "loss": 0.0099, "step": 29582 }, { "epoch": 13.753602975360298, "grad_norm": 0.31835028529167175, "learning_rate": 5.150240033804238e-06, "loss": 0.0031, "step": 29584 }, { "epoch": 13.754532775453278, "grad_norm": 0.3739386200904846, "learning_rate": 5.1025913329745906e-06, "loss": 0.0083, "step": 29586 }, { "epoch": 13.755462575546257, "grad_norm": 0.2318556308746338, "learning_rate": 5.0551390115356885e-06, "loss": 0.0036, "step": 29588 }, { "epoch": 13.756392375639237, "grad_norm": 0.926458477973938, "learning_rate": 5.007883537822784e-06, "loss": 0.0133, "step": 29590 }, { "epoch": 13.757322175732218, "grad_norm": 0.5095632672309875, "learning_rate": 4.960825378228107e-06, "loss": 0.0067, "step": 29592 }, { "epoch": 13.758251975825198, "grad_norm": 0.6799783110618591, "learning_rate": 4.913964997196807e-06, "loss": 0.0066, "step": 29594 }, { "epoch": 13.759181775918178, "grad_norm": 0.467935174703598, "learning_rate": 4.867302857221926e-06, "loss": 0.02, "step": 29596 }, { "epoch": 13.760111576011157, "grad_norm": 0.21890929341316223, "learning_rate": 4.820839418840048e-06, "loss": 0.0086, "step": 29598 }, { "epoch": 13.761041376104137, "grad_norm": 0.1816042810678482, "learning_rate": 4.774575140626347e-06, "loss": 0.002, "step": 29600 }, { "epoch": 13.761971176197118, "grad_norm": 0.36986351013183594, "learning_rate": 4.728510479190673e-06, "loss": 0.0158, "step": 29602 }, { "epoch": 13.762900976290098, "grad_norm": 0.2580362856388092, "learning_rate": 4.68264588917274e-06, "loss": 0.0029, "step": 29604 }, { "epoch": 13.763830776383077, "grad_norm": 0.2605929374694824, "learning_rate": 4.636981823237332e-06, "loss": 0.0241, "step": 29606 }, { "epoch": 13.764760576476057, "grad_norm": 0.07857544720172882, "learning_rate": 4.591518732070443e-06, "loss": 0.0083, "step": 29608 }, { "epoch": 13.765690376569038, "grad_norm": 0.43466916680336, "learning_rate": 4.546257064374537e-06, "loss": 0.011, "step": 29610 }, { "epoch": 13.766620176662018, "grad_norm": 0.5816594958305359, "learning_rate": 4.5011972668636875e-06, "loss": 0.0076, "step": 29612 }, { "epoch": 13.767549976754998, "grad_norm": 1.0635064840316772, "learning_rate": 4.456339784260218e-06, "loss": 0.0085, "step": 29614 }, { "epoch": 13.768479776847977, "grad_norm": 0.20265841484069824, "learning_rate": 4.4116850592892666e-06, "loss": 0.0121, "step": 29616 }, { "epoch": 13.769409576940959, "grad_norm": 0.5084145665168762, "learning_rate": 4.367233532675042e-06, "loss": 0.0031, "step": 29618 }, { "epoch": 13.770339377033938, "grad_norm": 0.20539717376232147, "learning_rate": 4.322985643135963e-06, "loss": 0.0052, "step": 29620 }, { "epoch": 13.771269177126918, "grad_norm": 0.19822877645492554, "learning_rate": 4.2789418273809395e-06, "loss": 0.0032, "step": 29622 }, { "epoch": 13.772198977219897, "grad_norm": 0.3793184757232666, "learning_rate": 4.235102520104748e-06, "loss": 0.0048, "step": 29624 }, { "epoch": 13.773128777312877, "grad_norm": 0.5378710627555847, "learning_rate": 4.191468153983456e-06, "loss": 0.0109, "step": 29626 }, { "epoch": 13.774058577405858, "grad_norm": 0.46702080965042114, "learning_rate": 4.148039159670742e-06, "loss": 0.0029, "step": 29628 }, { "epoch": 13.774988377498838, "grad_norm": 1.2715610265731812, "learning_rate": 4.104815965793338e-06, "loss": 0.0107, "step": 29630 }, { "epoch": 13.775918177591818, "grad_norm": 0.49724942445755005, "learning_rate": 4.061798998946531e-06, "loss": 0.004, "step": 29632 }, { "epoch": 13.776847977684797, "grad_norm": 1.226495385169983, "learning_rate": 4.0189886836905086e-06, "loss": 0.0134, "step": 29634 }, { "epoch": 13.777777777777779, "grad_norm": 0.45052626729011536, "learning_rate": 3.976385442545802e-06, "loss": 0.005, "step": 29636 }, { "epoch": 13.778707577870758, "grad_norm": 0.6317138075828552, "learning_rate": 3.933989695989298e-06, "loss": 0.0077, "step": 29638 }, { "epoch": 13.779637377963738, "grad_norm": 0.30701887607574463, "learning_rate": 3.891801862449609e-06, "loss": 0.0052, "step": 29640 }, { "epoch": 13.780567178056717, "grad_norm": 0.1883675456047058, "learning_rate": 3.849822358303911e-06, "loss": 0.0066, "step": 29642 }, { "epoch": 13.781496978149697, "grad_norm": 0.12047314643859863, "learning_rate": 3.8080515978729573e-06, "loss": 0.0037, "step": 29644 }, { "epoch": 13.782426778242678, "grad_norm": 0.25827619433403015, "learning_rate": 3.766489993417104e-06, "loss": 0.0148, "step": 29646 }, { "epoch": 13.783356578335658, "grad_norm": 0.16857874393463135, "learning_rate": 3.7251379551327003e-06, "loss": 0.0098, "step": 29648 }, { "epoch": 13.784286378428638, "grad_norm": 0.8590726256370544, "learning_rate": 3.68399589114767e-06, "loss": 0.0108, "step": 29650 }, { "epoch": 13.785216178521617, "grad_norm": 0.5617759227752686, "learning_rate": 3.6430642075176673e-06, "loss": 0.01, "step": 29652 }, { "epoch": 13.786145978614599, "grad_norm": 0.14875265955924988, "learning_rate": 3.602343308221702e-06, "loss": 0.0039, "step": 29654 }, { "epoch": 13.787075778707578, "grad_norm": 0.810243546962738, "learning_rate": 3.561833595158699e-06, "loss": 0.0063, "step": 29656 }, { "epoch": 13.788005578800558, "grad_norm": 0.27932626008987427, "learning_rate": 3.5215354681432676e-06, "loss": 0.0038, "step": 29658 }, { "epoch": 13.788935378893537, "grad_norm": 0.9037229418754578, "learning_rate": 3.481449324901466e-06, "loss": 0.0078, "step": 29660 }, { "epoch": 13.789865178986517, "grad_norm": 0.17090460658073425, "learning_rate": 3.4415755610674378e-06, "loss": 0.0024, "step": 29662 }, { "epoch": 13.790794979079498, "grad_norm": 0.10617169737815857, "learning_rate": 3.401914570179222e-06, "loss": 0.0015, "step": 29664 }, { "epoch": 13.791724779172478, "grad_norm": 0.3309746980667114, "learning_rate": 3.3624667436746124e-06, "loss": 0.0042, "step": 29666 }, { "epoch": 13.792654579265458, "grad_norm": 1.3722143173217773, "learning_rate": 3.3232324708877253e-06, "loss": 0.0236, "step": 29668 }, { "epoch": 13.793584379358437, "grad_norm": 0.0615844763815403, "learning_rate": 3.284212139045177e-06, "loss": 0.0016, "step": 29670 }, { "epoch": 13.794514179451419, "grad_norm": 0.061131641268730164, "learning_rate": 3.2454061332618844e-06, "loss": 0.0016, "step": 29672 }, { "epoch": 13.795443979544398, "grad_norm": 0.28477659821510315, "learning_rate": 3.2068148365372853e-06, "loss": 0.0045, "step": 29674 }, { "epoch": 13.796373779637378, "grad_norm": 0.39030471444129944, "learning_rate": 3.168438629751984e-06, "loss": 0.0032, "step": 29676 }, { "epoch": 13.797303579730357, "grad_norm": 0.1501028835773468, "learning_rate": 3.130277891663734e-06, "loss": 0.0028, "step": 29678 }, { "epoch": 13.798233379823339, "grad_norm": 0.4893513321876526, "learning_rate": 3.092332998903445e-06, "loss": 0.0032, "step": 29680 }, { "epoch": 13.799163179916318, "grad_norm": 0.7292957901954651, "learning_rate": 3.0546043259719723e-06, "loss": 0.0157, "step": 29682 }, { "epoch": 13.800092980009298, "grad_norm": 0.7923303842544556, "learning_rate": 3.01709224523609e-06, "loss": 0.0067, "step": 29684 }, { "epoch": 13.801022780102278, "grad_norm": 2.935307025909424, "learning_rate": 2.9797971269249725e-06, "loss": 0.0179, "step": 29686 }, { "epoch": 13.801952580195259, "grad_norm": 0.5052317976951599, "learning_rate": 2.942719339126211e-06, "loss": 0.0121, "step": 29688 }, { "epoch": 13.802882380288239, "grad_norm": 0.17262491583824158, "learning_rate": 2.9058592477826894e-06, "loss": 0.0027, "step": 29690 }, { "epoch": 13.803812180381218, "grad_norm": 0.11879999190568924, "learning_rate": 2.8692172166887087e-06, "loss": 0.003, "step": 29692 }, { "epoch": 13.804741980474198, "grad_norm": 0.1826726198196411, "learning_rate": 2.8327936074861567e-06, "loss": 0.0025, "step": 29694 }, { "epoch": 13.805671780567177, "grad_norm": 0.1207733005285263, "learning_rate": 2.796588779661356e-06, "loss": 0.0039, "step": 29696 }, { "epoch": 13.806601580660159, "grad_norm": 0.35102513432502747, "learning_rate": 2.760603090541584e-06, "loss": 0.0087, "step": 29698 }, { "epoch": 13.807531380753138, "grad_norm": 0.6594264507293701, "learning_rate": 2.724836895290817e-06, "loss": 0.0043, "step": 29700 }, { "epoch": 13.808461180846118, "grad_norm": 0.5781947374343872, "learning_rate": 2.689290546907048e-06, "loss": 0.0056, "step": 29702 }, { "epoch": 13.809390980939098, "grad_norm": 0.3080902099609375, "learning_rate": 2.6539643962183845e-06, "loss": 0.0038, "step": 29704 }, { "epoch": 13.810320781032079, "grad_norm": 0.09964258223772049, "learning_rate": 2.6188587918797476e-06, "loss": 0.0069, "step": 29706 }, { "epoch": 13.811250581125059, "grad_norm": 0.15427900850772858, "learning_rate": 2.5839740803691265e-06, "loss": 0.0032, "step": 29708 }, { "epoch": 13.812180381218038, "grad_norm": 0.03992847353219986, "learning_rate": 2.549310605984616e-06, "loss": 0.0026, "step": 29710 }, { "epoch": 13.813110181311018, "grad_norm": 0.5829887390136719, "learning_rate": 2.5148687108407912e-06, "loss": 0.0086, "step": 29712 }, { "epoch": 13.814039981403997, "grad_norm": 0.10035013407468796, "learning_rate": 2.480648734865097e-06, "loss": 0.0042, "step": 29714 }, { "epoch": 13.814969781496979, "grad_norm": 0.9173797369003296, "learning_rate": 2.446651015794961e-06, "loss": 0.011, "step": 29716 }, { "epoch": 13.815899581589958, "grad_norm": 0.25689584016799927, "learning_rate": 2.412875889174142e-06, "loss": 0.0094, "step": 29718 }, { "epoch": 13.816829381682938, "grad_norm": 0.37113499641418457, "learning_rate": 2.3793236883495842e-06, "loss": 0.0042, "step": 29720 }, { "epoch": 13.817759181775918, "grad_norm": 0.18736006319522858, "learning_rate": 2.3459947444677346e-06, "loss": 0.002, "step": 29722 }, { "epoch": 13.818688981868899, "grad_norm": 0.42110776901245117, "learning_rate": 2.31288938647204e-06, "loss": 0.0032, "step": 29724 }, { "epoch": 13.819618781961879, "grad_norm": 0.03162965178489685, "learning_rate": 2.28000794109902e-06, "loss": 0.0058, "step": 29726 }, { "epoch": 13.820548582054858, "grad_norm": 0.1769348829984665, "learning_rate": 2.2473507328751124e-06, "loss": 0.003, "step": 29728 }, { "epoch": 13.821478382147838, "grad_norm": 0.363511860370636, "learning_rate": 2.2149180841138574e-06, "loss": 0.0048, "step": 29730 }, { "epoch": 13.822408182240817, "grad_norm": 0.15466149151325226, "learning_rate": 2.1827103149124732e-06, "loss": 0.0021, "step": 29732 }, { "epoch": 13.823337982333799, "grad_norm": 0.14469149708747864, "learning_rate": 2.150727743148498e-06, "loss": 0.0021, "step": 29734 }, { "epoch": 13.824267782426778, "grad_norm": 0.45242446660995483, "learning_rate": 2.1189706844770726e-06, "loss": 0.0048, "step": 29736 }, { "epoch": 13.825197582519758, "grad_norm": 0.2534917891025543, "learning_rate": 2.087439452327547e-06, "loss": 0.0119, "step": 29738 }, { "epoch": 13.826127382612738, "grad_norm": 1.5322211980819702, "learning_rate": 2.0561343579005215e-06, "loss": 0.0163, "step": 29740 }, { "epoch": 13.827057182705719, "grad_norm": 0.19787494838237762, "learning_rate": 2.0250557101645e-06, "loss": 0.0024, "step": 29742 }, { "epoch": 13.827986982798699, "grad_norm": 0.857239842414856, "learning_rate": 1.9942038158532598e-06, "loss": 0.0099, "step": 29744 }, { "epoch": 13.828916782891678, "grad_norm": 0.2547113001346588, "learning_rate": 1.9635789794626102e-06, "loss": 0.0038, "step": 29746 }, { "epoch": 13.829846582984658, "grad_norm": 0.3572632968425751, "learning_rate": 1.933181503247186e-06, "loss": 0.0056, "step": 29748 }, { "epoch": 13.83077638307764, "grad_norm": 0.09747514873743057, "learning_rate": 1.903011687217804e-06, "loss": 0.0197, "step": 29750 }, { "epoch": 13.831706183170619, "grad_norm": 0.7389886379241943, "learning_rate": 1.873069829138574e-06, "loss": 0.0058, "step": 29752 }, { "epoch": 13.832635983263598, "grad_norm": 0.22908152639865875, "learning_rate": 1.8433562245233461e-06, "loss": 0.0036, "step": 29754 }, { "epoch": 13.833565783356578, "grad_norm": 0.13618040084838867, "learning_rate": 1.8138711666334628e-06, "loss": 0.0035, "step": 29756 }, { "epoch": 13.834495583449558, "grad_norm": 0.5016068816184998, "learning_rate": 1.7846149464745498e-06, "loss": 0.0073, "step": 29758 }, { "epoch": 13.835425383542539, "grad_norm": 0.4377065598964691, "learning_rate": 1.755587852793747e-06, "loss": 0.0047, "step": 29760 }, { "epoch": 13.836355183635519, "grad_norm": 0.15913477540016174, "learning_rate": 1.7267901720766254e-06, "loss": 0.0022, "step": 29762 }, { "epoch": 13.837284983728498, "grad_norm": 0.4224522113800049, "learning_rate": 1.698222188544729e-06, "loss": 0.0081, "step": 29764 }, { "epoch": 13.838214783821478, "grad_norm": 0.41163370013237, "learning_rate": 1.6698841841525895e-06, "loss": 0.0101, "step": 29766 }, { "epoch": 13.83914458391446, "grad_norm": 0.18880021572113037, "learning_rate": 1.6417764385847382e-06, "loss": 0.0065, "step": 29768 }, { "epoch": 13.840074384007439, "grad_norm": 0.10172995924949646, "learning_rate": 1.6138992292533401e-06, "loss": 0.012, "step": 29770 }, { "epoch": 13.841004184100418, "grad_norm": 0.08348206430673599, "learning_rate": 1.5862528312952008e-06, "loss": 0.0028, "step": 29772 }, { "epoch": 13.841933984193398, "grad_norm": 0.6568590998649597, "learning_rate": 1.5588375175691666e-06, "loss": 0.0046, "step": 29774 }, { "epoch": 13.84286378428638, "grad_norm": 0.08179233223199844, "learning_rate": 1.5316535586531922e-06, "loss": 0.0022, "step": 29776 }, { "epoch": 13.843793584379359, "grad_norm": 0.732352614402771, "learning_rate": 1.5047012228419776e-06, "loss": 0.0038, "step": 29778 }, { "epoch": 13.844723384472339, "grad_norm": 0.14949765801429749, "learning_rate": 1.4779807761443797e-06, "loss": 0.002, "step": 29780 }, { "epoch": 13.845653184565318, "grad_norm": 1.3326340913772583, "learning_rate": 1.451492482280236e-06, "loss": 0.0078, "step": 29782 }, { "epoch": 13.846582984658298, "grad_norm": 0.7948963642120361, "learning_rate": 1.4252366026783778e-06, "loss": 0.0118, "step": 29784 }, { "epoch": 13.84751278475128, "grad_norm": 0.07466501742601395, "learning_rate": 1.3992133964737908e-06, "loss": 0.0019, "step": 29786 }, { "epoch": 13.848442584844259, "grad_norm": 0.019021425396203995, "learning_rate": 1.373423120504901e-06, "loss": 0.001, "step": 29788 }, { "epoch": 13.849372384937238, "grad_norm": 0.43174850940704346, "learning_rate": 1.347866029311375e-06, "loss": 0.0047, "step": 29790 }, { "epoch": 13.850302185030218, "grad_norm": 0.6828911900520325, "learning_rate": 1.3225423751313874e-06, "loss": 0.0192, "step": 29792 }, { "epoch": 13.8512319851232, "grad_norm": 0.1927366703748703, "learning_rate": 1.29745240789924e-06, "loss": 0.0024, "step": 29794 }, { "epoch": 13.852161785216179, "grad_norm": 0.23161061108112335, "learning_rate": 1.2725963752426616e-06, "loss": 0.01, "step": 29796 }, { "epoch": 13.853091585309159, "grad_norm": 0.10009771585464478, "learning_rate": 1.2479745224807203e-06, "loss": 0.002, "step": 29798 }, { "epoch": 13.854021385402138, "grad_norm": 0.5628454685211182, "learning_rate": 1.2235870926212188e-06, "loss": 0.0062, "step": 29800 }, { "epoch": 13.854951185495118, "grad_norm": 0.38923507928848267, "learning_rate": 1.1994343263581273e-06, "loss": 0.0058, "step": 29802 }, { "epoch": 13.855880985588099, "grad_norm": 1.211409091949463, "learning_rate": 1.1755164620695077e-06, "loss": 0.0283, "step": 29804 }, { "epoch": 13.856810785681079, "grad_norm": 0.2476339489221573, "learning_rate": 1.1518337358151263e-06, "loss": 0.0024, "step": 29806 }, { "epoch": 13.857740585774058, "grad_norm": 0.24408015608787537, "learning_rate": 1.1283863813339331e-06, "loss": 0.0088, "step": 29808 }, { "epoch": 13.858670385867038, "grad_norm": 0.2003469169139862, "learning_rate": 1.1051746300417427e-06, "loss": 0.0122, "step": 29810 }, { "epoch": 13.85960018596002, "grad_norm": 0.41492462158203125, "learning_rate": 1.082198711029221e-06, "loss": 0.0026, "step": 29812 }, { "epoch": 13.860529986052999, "grad_norm": 0.3209458589553833, "learning_rate": 1.059458851059468e-06, "loss": 0.0043, "step": 29814 }, { "epoch": 13.861459786145979, "grad_norm": 0.09853029251098633, "learning_rate": 1.0369552745656166e-06, "loss": 0.0033, "step": 29816 }, { "epoch": 13.862389586238958, "grad_norm": 0.24818746745586395, "learning_rate": 1.0146882036489292e-06, "loss": 0.0041, "step": 29818 }, { "epoch": 13.863319386331938, "grad_norm": 0.16903679072856903, "learning_rate": 9.926578580764634e-07, "loss": 0.0021, "step": 29820 }, { "epoch": 13.864249186424919, "grad_norm": 0.46312621235847473, "learning_rate": 9.708644552787315e-07, "loss": 0.0077, "step": 29822 }, { "epoch": 13.865178986517899, "grad_norm": 0.5240738391876221, "learning_rate": 9.493082103478693e-07, "loss": 0.007, "step": 29824 }, { "epoch": 13.866108786610878, "grad_norm": 0.5673319101333618, "learning_rate": 9.279893360353158e-07, "loss": 0.0257, "step": 29826 }, { "epoch": 13.867038586703858, "grad_norm": 0.7570368051528931, "learning_rate": 9.069080427497999e-07, "loss": 0.0075, "step": 29828 }, { "epoch": 13.86796838679684, "grad_norm": 0.37346893548965454, "learning_rate": 8.860645385550797e-07, "loss": 0.0036, "step": 29830 }, { "epoch": 13.868898186889819, "grad_norm": 0.4087144732475281, "learning_rate": 8.654590291681291e-07, "loss": 0.0145, "step": 29832 }, { "epoch": 13.869827986982799, "grad_norm": 0.10752689838409424, "learning_rate": 8.450917179571427e-07, "loss": 0.0019, "step": 29834 }, { "epoch": 13.870757787075778, "grad_norm": 0.09131059050559998, "learning_rate": 8.249628059391262e-07, "loss": 0.0038, "step": 29836 }, { "epoch": 13.87168758716876, "grad_norm": 0.42736145853996277, "learning_rate": 8.050724917783536e-07, "loss": 0.0083, "step": 29838 }, { "epoch": 13.872617387261739, "grad_norm": 1.227126121520996, "learning_rate": 7.854209717842464e-07, "loss": 0.0167, "step": 29840 }, { "epoch": 13.873547187354719, "grad_norm": 0.08247619867324829, "learning_rate": 7.660084399092837e-07, "loss": 0.002, "step": 29842 }, { "epoch": 13.874476987447698, "grad_norm": 0.2451585829257965, "learning_rate": 7.468350877473589e-07, "loss": 0.0039, "step": 29844 }, { "epoch": 13.87540678754068, "grad_norm": 0.22167004644870758, "learning_rate": 7.279011045317207e-07, "loss": 0.0072, "step": 29846 }, { "epoch": 13.87633658763366, "grad_norm": 0.05130327120423317, "learning_rate": 7.092066771331795e-07, "loss": 0.0013, "step": 29848 }, { "epoch": 13.877266387726639, "grad_norm": 0.13969938457012177, "learning_rate": 6.907519900581038e-07, "loss": 0.005, "step": 29850 }, { "epoch": 13.878196187819619, "grad_norm": 0.08405889570713043, "learning_rate": 6.725372254468465e-07, "loss": 0.0018, "step": 29852 }, { "epoch": 13.879125987912598, "grad_norm": 0.32696861028671265, "learning_rate": 6.545625630718182e-07, "loss": 0.0052, "step": 29854 }, { "epoch": 13.88005578800558, "grad_norm": 1.4949243068695068, "learning_rate": 6.368281803356035e-07, "loss": 0.0206, "step": 29856 }, { "epoch": 13.880985588098559, "grad_norm": 1.000941276550293, "learning_rate": 6.193342522694361e-07, "loss": 0.0123, "step": 29858 }, { "epoch": 13.881915388191539, "grad_norm": 0.13273780047893524, "learning_rate": 6.020809515312902e-07, "loss": 0.0041, "step": 29860 }, { "epoch": 13.882845188284518, "grad_norm": 0.3644525706768036, "learning_rate": 5.850684484043893e-07, "loss": 0.0125, "step": 29862 }, { "epoch": 13.8837749883775, "grad_norm": 0.3412869870662689, "learning_rate": 5.682969107951612e-07, "loss": 0.0034, "step": 29864 }, { "epoch": 13.88470478847048, "grad_norm": 0.12335454672574997, "learning_rate": 5.517665042319441e-07, "loss": 0.0021, "step": 29866 }, { "epoch": 13.885634588563459, "grad_norm": 1.008522868156433, "learning_rate": 5.354773918632159e-07, "loss": 0.0075, "step": 29868 }, { "epoch": 13.886564388656438, "grad_norm": 0.1634264439344406, "learning_rate": 5.194297344558655e-07, "loss": 0.0021, "step": 29870 }, { "epoch": 13.887494188749418, "grad_norm": 0.13019388914108276, "learning_rate": 5.036236903938293e-07, "loss": 0.0022, "step": 29872 }, { "epoch": 13.8884239888424, "grad_norm": 0.20968623459339142, "learning_rate": 4.880594156764182e-07, "loss": 0.0025, "step": 29874 }, { "epoch": 13.889353788935379, "grad_norm": 0.8288216590881348, "learning_rate": 4.727370639166681e-07, "loss": 0.008, "step": 29876 }, { "epoch": 13.890283589028359, "grad_norm": 0.916913628578186, "learning_rate": 4.5765678634003907e-07, "loss": 0.0073, "step": 29878 }, { "epoch": 13.891213389121338, "grad_norm": 0.9773091077804565, "learning_rate": 4.4281873178278835e-07, "loss": 0.0084, "step": 29880 }, { "epoch": 13.89214318921432, "grad_norm": 0.16623684763908386, "learning_rate": 4.282230466905492e-07, "loss": 0.0126, "step": 29882 }, { "epoch": 13.8930729893073, "grad_norm": 0.6804378628730774, "learning_rate": 4.1386987511677986e-07, "loss": 0.006, "step": 29884 }, { "epoch": 13.894002789400279, "grad_norm": 0.09746422618627548, "learning_rate": 3.9975935872148893e-07, "loss": 0.0049, "step": 29886 }, { "epoch": 13.894932589493258, "grad_norm": 0.8784428834915161, "learning_rate": 3.85891636769873e-07, "loss": 0.0136, "step": 29888 }, { "epoch": 13.895862389586238, "grad_norm": 0.16211506724357605, "learning_rate": 3.722668461306512e-07, "loss": 0.0016, "step": 29890 }, { "epoch": 13.89679218967922, "grad_norm": 1.0927419662475586, "learning_rate": 3.5888512127504387e-07, "loss": 0.0115, "step": 29892 }, { "epoch": 13.897721989772199, "grad_norm": 0.11840052157640457, "learning_rate": 3.4574659427526524e-07, "loss": 0.0034, "step": 29894 }, { "epoch": 13.898651789865179, "grad_norm": 1.2611483335494995, "learning_rate": 3.3285139480330794e-07, "loss": 0.0083, "step": 29896 }, { "epoch": 13.899581589958158, "grad_norm": 0.5570144057273865, "learning_rate": 3.201996501295245e-07, "loss": 0.0073, "step": 29898 }, { "epoch": 13.90051139005114, "grad_norm": 0.729580819606781, "learning_rate": 3.0779148512155343e-07, "loss": 0.0101, "step": 29900 }, { "epoch": 13.90144119014412, "grad_norm": 0.20412583649158478, "learning_rate": 2.9562702224300624e-07, "loss": 0.0072, "step": 29902 }, { "epoch": 13.902370990237099, "grad_norm": 1.1368352174758911, "learning_rate": 2.837063815521628e-07, "loss": 0.0247, "step": 29904 }, { "epoch": 13.903300790330078, "grad_norm": 0.09162263572216034, "learning_rate": 2.7202968070096136e-07, "loss": 0.0031, "step": 29906 }, { "epoch": 13.90423059042306, "grad_norm": 0.08213433623313904, "learning_rate": 2.6059703493375205e-07, "loss": 0.0057, "step": 29908 }, { "epoch": 13.90516039051604, "grad_norm": 0.6064892411231995, "learning_rate": 2.494085570860814e-07, "loss": 0.0064, "step": 29910 }, { "epoch": 13.906090190609019, "grad_norm": 0.1841920018196106, "learning_rate": 2.3846435758373456e-07, "loss": 0.0034, "step": 29912 }, { "epoch": 13.907019990701999, "grad_norm": 0.7131078243255615, "learning_rate": 2.27764544441517e-07, "loss": 0.0036, "step": 29914 }, { "epoch": 13.907949790794978, "grad_norm": 0.4288526773452759, "learning_rate": 2.173092232623384e-07, "loss": 0.0042, "step": 29916 }, { "epoch": 13.90887959088796, "grad_norm": 1.3690251111984253, "learning_rate": 2.07098497235925e-07, "loss": 0.0195, "step": 29918 }, { "epoch": 13.90980939098094, "grad_norm": 0.6254481673240662, "learning_rate": 1.971324671380479e-07, "loss": 0.0071, "step": 29920 }, { "epoch": 13.910739191073919, "grad_norm": 0.3924870491027832, "learning_rate": 1.8741123132941548e-07, "loss": 0.0033, "step": 29922 }, { "epoch": 13.911668991166898, "grad_norm": 0.2669554650783539, "learning_rate": 1.779348857546634e-07, "loss": 0.0031, "step": 29924 }, { "epoch": 13.91259879125988, "grad_norm": 0.49874988198280334, "learning_rate": 1.6870352394151605e-07, "loss": 0.0084, "step": 29926 }, { "epoch": 13.91352859135286, "grad_norm": 0.06438687443733215, "learning_rate": 1.5971723699980424e-07, "loss": 0.0077, "step": 29928 }, { "epoch": 13.914458391445839, "grad_norm": 0.3891589939594269, "learning_rate": 1.5097611362051866e-07, "loss": 0.0052, "step": 29930 }, { "epoch": 13.915388191538819, "grad_norm": 0.4965618848800659, "learning_rate": 1.4248024007503268e-07, "loss": 0.0041, "step": 29932 }, { "epoch": 13.9163179916318, "grad_norm": 0.39232543110847473, "learning_rate": 1.34229700214192e-07, "loss": 0.0047, "step": 29934 }, { "epoch": 13.91724779172478, "grad_norm": 0.27966514229774475, "learning_rate": 1.262245754675125e-07, "loss": 0.0087, "step": 29936 }, { "epoch": 13.91817759181776, "grad_norm": 0.7828666567802429, "learning_rate": 1.1846494484230325e-07, "loss": 0.0039, "step": 29938 }, { "epoch": 13.919107391910739, "grad_norm": 0.5809452533721924, "learning_rate": 1.1095088492300581e-07, "loss": 0.0081, "step": 29940 }, { "epoch": 13.920037192003718, "grad_norm": 0.27105313539505005, "learning_rate": 1.036824698703616e-07, "loss": 0.0044, "step": 29942 }, { "epoch": 13.9209669920967, "grad_norm": 0.4260653853416443, "learning_rate": 9.665977142068751e-08, "loss": 0.0037, "step": 29944 }, { "epoch": 13.92189679218968, "grad_norm": 0.8446748852729797, "learning_rate": 8.988285888518757e-08, "loss": 0.012, "step": 29946 }, { "epoch": 13.922826592282659, "grad_norm": 0.4622461199760437, "learning_rate": 8.335179914924508e-08, "loss": 0.0214, "step": 29948 }, { "epoch": 13.923756392375639, "grad_norm": 0.29910027980804443, "learning_rate": 7.70666566718038e-08, "loss": 0.0032, "step": 29950 }, { "epoch": 13.92468619246862, "grad_norm": 0.121853306889534, "learning_rate": 7.102749348465176e-08, "loss": 0.0028, "step": 29952 }, { "epoch": 13.9256159925616, "grad_norm": 0.6205908060073853, "learning_rate": 6.523436919190782e-08, "loss": 0.0126, "step": 29954 }, { "epoch": 13.92654579265458, "grad_norm": 0.09408044815063477, "learning_rate": 5.968734096937775e-08, "loss": 0.0013, "step": 29956 }, { "epoch": 13.927475592747559, "grad_norm": 0.3307828903198242, "learning_rate": 5.438646356396856e-08, "loss": 0.003, "step": 29958 }, { "epoch": 13.928405392840538, "grad_norm": 1.1399260759353638, "learning_rate": 4.933178929321387e-08, "loss": 0.0078, "step": 29960 }, { "epoch": 13.92933519293352, "grad_norm": 0.32088059186935425, "learning_rate": 4.4523368044716086e-08, "loss": 0.0025, "step": 29962 }, { "epoch": 13.9302649930265, "grad_norm": 1.126752495765686, "learning_rate": 3.996124727563283e-08, "loss": 0.0072, "step": 29964 }, { "epoch": 13.931194793119479, "grad_norm": 2.449986219406128, "learning_rate": 3.564547201226071e-08, "loss": 0.017, "step": 29966 }, { "epoch": 13.932124593212459, "grad_norm": 0.814751148223877, "learning_rate": 3.157608484956614e-08, "loss": 0.0069, "step": 29968 }, { "epoch": 13.93305439330544, "grad_norm": 0.0986282154917717, "learning_rate": 2.7753125950752456e-08, "loss": 0.0022, "step": 29970 }, { "epoch": 13.93398419339842, "grad_norm": 0.17969800531864166, "learning_rate": 2.41766330468796e-08, "loss": 0.0029, "step": 29972 }, { "epoch": 13.9349139934914, "grad_norm": 0.3073396384716034, "learning_rate": 2.0846641436494986e-08, "loss": 0.0045, "step": 29974 }, { "epoch": 13.935843793584379, "grad_norm": 0.19806531071662903, "learning_rate": 1.7763183985272685e-08, "loss": 0.0023, "step": 29976 }, { "epoch": 13.936773593677358, "grad_norm": 0.05516946688294411, "learning_rate": 1.4926291125680357e-08, "loss": 0.0126, "step": 29978 }, { "epoch": 13.93770339377034, "grad_norm": 0.09134212136268616, "learning_rate": 1.2335990856710016e-08, "loss": 0.0034, "step": 29980 }, { "epoch": 13.93863319386332, "grad_norm": 0.06675445288419724, "learning_rate": 9.9923087435866e-09, "loss": 0.0013, "step": 29982 }, { "epoch": 13.939562993956299, "grad_norm": 0.29120808839797974, "learning_rate": 7.895267917501514e-09, "loss": 0.0037, "step": 29984 }, { "epoch": 13.940492794049279, "grad_norm": 0.5629797577857971, "learning_rate": 6.044889075401691e-09, "loss": 0.0076, "step": 29986 }, { "epoch": 13.94142259414226, "grad_norm": 0.178298220038414, "learning_rate": 4.441190479775876e-09, "loss": 0.0025, "step": 29988 }, { "epoch": 13.94235239423524, "grad_norm": 0.04699594900012016, "learning_rate": 3.0841879584880874e-09, "loss": 0.0039, "step": 29990 }, { "epoch": 13.94328219432822, "grad_norm": 0.2753047049045563, "learning_rate": 1.9738949045972097e-09, "loss": 0.0136, "step": 29992 }, { "epoch": 13.944211994421199, "grad_norm": 0.253605455160141, "learning_rate": 1.1103222762542958e-09, "loss": 0.0038, "step": 29994 }, { "epoch": 13.94514179451418, "grad_norm": 0.4627235531806946, "learning_rate": 4.934785965721174e-10, "loss": 0.0024, "step": 29996 }, { "epoch": 13.94607159460716, "grad_norm": 0.1968177706003189, "learning_rate": 1.2336995354467215e-10, "loss": 0.0046, "step": 29998 }, { "epoch": 13.94700139470014, "grad_norm": 0.6651080846786499, "learning_rate": 0.0, "loss": 0.0375, "step": 30000 }, { "epoch": 13.94700139470014, "eval_cer": 0.10492065154768764, "eval_loss": 0.18082347512245178, "eval_runtime": 401.0331, "eval_samples_per_second": 31.653, "eval_steps_per_second": 0.99, "step": 30000 }, { "epoch": 13.947931194793119, "grad_norm": 0.08652544021606445, "learning_rate": 1.2336995354744752e-10, "loss": 0.0019, "step": 30002 }, { "epoch": 13.9488609948861, "grad_norm": 0.15596771240234375, "learning_rate": 4.934785965721167e-10, "loss": 0.0046, "step": 30004 }, { "epoch": 13.94979079497908, "grad_norm": 0.296305775642395, "learning_rate": 1.1103222762542941e-09, "loss": 0.0087, "step": 30006 }, { "epoch": 13.95072059507206, "grad_norm": 0.07041045278310776, "learning_rate": 1.973894904597207e-09, "loss": 0.0039, "step": 30008 }, { "epoch": 13.95165039516504, "grad_norm": 0.825819730758667, "learning_rate": 3.0841879584825317e-09, "loss": 0.0142, "step": 30010 }, { "epoch": 13.952580195258019, "grad_norm": 0.055475879460573196, "learning_rate": 4.441190479773094e-09, "loss": 0.0029, "step": 30012 }, { "epoch": 13.953509995351, "grad_norm": 1.2462843656539917, "learning_rate": 6.044889075398907e-09, "loss": 0.0063, "step": 30014 }, { "epoch": 13.95443979544398, "grad_norm": 0.3269294798374176, "learning_rate": 7.895267917495952e-09, "loss": 0.0084, "step": 30016 }, { "epoch": 13.95536959553696, "grad_norm": 1.2278231382369995, "learning_rate": 9.992308743583813e-09, "loss": 0.0101, "step": 30018 }, { "epoch": 13.956299395629939, "grad_norm": 1.947463035583496, "learning_rate": 1.2335990856707224e-08, "loss": 0.0136, "step": 30020 }, { "epoch": 13.95722919572292, "grad_norm": 0.10346124321222305, "learning_rate": 1.4926291125674786e-08, "loss": 0.0045, "step": 30022 }, { "epoch": 13.9581589958159, "grad_norm": 0.7451969385147095, "learning_rate": 1.776318398526711e-08, "loss": 0.0091, "step": 30024 }, { "epoch": 13.95908879590888, "grad_norm": 0.14962491393089294, "learning_rate": 2.0846641436497733e-08, "loss": 0.0026, "step": 30026 }, { "epoch": 13.96001859600186, "grad_norm": 0.6763772964477539, "learning_rate": 2.417663304688512e-08, "loss": 0.0048, "step": 30028 }, { "epoch": 13.960948396094839, "grad_norm": 0.30535688996315, "learning_rate": 2.7753125950746875e-08, "loss": 0.0037, "step": 30030 }, { "epoch": 13.96187819618782, "grad_norm": 0.14779549837112427, "learning_rate": 3.157608484956055e-08, "loss": 0.0032, "step": 30032 }, { "epoch": 13.9628079962808, "grad_norm": 0.1610032469034195, "learning_rate": 3.564547201225788e-08, "loss": 0.002, "step": 30034 }, { "epoch": 13.96373779637378, "grad_norm": 0.10360448807477951, "learning_rate": 3.9961247275627227e-08, "loss": 0.002, "step": 30036 }, { "epoch": 13.964667596466759, "grad_norm": 0.08681638538837433, "learning_rate": 4.452336804469937e-08, "loss": 0.0027, "step": 30038 }, { "epoch": 13.96559739655974, "grad_norm": 0.9112300276756287, "learning_rate": 4.933178929320827e-08, "loss": 0.009, "step": 30040 }, { "epoch": 13.96652719665272, "grad_norm": 0.1478370577096939, "learning_rate": 5.438646356396295e-08, "loss": 0.0108, "step": 30042 }, { "epoch": 13.9674569967457, "grad_norm": 0.11503852158784866, "learning_rate": 5.968734096935824e-08, "loss": 0.0016, "step": 30044 }, { "epoch": 13.96838679683868, "grad_norm": 0.18806606531143188, "learning_rate": 6.523436919189942e-08, "loss": 0.0025, "step": 30046 }, { "epoch": 13.969316596931659, "grad_norm": 0.9040982127189636, "learning_rate": 7.102749348464612e-08, "loss": 0.0191, "step": 30048 }, { "epoch": 13.97024639702464, "grad_norm": 0.09525856375694275, "learning_rate": 7.706665667178429e-08, "loss": 0.0021, "step": 30050 }, { "epoch": 13.97117619711762, "grad_norm": 0.7075115442276001, "learning_rate": 8.335179914925332e-08, "loss": 0.006, "step": 30052 }, { "epoch": 13.9721059972106, "grad_norm": 0.3181791305541992, "learning_rate": 8.988285888519304e-08, "loss": 0.0038, "step": 30054 }, { "epoch": 13.973035797303579, "grad_norm": 0.19418376684188843, "learning_rate": 9.665977142069574e-08, "loss": 0.0074, "step": 30056 }, { "epoch": 13.97396559739656, "grad_norm": 2.815220594406128, "learning_rate": 1.0368246987035315e-07, "loss": 0.0144, "step": 30058 }, { "epoch": 13.97489539748954, "grad_norm": 0.19790415465831757, "learning_rate": 1.1095088492299735e-07, "loss": 0.0028, "step": 30060 }, { "epoch": 13.97582519758252, "grad_norm": 0.3737892508506775, "learning_rate": 1.184649448422948e-07, "loss": 0.0029, "step": 30062 }, { "epoch": 13.9767549976755, "grad_norm": 1.0893968343734741, "learning_rate": 1.262245754674846e-07, "loss": 0.0187, "step": 30064 }, { "epoch": 13.97768479776848, "grad_norm": 0.4295744299888611, "learning_rate": 1.3422970021418348e-07, "loss": 0.0061, "step": 30066 }, { "epoch": 13.97861459786146, "grad_norm": 0.4534996747970581, "learning_rate": 1.4248024007502416e-07, "loss": 0.0145, "step": 30068 }, { "epoch": 13.97954439795444, "grad_norm": 0.06861530244350433, "learning_rate": 1.5097611362050738e-07, "loss": 0.0063, "step": 30070 }, { "epoch": 13.98047419804742, "grad_norm": 0.6768627166748047, "learning_rate": 1.5971723699977348e-07, "loss": 0.0094, "step": 30072 }, { "epoch": 13.981403998140399, "grad_norm": 0.16323083639144897, "learning_rate": 1.687035239415047e-07, "loss": 0.0025, "step": 30074 }, { "epoch": 13.98233379823338, "grad_norm": 0.9332080483436584, "learning_rate": 1.7793488575465203e-07, "loss": 0.0045, "step": 30076 }, { "epoch": 13.98326359832636, "grad_norm": 0.05873970687389374, "learning_rate": 1.874112313293819e-07, "loss": 0.0014, "step": 30078 }, { "epoch": 13.98419339841934, "grad_norm": 0.18078792095184326, "learning_rate": 1.971324671380587e-07, "loss": 0.0152, "step": 30080 }, { "epoch": 13.98512319851232, "grad_norm": 0.0496731698513031, "learning_rate": 2.070984972359358e-07, "loss": 0.0123, "step": 30082 }, { "epoch": 13.9860529986053, "grad_norm": 0.19078277051448822, "learning_rate": 2.17309223262327e-07, "loss": 0.0169, "step": 30084 }, { "epoch": 13.98698279869828, "grad_norm": 1.0173060894012451, "learning_rate": 2.2776454444152777e-07, "loss": 0.0209, "step": 30086 }, { "epoch": 13.98791259879126, "grad_norm": 0.15541166067123413, "learning_rate": 2.384643575837204e-07, "loss": 0.0055, "step": 30088 }, { "epoch": 13.98884239888424, "grad_norm": 1.3948413133621216, "learning_rate": 2.4940855708606714e-07, "loss": 0.0169, "step": 30090 }, { "epoch": 13.98977219897722, "grad_norm": 0.574107825756073, "learning_rate": 2.605970349337127e-07, "loss": 0.0057, "step": 30092 }, { "epoch": 13.9907019990702, "grad_norm": 0.34662702679634094, "learning_rate": 2.7202968070094696e-07, "loss": 0.0034, "step": 30094 }, { "epoch": 13.99163179916318, "grad_norm": 0.11203297972679138, "learning_rate": 2.837063815521484e-07, "loss": 0.0053, "step": 30096 }, { "epoch": 13.99256159925616, "grad_norm": 0.08750130236148834, "learning_rate": 2.9562702224296405e-07, "loss": 0.0114, "step": 30098 }, { "epoch": 13.99349139934914, "grad_norm": 0.7964156270027161, "learning_rate": 3.07791485121539e-07, "loss": 0.0084, "step": 30100 }, { "epoch": 13.99442119944212, "grad_norm": 2.3507235050201416, "learning_rate": 3.2019965012950723e-07, "loss": 0.0117, "step": 30102 }, { "epoch": 13.9953509995351, "grad_norm": 0.1729487180709839, "learning_rate": 3.328513948032628e-07, "loss": 0.0018, "step": 30104 }, { "epoch": 13.99628079962808, "grad_norm": 0.8607238531112671, "learning_rate": 3.457465942752783e-07, "loss": 0.0119, "step": 30106 }, { "epoch": 13.99721059972106, "grad_norm": 0.5308325886726379, "learning_rate": 3.588851212750569e-07, "loss": 0.0046, "step": 30108 }, { "epoch": 13.99814039981404, "grad_norm": 1.0129146575927734, "learning_rate": 3.72266846130667e-07, "loss": 0.008, "step": 30110 }, { "epoch": 13.99907019990702, "grad_norm": 0.43400251865386963, "learning_rate": 3.858916367698554e-07, "loss": 0.0034, "step": 30112 }, { "epoch": 14.0, "grad_norm": 0.7426713109016418, "learning_rate": 3.997593587215046e-07, "loss": 0.0048, "step": 30114 }, { "epoch": 14.00092980009298, "grad_norm": 0.6018356680870056, "learning_rate": 4.1386987511676223e-07, "loss": 0.0045, "step": 30116 }, { "epoch": 14.00185960018596, "grad_norm": 0.02717386931180954, "learning_rate": 4.282230466904984e-07, "loss": 0.0015, "step": 30118 }, { "epoch": 14.00278940027894, "grad_norm": 0.26609036326408386, "learning_rate": 4.42818731782768e-07, "loss": 0.0064, "step": 30120 }, { "epoch": 14.00371920037192, "grad_norm": 0.3104061484336853, "learning_rate": 4.576567863400215e-07, "loss": 0.009, "step": 30122 }, { "epoch": 14.0046490004649, "grad_norm": 0.13522003591060638, "learning_rate": 4.7273706391664764e-07, "loss": 0.0027, "step": 30124 }, { "epoch": 14.00557880055788, "grad_norm": 0.06171660125255585, "learning_rate": 4.880594156763644e-07, "loss": 0.0013, "step": 30126 }, { "epoch": 14.00650860065086, "grad_norm": 0.06970100104808807, "learning_rate": 5.036236903938088e-07, "loss": 0.0033, "step": 30128 }, { "epoch": 14.00743840074384, "grad_norm": 0.07544492185115814, "learning_rate": 5.194297344558449e-07, "loss": 0.0028, "step": 30130 }, { "epoch": 14.00836820083682, "grad_norm": 0.44741010665893555, "learning_rate": 5.354773918631592e-07, "loss": 0.0038, "step": 30132 }, { "epoch": 14.0092980009298, "grad_norm": 0.06211334466934204, "learning_rate": 5.517665042319595e-07, "loss": 0.0023, "step": 30134 }, { "epoch": 14.010227801022781, "grad_norm": 0.3522106409072876, "learning_rate": 5.682969107951767e-07, "loss": 0.0065, "step": 30136 }, { "epoch": 14.01115760111576, "grad_norm": 0.09983526915311813, "learning_rate": 5.850684484043686e-07, "loss": 0.0024, "step": 30138 }, { "epoch": 14.01208740120874, "grad_norm": 0.07995253801345825, "learning_rate": 6.020809515313083e-07, "loss": 0.0032, "step": 30140 }, { "epoch": 14.01301720130172, "grad_norm": 0.21271564066410065, "learning_rate": 6.193342522694153e-07, "loss": 0.0071, "step": 30142 }, { "epoch": 14.0139470013947, "grad_norm": 0.515906572341919, "learning_rate": 6.368281803355826e-07, "loss": 0.0057, "step": 30144 }, { "epoch": 14.01487680148768, "grad_norm": 0.11954006552696228, "learning_rate": 6.545625630717556e-07, "loss": 0.0018, "step": 30146 }, { "epoch": 14.01580660158066, "grad_norm": 0.041757505387067795, "learning_rate": 6.725372254468226e-07, "loss": 0.0015, "step": 30148 }, { "epoch": 14.01673640167364, "grad_norm": 0.03699616342782974, "learning_rate": 6.907519900580828e-07, "loss": 0.0027, "step": 30150 }, { "epoch": 14.01766620176662, "grad_norm": 0.5089284181594849, "learning_rate": 7.092066771331141e-07, "loss": 0.0043, "step": 30152 }, { "epoch": 14.018596001859601, "grad_norm": 0.30379021167755127, "learning_rate": 7.279011045316969e-07, "loss": 0.0052, "step": 30154 }, { "epoch": 14.01952580195258, "grad_norm": 0.5546357035636902, "learning_rate": 7.468350877473351e-07, "loss": 0.0044, "step": 30156 }, { "epoch": 14.02045560204556, "grad_norm": 0.08521898090839386, "learning_rate": 7.660084399092598e-07, "loss": 0.0028, "step": 30158 }, { "epoch": 14.02138540213854, "grad_norm": 0.052558962255716324, "learning_rate": 7.854209717841782e-07, "loss": 0.002, "step": 30160 }, { "epoch": 14.02231520223152, "grad_norm": 0.1361863911151886, "learning_rate": 8.050724917783743e-07, "loss": 0.0027, "step": 30162 }, { "epoch": 14.0232450023245, "grad_norm": 0.6184684038162231, "learning_rate": 8.249628059391443e-07, "loss": 0.0026, "step": 30164 }, { "epoch": 14.02417480241748, "grad_norm": 0.07195631414651871, "learning_rate": 8.450917179571192e-07, "loss": 0.0014, "step": 30166 }, { "epoch": 14.02510460251046, "grad_norm": 0.7178915143013, "learning_rate": 8.654590291681501e-07, "loss": 0.0036, "step": 30168 }, { "epoch": 14.02603440260344, "grad_norm": 0.11027506738901138, "learning_rate": 8.860645385550535e-07, "loss": 0.0023, "step": 30170 }, { "epoch": 14.026964202696421, "grad_norm": 0.10586996376514435, "learning_rate": 9.069080427497267e-07, "loss": 0.0027, "step": 30172 }, { "epoch": 14.0278940027894, "grad_norm": 0.2785012722015381, "learning_rate": 9.279893360352896e-07, "loss": 0.0031, "step": 30174 }, { "epoch": 14.02882380288238, "grad_norm": 0.0649859756231308, "learning_rate": 9.493082103478432e-07, "loss": 0.0053, "step": 30176 }, { "epoch": 14.02975360297536, "grad_norm": 0.12583287060260773, "learning_rate": 9.708644552787052e-07, "loss": 0.0045, "step": 30178 }, { "epoch": 14.030683403068341, "grad_norm": 0.06612996757030487, "learning_rate": 9.926578580763872e-07, "loss": 0.0036, "step": 30180 }, { "epoch": 14.03161320316132, "grad_norm": 0.4546986520290375, "learning_rate": 1.014688203648903e-06, "loss": 0.0036, "step": 30182 }, { "epoch": 14.0325430032543, "grad_norm": 0.05046948790550232, "learning_rate": 1.0369552745655871e-06, "loss": 0.0013, "step": 30184 }, { "epoch": 14.03347280334728, "grad_norm": 0.08184432983398438, "learning_rate": 1.0594588510593886e-06, "loss": 0.0043, "step": 30186 }, { "epoch": 14.03440260344026, "grad_norm": 0.2803100049495697, "learning_rate": 1.0821987110292443e-06, "loss": 0.0018, "step": 30188 }, { "epoch": 14.035332403533241, "grad_norm": 0.07418500632047653, "learning_rate": 1.105174630041766e-06, "loss": 0.0134, "step": 30190 }, { "epoch": 14.03626220362622, "grad_norm": 0.5193578600883484, "learning_rate": 1.1283863813339037e-06, "loss": 0.0071, "step": 30192 }, { "epoch": 14.0371920037192, "grad_norm": 0.09573944658041, "learning_rate": 1.1518337358151521e-06, "loss": 0.0013, "step": 30194 }, { "epoch": 14.03812180381218, "grad_norm": 0.3436044454574585, "learning_rate": 1.1755164620695306e-06, "loss": 0.0046, "step": 30196 }, { "epoch": 14.039051603905161, "grad_norm": 0.195977583527565, "learning_rate": 1.199434326358097e-06, "loss": 0.0025, "step": 30198 }, { "epoch": 14.03998140399814, "grad_norm": 0.31188300251960754, "learning_rate": 1.223587092621133e-06, "loss": 0.0031, "step": 30200 }, { "epoch": 14.04091120409112, "grad_norm": 2.021885395050049, "learning_rate": 1.24797452248069e-06, "loss": 0.0031, "step": 30202 }, { "epoch": 14.0418410041841, "grad_norm": 1.4209909439086914, "learning_rate": 1.2725963752426315e-06, "loss": 0.0062, "step": 30204 }, { "epoch": 14.04277080427708, "grad_norm": 0.09525880217552185, "learning_rate": 1.2974524078991514e-06, "loss": 0.0017, "step": 30206 }, { "epoch": 14.043700604370061, "grad_norm": 0.17017626762390137, "learning_rate": 1.3225423751313574e-06, "loss": 0.0019, "step": 30208 }, { "epoch": 14.04463040446304, "grad_norm": 0.06124043092131615, "learning_rate": 1.3478660293113445e-06, "loss": 0.0026, "step": 30210 }, { "epoch": 14.04556020455602, "grad_norm": 0.12013568729162216, "learning_rate": 1.3734231205048677e-06, "loss": 0.0021, "step": 30212 }, { "epoch": 14.046490004649, "grad_norm": 0.08400294929742813, "learning_rate": 1.3992133964736991e-06, "loss": 0.0027, "step": 30214 }, { "epoch": 14.047419804741981, "grad_norm": 0.11699585616588593, "learning_rate": 1.4252366026784024e-06, "loss": 0.0022, "step": 30216 }, { "epoch": 14.04834960483496, "grad_norm": 0.10198919475078583, "learning_rate": 1.4514924822802633e-06, "loss": 0.0015, "step": 30218 }, { "epoch": 14.04927940492794, "grad_norm": 0.3529801070690155, "learning_rate": 1.477980776144346e-06, "loss": 0.0035, "step": 30220 }, { "epoch": 14.05020920502092, "grad_norm": 0.026748893782496452, "learning_rate": 1.5047012228420024e-06, "loss": 0.0016, "step": 30222 }, { "epoch": 14.051139005113901, "grad_norm": 0.9139151573181152, "learning_rate": 1.5316535586531558e-06, "loss": 0.0072, "step": 30224 }, { "epoch": 14.052068805206881, "grad_norm": 0.08397724479436874, "learning_rate": 1.5588375175690692e-06, "loss": 0.0021, "step": 30226 }, { "epoch": 14.05299860529986, "grad_norm": 0.06712356954813004, "learning_rate": 1.5862528312951671e-06, "loss": 0.0014, "step": 30228 }, { "epoch": 14.05392840539284, "grad_norm": 0.09062254428863525, "learning_rate": 1.6138992292533062e-06, "loss": 0.0023, "step": 30230 }, { "epoch": 14.05485820548582, "grad_norm": 0.19074955582618713, "learning_rate": 1.6417764385847015e-06, "loss": 0.0016, "step": 30232 }, { "epoch": 14.055788005578801, "grad_norm": 0.31949540972709656, "learning_rate": 1.6698841841524887e-06, "loss": 0.0072, "step": 30234 }, { "epoch": 14.05671780567178, "grad_norm": 0.3516783118247986, "learning_rate": 1.6982221885446919e-06, "loss": 0.0028, "step": 30236 }, { "epoch": 14.05764760576476, "grad_norm": 0.3824283480644226, "learning_rate": 1.7267901720765881e-06, "loss": 0.0052, "step": 30238 }, { "epoch": 14.05857740585774, "grad_norm": 0.05136405676603317, "learning_rate": 1.7555878527936457e-06, "loss": 0.0012, "step": 30240 }, { "epoch": 14.059507205950721, "grad_norm": 0.17825153470039368, "learning_rate": 1.7846149464745096e-06, "loss": 0.0022, "step": 30242 }, { "epoch": 14.060437006043701, "grad_norm": 0.07959484308958054, "learning_rate": 1.8138711666334918e-06, "loss": 0.0025, "step": 30244 }, { "epoch": 14.06136680613668, "grad_norm": 0.04764473810791969, "learning_rate": 1.8433562245233722e-06, "loss": 0.0012, "step": 30246 }, { "epoch": 14.06229660622966, "grad_norm": 0.10115706920623779, "learning_rate": 1.873069829138536e-06, "loss": 0.006, "step": 30248 }, { "epoch": 14.06322640632264, "grad_norm": 0.030002795159816742, "learning_rate": 1.9030116872178325e-06, "loss": 0.0013, "step": 30250 }, { "epoch": 14.064156206415621, "grad_norm": 0.5512327551841736, "learning_rate": 1.9331815032471455e-06, "loss": 0.0037, "step": 30252 }, { "epoch": 14.0650860065086, "grad_norm": 0.5363564491271973, "learning_rate": 1.963578979462503e-06, "loss": 0.0043, "step": 30254 }, { "epoch": 14.06601580660158, "grad_norm": 0.1058364063501358, "learning_rate": 1.9942038158532195e-06, "loss": 0.0014, "step": 30256 }, { "epoch": 14.06694560669456, "grad_norm": 0.45014631748199463, "learning_rate": 2.0250557101644594e-06, "loss": 0.0044, "step": 30258 }, { "epoch": 14.067875406787541, "grad_norm": 0.2412813901901245, "learning_rate": 2.0561343579004114e-06, "loss": 0.002, "step": 30260 }, { "epoch": 14.068805206880521, "grad_norm": 0.33355793356895447, "learning_rate": 2.0874394523275062e-06, "loss": 0.0021, "step": 30262 }, { "epoch": 14.0697350069735, "grad_norm": 0.09039223194122314, "learning_rate": 2.1189706844770315e-06, "loss": 0.0024, "step": 30264 }, { "epoch": 14.07066480706648, "grad_norm": 0.09944231063127518, "learning_rate": 2.1507277431484565e-06, "loss": 0.0032, "step": 30266 }, { "epoch": 14.071594607159462, "grad_norm": 0.29977652430534363, "learning_rate": 2.1827103149123593e-06, "loss": 0.003, "step": 30268 }, { "epoch": 14.072524407252441, "grad_norm": 0.21711422502994537, "learning_rate": 2.214918084113813e-06, "loss": 0.0039, "step": 30270 }, { "epoch": 14.07345420734542, "grad_norm": 0.10317495465278625, "learning_rate": 2.247350732875143e-06, "loss": 0.002, "step": 30272 }, { "epoch": 14.0743840074384, "grad_norm": 0.16700416803359985, "learning_rate": 2.280007941098975e-06, "loss": 0.0023, "step": 30274 }, { "epoch": 14.07531380753138, "grad_norm": 0.5523369908332825, "learning_rate": 2.3128893864720707e-06, "loss": 0.0025, "step": 30276 }, { "epoch": 14.076243607624361, "grad_norm": 0.5750396847724915, "learning_rate": 2.3459947444677647e-06, "loss": 0.0055, "step": 30278 }, { "epoch": 14.077173407717341, "grad_norm": 0.5347676873207092, "learning_rate": 2.379323688349465e-06, "loss": 0.0046, "step": 30280 }, { "epoch": 14.07810320781032, "grad_norm": 0.7232571244239807, "learning_rate": 2.4128758891740974e-06, "loss": 0.0044, "step": 30282 }, { "epoch": 14.0790330079033, "grad_norm": 0.08644469827413559, "learning_rate": 2.4466510157949164e-06, "loss": 0.0015, "step": 30284 }, { "epoch": 14.079962807996282, "grad_norm": 0.5310443639755249, "learning_rate": 2.4806487348650497e-06, "loss": 0.0052, "step": 30286 }, { "epoch": 14.080892608089261, "grad_norm": 0.47918763756752014, "learning_rate": 2.5148687108406654e-06, "loss": 0.0096, "step": 30288 }, { "epoch": 14.08182240818224, "grad_norm": 0.11804406344890594, "learning_rate": 2.5493106059845675e-06, "loss": 0.0023, "step": 30290 }, { "epoch": 14.08275220827522, "grad_norm": 0.966452956199646, "learning_rate": 2.5839740803690782e-06, "loss": 0.0034, "step": 30292 }, { "epoch": 14.0836820083682, "grad_norm": 0.09767826646566391, "learning_rate": 2.618858791879622e-06, "loss": 0.0015, "step": 30294 }, { "epoch": 14.084611808461181, "grad_norm": 0.16123677790164948, "learning_rate": 2.6539643962183362e-06, "loss": 0.0018, "step": 30296 }, { "epoch": 14.085541608554161, "grad_norm": 0.15363430976867676, "learning_rate": 2.68929054690708e-06, "loss": 0.0022, "step": 30298 }, { "epoch": 14.08647140864714, "grad_norm": 0.11462454497814178, "learning_rate": 2.724836895290849e-06, "loss": 0.0025, "step": 30300 }, { "epoch": 14.08740120874012, "grad_norm": 0.2663321793079376, "learning_rate": 2.7606030905415347e-06, "loss": 0.0024, "step": 30302 }, { "epoch": 14.088331008833102, "grad_norm": 0.03899959474802017, "learning_rate": 2.7965887796613867e-06, "loss": 0.0067, "step": 30304 }, { "epoch": 14.089260808926081, "grad_norm": 0.20097479224205017, "learning_rate": 2.8327936074861076e-06, "loss": 0.0047, "step": 30306 }, { "epoch": 14.09019060901906, "grad_norm": 0.44869282841682434, "learning_rate": 2.8692172166885757e-06, "loss": 0.0033, "step": 30308 }, { "epoch": 14.09112040911204, "grad_norm": 0.2197728306055069, "learning_rate": 2.9058592477826373e-06, "loss": 0.0021, "step": 30310 }, { "epoch": 14.092050209205022, "grad_norm": 0.14530296623706818, "learning_rate": 2.9427193391261585e-06, "loss": 0.0027, "step": 30312 }, { "epoch": 14.092980009298001, "grad_norm": 0.058463890105485916, "learning_rate": 2.979797126924837e-06, "loss": 0.005, "step": 30314 }, { "epoch": 14.093909809390981, "grad_norm": 0.20294958353042603, "learning_rate": 3.017092245236038e-06, "loss": 0.0033, "step": 30316 }, { "epoch": 14.09483960948396, "grad_norm": 0.2553772032260895, "learning_rate": 3.05460432597192e-06, "loss": 0.0037, "step": 30318 }, { "epoch": 14.09576940957694, "grad_norm": 0.030064230784773827, "learning_rate": 3.0923329989033925e-06, "loss": 0.0012, "step": 30320 }, { "epoch": 14.096699209669922, "grad_norm": 0.2751457989215851, "learning_rate": 3.130277891663595e-06, "loss": 0.0018, "step": 30322 }, { "epoch": 14.097629009762901, "grad_norm": 0.5255541801452637, "learning_rate": 3.1684386297519305e-06, "loss": 0.0044, "step": 30324 }, { "epoch": 14.09855880985588, "grad_norm": 0.11131514608860016, "learning_rate": 3.2068148365373174e-06, "loss": 0.0019, "step": 30326 }, { "epoch": 14.09948860994886, "grad_norm": 0.19205957651138306, "learning_rate": 3.2454061332618306e-06, "loss": 0.0021, "step": 30328 }, { "epoch": 14.100418410041842, "grad_norm": 0.06579995900392532, "learning_rate": 3.2842121390452095e-06, "loss": 0.0017, "step": 30330 }, { "epoch": 14.101348210134821, "grad_norm": 0.03077389858663082, "learning_rate": 3.3232324708877575e-06, "loss": 0.0008, "step": 30332 }, { "epoch": 14.102278010227801, "grad_norm": 0.9724298715591431, "learning_rate": 3.3624667436745556e-06, "loss": 0.0091, "step": 30334 }, { "epoch": 14.10320781032078, "grad_norm": 0.2761405110359192, "learning_rate": 3.401914570179076e-06, "loss": 0.0018, "step": 30336 }, { "epoch": 14.104137610413762, "grad_norm": 0.6436226963996887, "learning_rate": 3.4415755610673806e-06, "loss": 0.007, "step": 30338 }, { "epoch": 14.105067410506742, "grad_norm": 0.13116466999053955, "learning_rate": 3.4814493249014087e-06, "loss": 0.0048, "step": 30340 }, { "epoch": 14.105997210599721, "grad_norm": 0.07932352274656296, "learning_rate": 3.5215354681431193e-06, "loss": 0.0018, "step": 30342 }, { "epoch": 14.1069270106927, "grad_norm": 0.06800016760826111, "learning_rate": 3.561833595158641e-06, "loss": 0.002, "step": 30344 }, { "epoch": 14.10785681078568, "grad_norm": 0.18668538331985474, "learning_rate": 3.6023433082216433e-06, "loss": 0.0015, "step": 30346 }, { "epoch": 14.108786610878662, "grad_norm": 0.2742172181606293, "learning_rate": 3.6430642075175166e-06, "loss": 0.0027, "step": 30348 }, { "epoch": 14.109716410971641, "grad_norm": 0.12916380167007446, "learning_rate": 3.68399589114761e-06, "loss": 0.0019, "step": 30350 }, { "epoch": 14.110646211064621, "grad_norm": 0.06267956644296646, "learning_rate": 3.7251379551326406e-06, "loss": 0.0073, "step": 30352 }, { "epoch": 14.1115760111576, "grad_norm": 0.04849929362535477, "learning_rate": 3.7664899934171352e-06, "loss": 0.0017, "step": 30354 }, { "epoch": 14.112505811250582, "grad_norm": 0.11507763713598251, "learning_rate": 3.8080515978728968e-06, "loss": 0.0027, "step": 30356 }, { "epoch": 14.113435611343562, "grad_norm": 0.29120975732803345, "learning_rate": 3.849822358303942e-06, "loss": 0.0037, "step": 30358 }, { "epoch": 14.114365411436541, "grad_norm": 0.7126489877700806, "learning_rate": 3.8918018624496414e-06, "loss": 0.0093, "step": 30360 }, { "epoch": 14.11529521152952, "grad_norm": 0.044552456587553024, "learning_rate": 3.933989695989139e-06, "loss": 0.0018, "step": 30362 }, { "epoch": 14.1162250116225, "grad_norm": 0.17802882194519043, "learning_rate": 3.976385442545736e-06, "loss": 0.0022, "step": 30364 }, { "epoch": 14.117154811715482, "grad_norm": 0.26345816254615784, "learning_rate": 4.0189886836904425e-06, "loss": 0.0029, "step": 30366 }, { "epoch": 14.118084611808461, "grad_norm": 0.05318876728415489, "learning_rate": 4.0617989989463685e-06, "loss": 0.001, "step": 30368 }, { "epoch": 14.119014411901441, "grad_norm": 0.20323750376701355, "learning_rate": 4.1048159657931755e-06, "loss": 0.0019, "step": 30370 }, { "epoch": 14.11994421199442, "grad_norm": 0.04819450154900551, "learning_rate": 4.148039159670674e-06, "loss": 0.0013, "step": 30372 }, { "epoch": 14.120874012087402, "grad_norm": 0.11756106466054916, "learning_rate": 4.1914681539833904e-06, "loss": 0.0019, "step": 30374 }, { "epoch": 14.121803812180381, "grad_norm": 0.1842953860759735, "learning_rate": 4.235102520104583e-06, "loss": 0.0017, "step": 30376 }, { "epoch": 14.122733612273361, "grad_norm": 0.13916540145874023, "learning_rate": 4.278941827380873e-06, "loss": 0.0016, "step": 30378 }, { "epoch": 14.12366341236634, "grad_norm": 0.23288321495056152, "learning_rate": 4.322985643135998e-06, "loss": 0.0076, "step": 30380 }, { "epoch": 14.124593212459322, "grad_norm": 1.362035870552063, "learning_rate": 4.367233532674974e-06, "loss": 0.0096, "step": 30382 }, { "epoch": 14.125523012552302, "grad_norm": 0.3044731020927429, "learning_rate": 4.411685059289302e-06, "loss": 0.0027, "step": 30384 }, { "epoch": 14.126452812645281, "grad_norm": 0.9758578538894653, "learning_rate": 4.456339784260255e-06, "loss": 0.0054, "step": 30386 }, { "epoch": 14.127382612738261, "grad_norm": 0.11418761312961578, "learning_rate": 4.501197266863724e-06, "loss": 0.002, "step": 30388 }, { "epoch": 14.12831241283124, "grad_norm": 0.2866089940071106, "learning_rate": 4.5462570643743675e-06, "loss": 0.0026, "step": 30390 }, { "epoch": 14.129242212924222, "grad_norm": 0.37590163946151733, "learning_rate": 4.591518732070376e-06, "loss": 0.003, "step": 30392 }, { "epoch": 14.130172013017201, "grad_norm": 0.06343013793230057, "learning_rate": 4.636981823237265e-06, "loss": 0.0015, "step": 30394 }, { "epoch": 14.131101813110181, "grad_norm": 0.046702783554792404, "learning_rate": 4.682645889172571e-06, "loss": 0.0015, "step": 30396 }, { "epoch": 14.13203161320316, "grad_norm": 0.07428744435310364, "learning_rate": 4.7285104791906046e-06, "loss": 0.0016, "step": 30398 }, { "epoch": 14.132961413296142, "grad_norm": 0.06547071784734726, "learning_rate": 4.774575140626278e-06, "loss": 0.0041, "step": 30400 }, { "epoch": 14.133891213389122, "grad_norm": 0.08326936513185501, "learning_rate": 4.820839418839875e-06, "loss": 0.0015, "step": 30402 }, { "epoch": 14.134821013482101, "grad_norm": 0.06176802143454552, "learning_rate": 4.867302857221855e-06, "loss": 0.0014, "step": 30404 }, { "epoch": 14.135750813575081, "grad_norm": 0.5944562554359436, "learning_rate": 4.913964997196737e-06, "loss": 0.004, "step": 30406 }, { "epoch": 14.13668061366806, "grad_norm": 0.43745535612106323, "learning_rate": 4.9608253782281445e-06, "loss": 0.0149, "step": 30408 }, { "epoch": 14.137610413761042, "grad_norm": 0.08340752124786377, "learning_rate": 5.007883537822711e-06, "loss": 0.0014, "step": 30410 }, { "epoch": 14.138540213854021, "grad_norm": 0.036936238408088684, "learning_rate": 5.055139011535724e-06, "loss": 0.0017, "step": 30412 }, { "epoch": 14.139470013947001, "grad_norm": 0.05951882526278496, "learning_rate": 5.102591332974624e-06, "loss": 0.0018, "step": 30414 }, { "epoch": 14.14039981403998, "grad_norm": 0.4653213918209076, "learning_rate": 5.150240033804055e-06, "loss": 0.0045, "step": 30416 }, { "epoch": 14.141329614132962, "grad_norm": 0.4143482744693756, "learning_rate": 5.198084643750787e-06, "loss": 0.0081, "step": 30418 }, { "epoch": 14.142259414225942, "grad_norm": 0.11933967471122742, "learning_rate": 5.246124690607726e-06, "loss": 0.002, "step": 30420 }, { "epoch": 14.143189214318921, "grad_norm": 0.4115983843803406, "learning_rate": 5.2943597002390086e-06, "loss": 0.0056, "step": 30422 }, { "epoch": 14.144119014411901, "grad_norm": 0.08996754139661789, "learning_rate": 5.34278919658445e-06, "loss": 0.0019, "step": 30424 }, { "epoch": 14.145048814504882, "grad_norm": 0.2482747584581375, "learning_rate": 5.3914127016646955e-06, "loss": 0.0059, "step": 30426 }, { "epoch": 14.145978614597862, "grad_norm": 0.09250599890947342, "learning_rate": 5.440229735585268e-06, "loss": 0.0036, "step": 30428 }, { "epoch": 14.146908414690841, "grad_norm": 0.7441516518592834, "learning_rate": 5.489239816541642e-06, "loss": 0.0038, "step": 30430 }, { "epoch": 14.147838214783821, "grad_norm": 0.05284484848380089, "learning_rate": 5.538442460824324e-06, "loss": 0.0018, "step": 30432 }, { "epoch": 14.1487680148768, "grad_norm": 0.09987252205610275, "learning_rate": 5.58783718282297e-06, "loss": 0.0019, "step": 30434 }, { "epoch": 14.149697814969782, "grad_norm": 0.057775307446718216, "learning_rate": 5.637423495031614e-06, "loss": 0.0011, "step": 30436 }, { "epoch": 14.150627615062762, "grad_norm": 0.8590948581695557, "learning_rate": 5.687200908053373e-06, "loss": 0.0045, "step": 30438 }, { "epoch": 14.151557415155741, "grad_norm": 0.06408783048391342, "learning_rate": 5.737168930605279e-06, "loss": 0.0022, "step": 30440 }, { "epoch": 14.15248721524872, "grad_norm": 0.08885090053081512, "learning_rate": 5.787327069523121e-06, "loss": 0.0016, "step": 30442 }, { "epoch": 14.153417015341702, "grad_norm": 0.9282169342041016, "learning_rate": 5.8376748297662e-06, "loss": 0.0056, "step": 30444 }, { "epoch": 14.154346815434682, "grad_norm": 0.4988206624984741, "learning_rate": 5.88821171442268e-06, "loss": 0.0051, "step": 30446 }, { "epoch": 14.155276615527661, "grad_norm": 1.2323634624481201, "learning_rate": 5.9389372247138e-06, "loss": 0.0044, "step": 30448 }, { "epoch": 14.156206415620641, "grad_norm": 0.14485082030296326, "learning_rate": 5.989850859999135e-06, "loss": 0.0018, "step": 30450 }, { "epoch": 14.15713621571362, "grad_norm": 0.9890506267547607, "learning_rate": 6.040952117781886e-06, "loss": 0.0115, "step": 30452 }, { "epoch": 14.158066015806602, "grad_norm": 0.03995579108595848, "learning_rate": 6.09224049371316e-06, "loss": 0.0015, "step": 30454 }, { "epoch": 14.158995815899582, "grad_norm": 0.3138347566127777, "learning_rate": 6.1437154815972724e-06, "loss": 0.0095, "step": 30456 }, { "epoch": 14.159925615992561, "grad_norm": 0.41128385066986084, "learning_rate": 6.19537657339711e-06, "loss": 0.0059, "step": 30458 }, { "epoch": 14.16085541608554, "grad_norm": 0.745980441570282, "learning_rate": 6.247223259238433e-06, "loss": 0.0038, "step": 30460 }, { "epoch": 14.161785216178522, "grad_norm": 0.0534944050014019, "learning_rate": 6.2992550274155014e-06, "loss": 0.0028, "step": 30462 }, { "epoch": 14.162715016271502, "grad_norm": 0.23032143712043762, "learning_rate": 6.3514713643954195e-06, "loss": 0.0026, "step": 30464 }, { "epoch": 14.163644816364481, "grad_norm": 0.1572064310312271, "learning_rate": 6.403871754824372e-06, "loss": 0.0023, "step": 30466 }, { "epoch": 14.164574616457461, "grad_norm": 0.06740123778581619, "learning_rate": 6.456455681531545e-06, "loss": 0.0017, "step": 30468 }, { "epoch": 14.165504416550442, "grad_norm": 0.08984769880771637, "learning_rate": 6.5092226255346875e-06, "loss": 0.0026, "step": 30470 }, { "epoch": 14.166434216643422, "grad_norm": 0.06375354528427124, "learning_rate": 6.562172066045608e-06, "loss": 0.0016, "step": 30472 }, { "epoch": 14.167364016736402, "grad_norm": 0.28653761744499207, "learning_rate": 6.6153034804745866e-06, "loss": 0.0036, "step": 30474 }, { "epoch": 14.168293816829381, "grad_norm": 0.08280173689126968, "learning_rate": 6.66861634443601e-06, "loss": 0.0014, "step": 30476 }, { "epoch": 14.16922361692236, "grad_norm": 0.43601298332214355, "learning_rate": 6.722110131753311e-06, "loss": 0.0131, "step": 30478 }, { "epoch": 14.170153417015342, "grad_norm": 0.045269742608070374, "learning_rate": 6.775784314464651e-06, "loss": 0.0012, "step": 30480 }, { "epoch": 14.171083217108322, "grad_norm": 0.08218792825937271, "learning_rate": 6.829638362827396e-06, "loss": 0.0019, "step": 30482 }, { "epoch": 14.172013017201301, "grad_norm": 0.322140097618103, "learning_rate": 6.883671745323707e-06, "loss": 0.0041, "step": 30484 }, { "epoch": 14.172942817294281, "grad_norm": 0.12010494619607925, "learning_rate": 6.9378839286661535e-06, "loss": 0.0018, "step": 30486 }, { "epoch": 14.173872617387262, "grad_norm": 0.17026375234127045, "learning_rate": 6.992274377802252e-06, "loss": 0.0016, "step": 30488 }, { "epoch": 14.174802417480242, "grad_norm": 0.8417558670043945, "learning_rate": 7.046842555920233e-06, "loss": 0.0063, "step": 30490 }, { "epoch": 14.175732217573222, "grad_norm": 0.0656307116150856, "learning_rate": 7.1015879244542e-06, "loss": 0.0019, "step": 30492 }, { "epoch": 14.176662017666201, "grad_norm": 1.3815898895263672, "learning_rate": 7.156509943089475e-06, "loss": 0.0075, "step": 30494 }, { "epoch": 14.177591817759183, "grad_norm": 0.5237112641334534, "learning_rate": 7.2116080697679015e-06, "loss": 0.0056, "step": 30496 }, { "epoch": 14.178521617852162, "grad_norm": 0.13688161969184875, "learning_rate": 7.266881760693091e-06, "loss": 0.0022, "step": 30498 }, { "epoch": 14.179451417945142, "grad_norm": 0.06999306380748749, "learning_rate": 7.322330470336277e-06, "loss": 0.0017, "step": 30500 }, { "epoch": 14.180381218038121, "grad_norm": 0.48265740275382996, "learning_rate": 7.3779536514409595e-06, "loss": 0.0171, "step": 30502 }, { "epoch": 14.181311018131101, "grad_norm": 0.45970574021339417, "learning_rate": 7.433750755028665e-06, "loss": 0.0041, "step": 30504 }, { "epoch": 14.182240818224082, "grad_norm": 0.2686847448348999, "learning_rate": 7.489721230404764e-06, "loss": 0.0038, "step": 30506 }, { "epoch": 14.183170618317062, "grad_norm": 0.15309564769268036, "learning_rate": 7.545864525163136e-06, "loss": 0.0015, "step": 30508 }, { "epoch": 14.184100418410042, "grad_norm": 0.13067421317100525, "learning_rate": 7.602180085192125e-06, "loss": 0.0019, "step": 30510 }, { "epoch": 14.185030218503021, "grad_norm": 0.09775790572166443, "learning_rate": 7.658667354679761e-06, "loss": 0.0023, "step": 30512 }, { "epoch": 14.185960018596003, "grad_norm": 0.35080569982528687, "learning_rate": 7.715325776119753e-06, "loss": 0.0037, "step": 30514 }, { "epoch": 14.186889818688982, "grad_norm": 0.467638224363327, "learning_rate": 7.77215479031623e-06, "loss": 0.0042, "step": 30516 }, { "epoch": 14.187819618781962, "grad_norm": 0.20191359519958496, "learning_rate": 7.829153836389762e-06, "loss": 0.0025, "step": 30518 }, { "epoch": 14.188749418874941, "grad_norm": 0.24704182147979736, "learning_rate": 7.886322351782781e-06, "loss": 0.0029, "step": 30520 }, { "epoch": 14.189679218967921, "grad_norm": 0.3846128284931183, "learning_rate": 7.943659772265115e-06, "loss": 0.0028, "step": 30522 }, { "epoch": 14.190609019060902, "grad_norm": 0.07359585911035538, "learning_rate": 8.001165531939442e-06, "loss": 0.0021, "step": 30524 }, { "epoch": 14.191538819153882, "grad_norm": 1.4060180187225342, "learning_rate": 8.058839063247394e-06, "loss": 0.0212, "step": 30526 }, { "epoch": 14.192468619246862, "grad_norm": 0.660673975944519, "learning_rate": 8.11667979697437e-06, "loss": 0.0055, "step": 30528 }, { "epoch": 14.193398419339841, "grad_norm": 0.12095753848552704, "learning_rate": 8.174687162255676e-06, "loss": 0.0024, "step": 30530 }, { "epoch": 14.194328219432823, "grad_norm": 0.08462349325418472, "learning_rate": 8.232860586581904e-06, "loss": 0.0021, "step": 30532 }, { "epoch": 14.195258019525802, "grad_norm": 0.04549247771501541, "learning_rate": 8.291199495805106e-06, "loss": 0.0019, "step": 30534 }, { "epoch": 14.196187819618782, "grad_norm": 0.1694968044757843, "learning_rate": 8.34970331414367e-06, "loss": 0.0019, "step": 30536 }, { "epoch": 14.197117619711761, "grad_norm": 0.9615742564201355, "learning_rate": 8.408371464188395e-06, "loss": 0.0089, "step": 30538 }, { "epoch": 14.198047419804743, "grad_norm": 0.05452558770775795, "learning_rate": 8.467203366908592e-06, "loss": 0.005, "step": 30540 }, { "epoch": 14.198977219897722, "grad_norm": 0.08506672084331512, "learning_rate": 8.526198441656993e-06, "loss": 0.0018, "step": 30542 }, { "epoch": 14.199907019990702, "grad_norm": 0.6110978126525879, "learning_rate": 8.585356106175899e-06, "loss": 0.004, "step": 30544 }, { "epoch": 14.200836820083682, "grad_norm": 0.03772873803973198, "learning_rate": 8.644675776603446e-06, "loss": 0.0025, "step": 30546 }, { "epoch": 14.201766620176661, "grad_norm": 0.028153346851468086, "learning_rate": 8.704156867478032e-06, "loss": 0.0012, "step": 30548 }, { "epoch": 14.202696420269643, "grad_norm": 0.03993380814790726, "learning_rate": 8.763798791745442e-06, "loss": 0.0015, "step": 30550 }, { "epoch": 14.203626220362622, "grad_norm": 0.24566878378391266, "learning_rate": 8.823600960763819e-06, "loss": 0.0016, "step": 30552 }, { "epoch": 14.204556020455602, "grad_norm": 0.13572536408901215, "learning_rate": 8.883562784310171e-06, "loss": 0.0023, "step": 30554 }, { "epoch": 14.205485820548581, "grad_norm": 0.04831775650382042, "learning_rate": 8.943683670585377e-06, "loss": 0.0013, "step": 30556 }, { "epoch": 14.206415620641563, "grad_norm": 0.17120663821697235, "learning_rate": 9.003963026220417e-06, "loss": 0.0021, "step": 30558 }, { "epoch": 14.207345420734542, "grad_norm": 0.32331278920173645, "learning_rate": 9.064400256282663e-06, "loss": 0.0025, "step": 30560 }, { "epoch": 14.208275220827522, "grad_norm": 0.07605667412281036, "learning_rate": 9.124994764280922e-06, "loss": 0.0015, "step": 30562 }, { "epoch": 14.209205020920502, "grad_norm": 0.04694665968418121, "learning_rate": 9.185745952171858e-06, "loss": 0.0014, "step": 30564 }, { "epoch": 14.210134821013481, "grad_norm": 0.24494805932044983, "learning_rate": 9.246653220365635e-06, "loss": 0.0035, "step": 30566 }, { "epoch": 14.211064621106463, "grad_norm": 0.2122618705034256, "learning_rate": 9.30771596773238e-06, "loss": 0.002, "step": 30568 }, { "epoch": 14.211994421199442, "grad_norm": 0.05432547628879547, "learning_rate": 9.368933591607294e-06, "loss": 0.0031, "step": 30570 }, { "epoch": 14.212924221292422, "grad_norm": 0.23594287037849426, "learning_rate": 9.430305487796998e-06, "loss": 0.0028, "step": 30572 }, { "epoch": 14.213854021385401, "grad_norm": 0.5168774127960205, "learning_rate": 9.49183105058608e-06, "loss": 0.0069, "step": 30574 }, { "epoch": 14.214783821478383, "grad_norm": 0.11365356296300888, "learning_rate": 9.553509672741653e-06, "loss": 0.0018, "step": 30576 }, { "epoch": 14.215713621571362, "grad_norm": 0.08670661598443985, "learning_rate": 9.615340745520614e-06, "loss": 0.0091, "step": 30578 }, { "epoch": 14.216643421664342, "grad_norm": 0.29955771565437317, "learning_rate": 9.677323658675521e-06, "loss": 0.0037, "step": 30580 }, { "epoch": 14.217573221757322, "grad_norm": 0.07175394892692566, "learning_rate": 9.739457800459904e-06, "loss": 0.0023, "step": 30582 }, { "epoch": 14.218503021850303, "grad_norm": 0.0296025313436985, "learning_rate": 9.801742557634862e-06, "loss": 0.0013, "step": 30584 }, { "epoch": 14.219432821943283, "grad_norm": 0.4670186936855316, "learning_rate": 9.864177315474855e-06, "loss": 0.0043, "step": 30586 }, { "epoch": 14.220362622036262, "grad_norm": 0.1837148517370224, "learning_rate": 9.926761457774304e-06, "loss": 0.0017, "step": 30588 }, { "epoch": 14.221292422129242, "grad_norm": 0.4819706380367279, "learning_rate": 9.989494366852851e-06, "loss": 0.0038, "step": 30590 }, { "epoch": 14.222222222222221, "grad_norm": 0.17404089868068695, "learning_rate": 1.005237542356187e-05, "loss": 0.0024, "step": 30592 }, { "epoch": 14.223152022315203, "grad_norm": 0.26125821471214294, "learning_rate": 1.0115404007291001e-05, "loss": 0.0043, "step": 30594 }, { "epoch": 14.224081822408182, "grad_norm": 0.20328371226787567, "learning_rate": 1.0178579495973424e-05, "loss": 0.0018, "step": 30596 }, { "epoch": 14.225011622501162, "grad_norm": 0.17166443169116974, "learning_rate": 1.0241901266092572e-05, "loss": 0.0028, "step": 30598 }, { "epoch": 14.225941422594142, "grad_norm": 0.3082215487957001, "learning_rate": 1.0305368692688137e-05, "loss": 0.0019, "step": 30600 }, { "epoch": 14.226871222687123, "grad_norm": 0.08532168716192245, "learning_rate": 1.0368981149362247e-05, "loss": 0.0018, "step": 30602 }, { "epoch": 14.227801022780103, "grad_norm": 0.03028847463428974, "learning_rate": 1.043273800828563e-05, "loss": 0.0023, "step": 30604 }, { "epoch": 14.228730822873082, "grad_norm": 0.20005953311920166, "learning_rate": 1.0496638640203684e-05, "loss": 0.003, "step": 30606 }, { "epoch": 14.229660622966062, "grad_norm": 0.04372187703847885, "learning_rate": 1.0560682414443261e-05, "loss": 0.0019, "step": 30608 }, { "epoch": 14.230590423059041, "grad_norm": 0.5450424551963806, "learning_rate": 1.0624868698918018e-05, "loss": 0.0038, "step": 30610 }, { "epoch": 14.231520223152023, "grad_norm": 0.034104686230421066, "learning_rate": 1.0689196860135095e-05, "loss": 0.0028, "step": 30612 }, { "epoch": 14.232450023245002, "grad_norm": 0.11856617778539658, "learning_rate": 1.0753666263201806e-05, "loss": 0.0015, "step": 30614 }, { "epoch": 14.233379823337982, "grad_norm": 0.7326923608779907, "learning_rate": 1.0818276271831022e-05, "loss": 0.0035, "step": 30616 }, { "epoch": 14.234309623430962, "grad_norm": 0.10084733366966248, "learning_rate": 1.088302624834804e-05, "loss": 0.0022, "step": 30618 }, { "epoch": 14.235239423523943, "grad_norm": 0.1743748039007187, "learning_rate": 1.0947915553696588e-05, "loss": 0.0017, "step": 30620 }, { "epoch": 14.236169223616923, "grad_norm": 0.08993154764175415, "learning_rate": 1.101294354744571e-05, "loss": 0.0023, "step": 30622 }, { "epoch": 14.237099023709902, "grad_norm": 0.064790278673172, "learning_rate": 1.1078109587795218e-05, "loss": 0.0015, "step": 30624 }, { "epoch": 14.238028823802882, "grad_norm": 0.38086849451065063, "learning_rate": 1.1143413031582453e-05, "loss": 0.0033, "step": 30626 }, { "epoch": 14.238958623895863, "grad_norm": 0.38386258482933044, "learning_rate": 1.1208853234289229e-05, "loss": 0.0088, "step": 30628 }, { "epoch": 14.239888423988843, "grad_norm": 0.07818017154932022, "learning_rate": 1.1274429550046724e-05, "loss": 0.0016, "step": 30630 }, { "epoch": 14.240818224081822, "grad_norm": 0.04307740926742554, "learning_rate": 1.1340141331643173e-05, "loss": 0.0039, "step": 30632 }, { "epoch": 14.241748024174802, "grad_norm": 0.12691400945186615, "learning_rate": 1.1405987930530121e-05, "loss": 0.0016, "step": 30634 }, { "epoch": 14.242677824267782, "grad_norm": 0.08479330688714981, "learning_rate": 1.1471968696828068e-05, "loss": 0.0017, "step": 30636 }, { "epoch": 14.243607624360763, "grad_norm": 0.12207812815904617, "learning_rate": 1.1538082979333493e-05, "loss": 0.0049, "step": 30638 }, { "epoch": 14.244537424453743, "grad_norm": 0.06097836419939995, "learning_rate": 1.1604330125524966e-05, "loss": 0.0032, "step": 30640 }, { "epoch": 14.245467224546722, "grad_norm": 0.09967212378978729, "learning_rate": 1.1670709481570197e-05, "loss": 0.0022, "step": 30642 }, { "epoch": 14.246397024639702, "grad_norm": 0.04205990955233574, "learning_rate": 1.1737220392331597e-05, "loss": 0.0013, "step": 30644 }, { "epoch": 14.247326824732683, "grad_norm": 0.3097858428955078, "learning_rate": 1.1803862201373169e-05, "loss": 0.0037, "step": 30646 }, { "epoch": 14.248256624825663, "grad_norm": 0.2506197988986969, "learning_rate": 1.1870634250967469e-05, "loss": 0.0025, "step": 30648 }, { "epoch": 14.249186424918642, "grad_norm": 0.2078268975019455, "learning_rate": 1.1937535882101175e-05, "loss": 0.0017, "step": 30650 }, { "epoch": 14.250116225011622, "grad_norm": 0.13620907068252563, "learning_rate": 1.2004566434482192e-05, "loss": 0.0017, "step": 30652 }, { "epoch": 14.251046025104603, "grad_norm": 0.726844072341919, "learning_rate": 1.207172524654588e-05, "loss": 0.0074, "step": 30654 }, { "epoch": 14.251975825197583, "grad_norm": 0.11833002418279648, "learning_rate": 1.2139011655462335e-05, "loss": 0.0045, "step": 30656 }, { "epoch": 14.252905625290563, "grad_norm": 0.15475572645664215, "learning_rate": 1.2206424997141408e-05, "loss": 0.0028, "step": 30658 }, { "epoch": 14.253835425383542, "grad_norm": 0.11045090854167938, "learning_rate": 1.227396460624063e-05, "loss": 0.0019, "step": 30660 }, { "epoch": 14.254765225476522, "grad_norm": 0.6594324707984924, "learning_rate": 1.2341629816171632e-05, "loss": 0.0069, "step": 30662 }, { "epoch": 14.255695025569503, "grad_norm": 0.4305926561355591, "learning_rate": 1.2409419959105957e-05, "loss": 0.0103, "step": 30664 }, { "epoch": 14.256624825662483, "grad_norm": 0.37405818700790405, "learning_rate": 1.2477334365982113e-05, "loss": 0.0046, "step": 30666 }, { "epoch": 14.257554625755462, "grad_norm": 0.9757946729660034, "learning_rate": 1.2545372366512624e-05, "loss": 0.0072, "step": 30668 }, { "epoch": 14.258484425848442, "grad_norm": 0.27647337317466736, "learning_rate": 1.2613533289189743e-05, "loss": 0.0026, "step": 30670 }, { "epoch": 14.259414225941423, "grad_norm": 0.36852797865867615, "learning_rate": 1.2681816461292678e-05, "loss": 0.0026, "step": 30672 }, { "epoch": 14.260344026034403, "grad_norm": 0.1706155240535736, "learning_rate": 1.275022120889393e-05, "loss": 0.0027, "step": 30674 }, { "epoch": 14.261273826127383, "grad_norm": 0.21680930256843567, "learning_rate": 1.2818746856866573e-05, "loss": 0.0025, "step": 30676 }, { "epoch": 14.262203626220362, "grad_norm": 0.26159974932670593, "learning_rate": 1.2887392728889965e-05, "loss": 0.0028, "step": 30678 }, { "epoch": 14.263133426313342, "grad_norm": 0.3113652169704437, "learning_rate": 1.295615814745691e-05, "loss": 0.0033, "step": 30680 }, { "epoch": 14.264063226406323, "grad_norm": 0.08888162672519684, "learning_rate": 1.3025042433880957e-05, "loss": 0.0015, "step": 30682 }, { "epoch": 14.264993026499303, "grad_norm": 0.26765671372413635, "learning_rate": 1.309404490830154e-05, "loss": 0.0044, "step": 30684 }, { "epoch": 14.265922826592282, "grad_norm": 0.301366925239563, "learning_rate": 1.316316488969225e-05, "loss": 0.003, "step": 30686 }, { "epoch": 14.266852626685262, "grad_norm": 0.3268454074859619, "learning_rate": 1.3232401695866621e-05, "loss": 0.0037, "step": 30688 }, { "epoch": 14.267782426778243, "grad_norm": 1.51451575756073, "learning_rate": 1.3301754643485632e-05, "loss": 0.0067, "step": 30690 }, { "epoch": 14.268712226871223, "grad_norm": 0.07981948554515839, "learning_rate": 1.337122304806354e-05, "loss": 0.0037, "step": 30692 }, { "epoch": 14.269642026964203, "grad_norm": 0.11164399236440659, "learning_rate": 1.3440806223975086e-05, "loss": 0.0021, "step": 30694 }, { "epoch": 14.270571827057182, "grad_norm": 0.7356608510017395, "learning_rate": 1.3510503484462717e-05, "loss": 0.0076, "step": 30696 }, { "epoch": 14.271501627150164, "grad_norm": 0.1343671977519989, "learning_rate": 1.3580314141642465e-05, "loss": 0.0024, "step": 30698 }, { "epoch": 14.272431427243143, "grad_norm": 0.06017959117889404, "learning_rate": 1.3650237506511159e-05, "loss": 0.0023, "step": 30700 }, { "epoch": 14.273361227336123, "grad_norm": 0.20539279282093048, "learning_rate": 1.3720272888953736e-05, "loss": 0.0016, "step": 30702 }, { "epoch": 14.274291027429102, "grad_norm": 0.11757523566484451, "learning_rate": 1.3790419597749092e-05, "loss": 0.0019, "step": 30704 }, { "epoch": 14.275220827522082, "grad_norm": 0.10079846531152725, "learning_rate": 1.3860676940577525e-05, "loss": 0.0014, "step": 30706 }, { "epoch": 14.276150627615063, "grad_norm": 0.07148825377225876, "learning_rate": 1.3931044224027272e-05, "loss": 0.0015, "step": 30708 }, { "epoch": 14.277080427708043, "grad_norm": 0.5943000316619873, "learning_rate": 1.4001520753602126e-05, "loss": 0.0064, "step": 30710 }, { "epoch": 14.278010227801023, "grad_norm": 0.41369375586509705, "learning_rate": 1.4072105833726718e-05, "loss": 0.0023, "step": 30712 }, { "epoch": 14.278940027894002, "grad_norm": 0.05238046869635582, "learning_rate": 1.4142798767754801e-05, "loss": 0.0041, "step": 30714 }, { "epoch": 14.279869827986984, "grad_norm": 0.31361883878707886, "learning_rate": 1.4213598857975974e-05, "loss": 0.0059, "step": 30716 }, { "epoch": 14.280799628079963, "grad_norm": 0.6291618347167969, "learning_rate": 1.428450540562178e-05, "loss": 0.0159, "step": 30718 }, { "epoch": 14.281729428172943, "grad_norm": 0.3921574056148529, "learning_rate": 1.4355517710873046e-05, "loss": 0.0029, "step": 30720 }, { "epoch": 14.282659228265922, "grad_norm": 0.0880734995007515, "learning_rate": 1.4426635072867316e-05, "loss": 0.0014, "step": 30722 }, { "epoch": 14.283589028358902, "grad_norm": 0.05896661430597305, "learning_rate": 1.4497856789704777e-05, "loss": 0.0017, "step": 30724 }, { "epoch": 14.284518828451883, "grad_norm": 0.2004019320011139, "learning_rate": 1.4569182158455834e-05, "loss": 0.0016, "step": 30726 }, { "epoch": 14.285448628544863, "grad_norm": 3.774836540222168, "learning_rate": 1.4640610475167737e-05, "loss": 0.0079, "step": 30728 }, { "epoch": 14.286378428637843, "grad_norm": 1.266628623008728, "learning_rate": 1.4712141034872156e-05, "loss": 0.0052, "step": 30730 }, { "epoch": 14.287308228730822, "grad_norm": 0.5789543986320496, "learning_rate": 1.478377313159119e-05, "loss": 0.0115, "step": 30732 }, { "epoch": 14.288238028823804, "grad_norm": 0.14720310270786285, "learning_rate": 1.4855506058344788e-05, "loss": 0.0025, "step": 30734 }, { "epoch": 14.289167828916783, "grad_norm": 0.2508601248264313, "learning_rate": 1.4927339107158257e-05, "loss": 0.0031, "step": 30736 }, { "epoch": 14.290097629009763, "grad_norm": 0.04337034374475479, "learning_rate": 1.4999271569068402e-05, "loss": 0.0077, "step": 30738 }, { "epoch": 14.291027429102742, "grad_norm": 0.11303305625915527, "learning_rate": 1.5071302734130543e-05, "loss": 0.0023, "step": 30740 }, { "epoch": 14.291957229195724, "grad_norm": 0.05495798587799072, "learning_rate": 1.5143431891426157e-05, "loss": 0.0017, "step": 30742 }, { "epoch": 14.292887029288703, "grad_norm": 0.13336677849292755, "learning_rate": 1.5215658329069907e-05, "loss": 0.0026, "step": 30744 }, { "epoch": 14.293816829381683, "grad_norm": 0.037280499935150146, "learning_rate": 1.5287981334215848e-05, "loss": 0.0012, "step": 30746 }, { "epoch": 14.294746629474663, "grad_norm": 0.8918995261192322, "learning_rate": 1.5360400193064958e-05, "loss": 0.0027, "step": 30748 }, { "epoch": 14.295676429567642, "grad_norm": 0.28255683183670044, "learning_rate": 1.5432914190872662e-05, "loss": 0.0022, "step": 30750 }, { "epoch": 14.296606229660624, "grad_norm": 0.5289701223373413, "learning_rate": 1.5505522611954917e-05, "loss": 0.0039, "step": 30752 }, { "epoch": 14.297536029753603, "grad_norm": 0.21851107478141785, "learning_rate": 1.5578224739695748e-05, "loss": 0.0063, "step": 30754 }, { "epoch": 14.298465829846583, "grad_norm": 0.8075554966926575, "learning_rate": 1.565101985655484e-05, "loss": 0.0076, "step": 30756 }, { "epoch": 14.299395629939562, "grad_norm": 0.22505329549312592, "learning_rate": 1.5723907244073607e-05, "loss": 0.0034, "step": 30758 }, { "epoch": 14.300325430032544, "grad_norm": 0.3530547618865967, "learning_rate": 1.579688618288297e-05, "loss": 0.0018, "step": 30760 }, { "epoch": 14.301255230125523, "grad_norm": 0.093837209045887, "learning_rate": 1.5869955952710098e-05, "loss": 0.0027, "step": 30762 }, { "epoch": 14.302185030218503, "grad_norm": 0.1406087875366211, "learning_rate": 1.5943115832386356e-05, "loss": 0.0017, "step": 30764 }, { "epoch": 14.303114830311483, "grad_norm": 0.10057218372821808, "learning_rate": 1.601636509985277e-05, "loss": 0.0021, "step": 30766 }, { "epoch": 14.304044630404462, "grad_norm": 0.14185068011283875, "learning_rate": 1.6089703032168642e-05, "loss": 0.0029, "step": 30768 }, { "epoch": 14.304974430497444, "grad_norm": 1.2602512836456299, "learning_rate": 1.6163128905518517e-05, "loss": 0.0075, "step": 30770 }, { "epoch": 14.305904230590423, "grad_norm": 0.07109366357326508, "learning_rate": 1.6236641995218507e-05, "loss": 0.0021, "step": 30772 }, { "epoch": 14.306834030683403, "grad_norm": 0.1987350732088089, "learning_rate": 1.6310241575724094e-05, "loss": 0.0022, "step": 30774 }, { "epoch": 14.307763830776382, "grad_norm": 0.7250790596008301, "learning_rate": 1.638392692063696e-05, "loss": 0.0057, "step": 30776 }, { "epoch": 14.308693630869364, "grad_norm": 0.8497149348258972, "learning_rate": 1.6457697302712843e-05, "loss": 0.0047, "step": 30778 }, { "epoch": 14.309623430962343, "grad_norm": 0.49257931113243103, "learning_rate": 1.653155199386768e-05, "loss": 0.0035, "step": 30780 }, { "epoch": 14.310553231055323, "grad_norm": 0.13979408144950867, "learning_rate": 1.660549026518532e-05, "loss": 0.0015, "step": 30782 }, { "epoch": 14.311483031148303, "grad_norm": 0.0718572586774826, "learning_rate": 1.6679511386925208e-05, "loss": 0.0019, "step": 30784 }, { "epoch": 14.312412831241284, "grad_norm": 0.1614639312028885, "learning_rate": 1.6753614628528588e-05, "loss": 0.002, "step": 30786 }, { "epoch": 14.313342631334264, "grad_norm": 0.21123673021793365, "learning_rate": 1.6827799258626215e-05, "loss": 0.0025, "step": 30788 }, { "epoch": 14.314272431427243, "grad_norm": 0.6140891909599304, "learning_rate": 1.6902064545046078e-05, "loss": 0.0108, "step": 30790 }, { "epoch": 14.315202231520223, "grad_norm": 0.6200202107429504, "learning_rate": 1.6976409754819777e-05, "loss": 0.0047, "step": 30792 }, { "epoch": 14.316132031613202, "grad_norm": 0.10965987294912338, "learning_rate": 1.7050834154189774e-05, "loss": 0.0143, "step": 30794 }, { "epoch": 14.317061831706184, "grad_norm": 0.1638159155845642, "learning_rate": 1.7125337008617294e-05, "loss": 0.0048, "step": 30796 }, { "epoch": 14.317991631799163, "grad_norm": 0.24444575607776642, "learning_rate": 1.7199917582789582e-05, "loss": 0.003, "step": 30798 }, { "epoch": 14.318921431892143, "grad_norm": 0.25413045287132263, "learning_rate": 1.7274575140626304e-05, "loss": 0.0029, "step": 30800 }, { "epoch": 14.319851231985123, "grad_norm": 0.20796708762645721, "learning_rate": 1.734930894528734e-05, "loss": 0.0019, "step": 30802 }, { "epoch": 14.320781032078104, "grad_norm": 0.6053308248519897, "learning_rate": 1.7424118259180546e-05, "loss": 0.0044, "step": 30804 }, { "epoch": 14.321710832171084, "grad_norm": 0.26368698477745056, "learning_rate": 1.7499002343968027e-05, "loss": 0.0024, "step": 30806 }, { "epoch": 14.322640632264063, "grad_norm": 1.243884801864624, "learning_rate": 1.7573960460574093e-05, "loss": 0.0144, "step": 30808 }, { "epoch": 14.323570432357043, "grad_norm": 0.7679377198219299, "learning_rate": 1.7648991869192232e-05, "loss": 0.0025, "step": 30810 }, { "epoch": 14.324500232450024, "grad_norm": 0.4209423065185547, "learning_rate": 1.7724095829293074e-05, "loss": 0.0213, "step": 30812 }, { "epoch": 14.325430032543004, "grad_norm": 0.8158274292945862, "learning_rate": 1.7799271599630678e-05, "loss": 0.0164, "step": 30814 }, { "epoch": 14.326359832635983, "grad_norm": 0.07488299906253815, "learning_rate": 1.7874518438250374e-05, "loss": 0.0017, "step": 30816 }, { "epoch": 14.327289632728963, "grad_norm": 0.401353120803833, "learning_rate": 1.794983560249658e-05, "loss": 0.0032, "step": 30818 }, { "epoch": 14.328219432821943, "grad_norm": 0.11236962676048279, "learning_rate": 1.8025222349019297e-05, "loss": 0.0124, "step": 30820 }, { "epoch": 14.329149232914924, "grad_norm": 0.3404226005077362, "learning_rate": 1.810067793378125e-05, "loss": 0.0021, "step": 30822 }, { "epoch": 14.330079033007904, "grad_norm": 0.043104756623506546, "learning_rate": 1.81762016120668e-05, "loss": 0.0012, "step": 30824 }, { "epoch": 14.331008833100883, "grad_norm": 0.16861329972743988, "learning_rate": 1.8251792638487562e-05, "loss": 0.0026, "step": 30826 }, { "epoch": 14.331938633193863, "grad_norm": 0.08263277262449265, "learning_rate": 1.8327450266990616e-05, "loss": 0.0015, "step": 30828 }, { "epoch": 14.332868433286844, "grad_norm": 0.24836258590221405, "learning_rate": 1.8403173750865546e-05, "loss": 0.0105, "step": 30830 }, { "epoch": 14.333798233379824, "grad_norm": 0.10668892413377762, "learning_rate": 1.8478962342752485e-05, "loss": 0.0019, "step": 30832 }, { "epoch": 14.334728033472803, "grad_norm": 1.1644251346588135, "learning_rate": 1.8554815294648456e-05, "loss": 0.0053, "step": 30834 }, { "epoch": 14.335657833565783, "grad_norm": 0.5543082356452942, "learning_rate": 1.8630731857915263e-05, "loss": 0.003, "step": 30836 }, { "epoch": 14.336587633658763, "grad_norm": 0.23722200095653534, "learning_rate": 1.8706711283287427e-05, "loss": 0.0024, "step": 30838 }, { "epoch": 14.337517433751744, "grad_norm": 0.21911929547786713, "learning_rate": 1.878275282087852e-05, "loss": 0.0029, "step": 30840 }, { "epoch": 14.338447233844724, "grad_norm": 0.08976700156927109, "learning_rate": 1.8858855720189096e-05, "loss": 0.0019, "step": 30842 }, { "epoch": 14.339377033937703, "grad_norm": 0.11589184403419495, "learning_rate": 1.8935019230114606e-05, "loss": 0.0017, "step": 30844 }, { "epoch": 14.340306834030683, "grad_norm": 0.21102391183376312, "learning_rate": 1.9011242598951783e-05, "loss": 0.0022, "step": 30846 }, { "epoch": 14.341236634123664, "grad_norm": 0.26840806007385254, "learning_rate": 1.9087525074406926e-05, "loss": 0.0049, "step": 30848 }, { "epoch": 14.342166434216644, "grad_norm": 0.08884941041469574, "learning_rate": 1.916386590360227e-05, "loss": 0.0015, "step": 30850 }, { "epoch": 14.343096234309623, "grad_norm": 0.057167135179042816, "learning_rate": 1.924026433308518e-05, "loss": 0.0023, "step": 30852 }, { "epoch": 14.344026034402603, "grad_norm": 0.8865957260131836, "learning_rate": 1.931671960883382e-05, "loss": 0.0086, "step": 30854 }, { "epoch": 14.344955834495583, "grad_norm": 0.13155527412891388, "learning_rate": 1.9393230976265318e-05, "loss": 0.0043, "step": 30856 }, { "epoch": 14.345885634588564, "grad_norm": 1.2011173963546753, "learning_rate": 1.9469797680243706e-05, "loss": 0.0107, "step": 30858 }, { "epoch": 14.346815434681544, "grad_norm": 0.165470689535141, "learning_rate": 1.9546418965086357e-05, "loss": 0.0019, "step": 30860 }, { "epoch": 14.347745234774523, "grad_norm": 0.05512210726737976, "learning_rate": 1.9623094074572126e-05, "loss": 0.0014, "step": 30862 }, { "epoch": 14.348675034867503, "grad_norm": 0.18590013682842255, "learning_rate": 1.9699822251948464e-05, "loss": 0.0035, "step": 30864 }, { "epoch": 14.349604834960484, "grad_norm": 0.13020336627960205, "learning_rate": 1.9776602739939572e-05, "loss": 0.0052, "step": 30866 }, { "epoch": 14.350534635053464, "grad_norm": 0.1955970823764801, "learning_rate": 1.9853434780752868e-05, "loss": 0.003, "step": 30868 }, { "epoch": 14.351464435146443, "grad_norm": 0.051668889820575714, "learning_rate": 1.993031761608695e-05, "loss": 0.0012, "step": 30870 }, { "epoch": 14.352394235239423, "grad_norm": 0.350679486989975, "learning_rate": 2.0007250487139623e-05, "loss": 0.0026, "step": 30872 }, { "epoch": 14.353324035332404, "grad_norm": 0.5856779217720032, "learning_rate": 2.008423263461451e-05, "loss": 0.0073, "step": 30874 }, { "epoch": 14.354253835425384, "grad_norm": 0.11194495111703873, "learning_rate": 2.016126329872837e-05, "loss": 0.0025, "step": 30876 }, { "epoch": 14.355183635518364, "grad_norm": 0.10299288481473923, "learning_rate": 2.023834171922017e-05, "loss": 0.0022, "step": 30878 }, { "epoch": 14.356113435611343, "grad_norm": 0.7220467329025269, "learning_rate": 2.0315467135356836e-05, "loss": 0.0035, "step": 30880 }, { "epoch": 14.357043235704323, "grad_norm": 0.04419475793838501, "learning_rate": 2.039263878594166e-05, "loss": 0.0014, "step": 30882 }, { "epoch": 14.357973035797304, "grad_norm": 0.7808495163917542, "learning_rate": 2.046985590932142e-05, "loss": 0.0079, "step": 30884 }, { "epoch": 14.358902835890284, "grad_norm": 0.2198784500360489, "learning_rate": 2.0547117743394632e-05, "loss": 0.0037, "step": 30886 }, { "epoch": 14.359832635983263, "grad_norm": 0.05147750675678253, "learning_rate": 2.0624423525618027e-05, "loss": 0.0023, "step": 30888 }, { "epoch": 14.360762436076243, "grad_norm": 0.07911454141139984, "learning_rate": 2.070177249301455e-05, "loss": 0.0042, "step": 30890 }, { "epoch": 14.361692236169224, "grad_norm": 1.290221929550171, "learning_rate": 2.077916388218148e-05, "loss": 0.0073, "step": 30892 }, { "epoch": 14.362622036262204, "grad_norm": 0.21806582808494568, "learning_rate": 2.085659692929687e-05, "loss": 0.0024, "step": 30894 }, { "epoch": 14.363551836355184, "grad_norm": 0.7546462416648865, "learning_rate": 2.093407087012781e-05, "loss": 0.0104, "step": 30896 }, { "epoch": 14.364481636448163, "grad_norm": 0.09052547812461853, "learning_rate": 2.101158494003761e-05, "loss": 0.0017, "step": 30898 }, { "epoch": 14.365411436541144, "grad_norm": 0.17054663598537445, "learning_rate": 2.108913837399403e-05, "loss": 0.0025, "step": 30900 }, { "epoch": 14.366341236634124, "grad_norm": 0.97991544008255, "learning_rate": 2.1166730406576e-05, "loss": 0.0222, "step": 30902 }, { "epoch": 14.367271036727104, "grad_norm": 0.32952821254730225, "learning_rate": 2.1244360271980955e-05, "loss": 0.0043, "step": 30904 }, { "epoch": 14.368200836820083, "grad_norm": 0.8504729866981506, "learning_rate": 2.1322027204033985e-05, "loss": 0.0041, "step": 30906 }, { "epoch": 14.369130636913063, "grad_norm": 0.3330215513706207, "learning_rate": 2.1399730436193656e-05, "loss": 0.0024, "step": 30908 }, { "epoch": 14.370060437006044, "grad_norm": 0.10341394692659378, "learning_rate": 2.147746920156026e-05, "loss": 0.0047, "step": 30910 }, { "epoch": 14.370990237099024, "grad_norm": 0.8789494037628174, "learning_rate": 2.1555242732883912e-05, "loss": 0.0061, "step": 30912 }, { "epoch": 14.371920037192004, "grad_norm": 0.817630410194397, "learning_rate": 2.163305026257109e-05, "loss": 0.0066, "step": 30914 }, { "epoch": 14.372849837284983, "grad_norm": 0.2671823501586914, "learning_rate": 2.1710891022692885e-05, "loss": 0.0047, "step": 30916 }, { "epoch": 14.373779637377964, "grad_norm": 0.22916440665721893, "learning_rate": 2.1788764244992237e-05, "loss": 0.0025, "step": 30918 }, { "epoch": 14.374709437470944, "grad_norm": 0.1487007886171341, "learning_rate": 2.1866669160892245e-05, "loss": 0.0034, "step": 30920 }, { "epoch": 14.375639237563924, "grad_norm": 0.1586133986711502, "learning_rate": 2.1944605001502654e-05, "loss": 0.0042, "step": 30922 }, { "epoch": 14.376569037656903, "grad_norm": 0.4348863959312439, "learning_rate": 2.202257099762801e-05, "loss": 0.0027, "step": 30924 }, { "epoch": 14.377498837749883, "grad_norm": 0.07941368967294693, "learning_rate": 2.2100566379775757e-05, "loss": 0.0068, "step": 30926 }, { "epoch": 14.378428637842864, "grad_norm": 1.1989705562591553, "learning_rate": 2.2178590378162793e-05, "loss": 0.0127, "step": 30928 }, { "epoch": 14.379358437935844, "grad_norm": 0.41640806198120117, "learning_rate": 2.2256642222723744e-05, "loss": 0.0114, "step": 30930 }, { "epoch": 14.380288238028823, "grad_norm": 0.34484952688217163, "learning_rate": 2.233472114311842e-05, "loss": 0.0023, "step": 30932 }, { "epoch": 14.381218038121803, "grad_norm": 0.783638596534729, "learning_rate": 2.2412826368739393e-05, "loss": 0.0036, "step": 30934 }, { "epoch": 14.382147838214784, "grad_norm": 0.16690580546855927, "learning_rate": 2.2490957128719627e-05, "loss": 0.002, "step": 30936 }, { "epoch": 14.383077638307764, "grad_norm": 0.5465366244316101, "learning_rate": 2.256911265193988e-05, "loss": 0.0147, "step": 30938 }, { "epoch": 14.384007438400744, "grad_norm": 0.07997289299964905, "learning_rate": 2.264729216703705e-05, "loss": 0.0026, "step": 30940 }, { "epoch": 14.384937238493723, "grad_norm": 0.3028228282928467, "learning_rate": 2.27254949024107e-05, "loss": 0.0025, "step": 30942 }, { "epoch": 14.385867038586705, "grad_norm": 0.7103320956230164, "learning_rate": 2.2803720086231228e-05, "loss": 0.024, "step": 30944 }, { "epoch": 14.386796838679684, "grad_norm": 0.7988787889480591, "learning_rate": 2.288196694644801e-05, "loss": 0.008, "step": 30946 }, { "epoch": 14.387726638772664, "grad_norm": 0.23974460363388062, "learning_rate": 2.2960234710795945e-05, "loss": 0.005, "step": 30948 }, { "epoch": 14.388656438865643, "grad_norm": 0.4015665054321289, "learning_rate": 2.30385226068038e-05, "loss": 0.0039, "step": 30950 }, { "epoch": 14.389586238958623, "grad_norm": 0.5237339735031128, "learning_rate": 2.3116829861801466e-05, "loss": 0.0045, "step": 30952 }, { "epoch": 14.390516039051604, "grad_norm": 0.1931535005569458, "learning_rate": 2.3195155702928307e-05, "loss": 0.0022, "step": 30954 }, { "epoch": 14.391445839144584, "grad_norm": 3.331223964691162, "learning_rate": 2.3273499357139896e-05, "loss": 0.0121, "step": 30956 }, { "epoch": 14.392375639237564, "grad_norm": 0.14632093906402588, "learning_rate": 2.335186005121546e-05, "loss": 0.0047, "step": 30958 }, { "epoch": 14.393305439330543, "grad_norm": 0.4291849136352539, "learning_rate": 2.343023701176711e-05, "loss": 0.0052, "step": 30960 }, { "epoch": 14.394235239423525, "grad_norm": 0.3844180405139923, "learning_rate": 2.3508629465245714e-05, "loss": 0.0025, "step": 30962 }, { "epoch": 14.395165039516504, "grad_norm": 0.3235642611980438, "learning_rate": 2.3587036637949225e-05, "loss": 0.0052, "step": 30964 }, { "epoch": 14.396094839609484, "grad_norm": 1.4400526285171509, "learning_rate": 2.366545775603087e-05, "loss": 0.0072, "step": 30966 }, { "epoch": 14.397024639702463, "grad_norm": 0.22582608461380005, "learning_rate": 2.3743892045505683e-05, "loss": 0.0029, "step": 30968 }, { "epoch": 14.397954439795445, "grad_norm": 0.5156593322753906, "learning_rate": 2.3822338732258893e-05, "loss": 0.0037, "step": 30970 }, { "epoch": 14.398884239888424, "grad_norm": 0.08694934844970703, "learning_rate": 2.39007970420532e-05, "loss": 0.002, "step": 30972 }, { "epoch": 14.399814039981404, "grad_norm": 1.331255555152893, "learning_rate": 2.3979266200537113e-05, "loss": 0.0249, "step": 30974 }, { "epoch": 14.400743840074384, "grad_norm": 0.1933237910270691, "learning_rate": 2.4057745433251543e-05, "loss": 0.0022, "step": 30976 }, { "epoch": 14.401673640167363, "grad_norm": 1.116148829460144, "learning_rate": 2.4136233965637958e-05, "loss": 0.0104, "step": 30978 }, { "epoch": 14.402603440260345, "grad_norm": 0.07527576386928558, "learning_rate": 2.4214731023046593e-05, "loss": 0.0015, "step": 30980 }, { "epoch": 14.403533240353324, "grad_norm": 0.095021091401577, "learning_rate": 2.429323583074301e-05, "loss": 0.007, "step": 30982 }, { "epoch": 14.404463040446304, "grad_norm": 0.07623384147882462, "learning_rate": 2.437174761391662e-05, "loss": 0.0021, "step": 30984 }, { "epoch": 14.405392840539283, "grad_norm": 0.04278888180851936, "learning_rate": 2.4450265597687288e-05, "loss": 0.0013, "step": 30986 }, { "epoch": 14.406322640632265, "grad_norm": 0.12354692071676254, "learning_rate": 2.4528789007114746e-05, "loss": 0.007, "step": 30988 }, { "epoch": 14.407252440725244, "grad_norm": 0.13234105706214905, "learning_rate": 2.4607317067204475e-05, "loss": 0.0022, "step": 30990 }, { "epoch": 14.408182240818224, "grad_norm": 0.5662795305252075, "learning_rate": 2.4685849002916037e-05, "loss": 0.0054, "step": 30992 }, { "epoch": 14.409112040911204, "grad_norm": 0.3725866377353668, "learning_rate": 2.476438403917129e-05, "loss": 0.0062, "step": 30994 }, { "epoch": 14.410041841004183, "grad_norm": 0.13045845925807953, "learning_rate": 2.484292140086095e-05, "loss": 0.0033, "step": 30996 }, { "epoch": 14.410971641097165, "grad_norm": 0.2177010029554367, "learning_rate": 2.4921460312852797e-05, "loss": 0.0099, "step": 30998 }, { "epoch": 14.411901441190144, "grad_norm": 0.2401544600725174, "learning_rate": 2.499999999999983e-05, "loss": 0.0095, "step": 31000 }, { "epoch": 14.411901441190144, "eval_cer": 0.10454361107840444, "eval_loss": 0.18928837776184082, "eval_runtime": 403.2221, "eval_samples_per_second": 31.481, "eval_steps_per_second": 0.985, "step": 31000 } ], "logging_steps": 2, "max_steps": 32265, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.2878233327346791e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }