{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.2458760494452179, "eval_steps": 365, "global_step": 1457, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016875500991435683, "grad_norm": 1.940874608408194e-05, "learning_rate": 2e-05, "loss": 46.0, "step": 1 }, { "epoch": 0.00016875500991435683, "eval_loss": 11.5, "eval_runtime": 14.7961, "eval_samples_per_second": 168.625, "eval_steps_per_second": 84.346, "step": 1 }, { "epoch": 0.00033751001982871366, "grad_norm": 1.7339452824671753e-05, "learning_rate": 4e-05, "loss": 46.0, "step": 2 }, { "epoch": 0.0005062650297430705, "grad_norm": 9.871354450297076e-06, "learning_rate": 6e-05, "loss": 46.0, "step": 3 }, { "epoch": 0.0006750200396574273, "grad_norm": 1.9611639800132252e-05, "learning_rate": 8e-05, "loss": 46.0, "step": 4 }, { "epoch": 0.0008437750495717841, "grad_norm": 1.9497307221172377e-05, "learning_rate": 0.0001, "loss": 46.0, "step": 5 }, { "epoch": 0.001012530059486141, "grad_norm": 1.4163069863570854e-05, "learning_rate": 0.00012, "loss": 46.0, "step": 6 }, { "epoch": 0.0011812850694004977, "grad_norm": 2.7470567147247493e-05, "learning_rate": 0.00014, "loss": 46.0, "step": 7 }, { "epoch": 0.0013500400793148546, "grad_norm": 1.262454861716833e-05, "learning_rate": 0.00016, "loss": 46.0, "step": 8 }, { "epoch": 0.0015187950892292116, "grad_norm": 1.2461353435355704e-05, "learning_rate": 0.00018, "loss": 46.0, "step": 9 }, { "epoch": 0.0016875500991435683, "grad_norm": 1.924686148413457e-05, "learning_rate": 0.0002, "loss": 46.0, "step": 10 }, { "epoch": 0.0018563051090579252, "grad_norm": 1.5145715224207379e-05, "learning_rate": 0.0001999997643146886, "loss": 46.0, "step": 11 }, { "epoch": 0.002025060118972282, "grad_norm": 8.845885531627573e-06, "learning_rate": 0.0001999990572598653, "loss": 46.0, "step": 12 }, { "epoch": 0.0021938151288866388, "grad_norm": 1.8190678019891493e-05, "learning_rate": 0.00019999787883886297, "loss": 46.0, "step": 13 }, { "epoch": 0.0023625701388009955, "grad_norm": 1.827279083954636e-05, "learning_rate": 0.00019999622905723634, "loss": 46.0, "step": 14 }, { "epoch": 0.0025313251487153526, "grad_norm": 1.6565853002248332e-05, "learning_rate": 0.00019999410792276198, "loss": 46.0, "step": 15 }, { "epoch": 0.0027000801586297093, "grad_norm": 1.0454502444190439e-05, "learning_rate": 0.00019999151544543832, "loss": 46.0, "step": 16 }, { "epoch": 0.002868835168544066, "grad_norm": 1.3837036021868698e-05, "learning_rate": 0.00019998845163748553, "loss": 46.0, "step": 17 }, { "epoch": 0.003037590178458423, "grad_norm": 1.835626426327508e-05, "learning_rate": 0.0001999849165133455, "loss": 46.0, "step": 18 }, { "epoch": 0.00320634518837278, "grad_norm": 1.9169588995282538e-05, "learning_rate": 0.00019998091008968175, "loss": 46.0, "step": 19 }, { "epoch": 0.0033751001982871365, "grad_norm": 9.931905879057012e-06, "learning_rate": 0.0001999764323853794, "loss": 46.0, "step": 20 }, { "epoch": 0.0035438552082014936, "grad_norm": 1.246378269570414e-05, "learning_rate": 0.00019997148342154502, "loss": 46.0, "step": 21 }, { "epoch": 0.0037126102181158503, "grad_norm": 1.627793062652927e-05, "learning_rate": 0.0001999660632215066, "loss": 46.0, "step": 22 }, { "epoch": 0.003881365228030207, "grad_norm": 1.6465271983179264e-05, "learning_rate": 0.00019996017181081336, "loss": 46.0, "step": 23 }, { "epoch": 0.004050120237944564, "grad_norm": 1.4562248907168396e-05, "learning_rate": 0.00019995380921723562, "loss": 46.0, "step": 24 }, { "epoch": 0.00421887524785892, "grad_norm": 1.0674185432435479e-05, "learning_rate": 0.00019994697547076487, "loss": 46.0, "step": 25 }, { "epoch": 0.0043876302577732776, "grad_norm": 1.2923982467327733e-05, "learning_rate": 0.00019993967060361335, "loss": 46.0, "step": 26 }, { "epoch": 0.004556385267687635, "grad_norm": 1.9432607587077655e-05, "learning_rate": 0.00019993189465021405, "loss": 46.0, "step": 27 }, { "epoch": 0.004725140277601991, "grad_norm": 1.4471233043877874e-05, "learning_rate": 0.0001999236476472205, "loss": 46.0, "step": 28 }, { "epoch": 0.004893895287516348, "grad_norm": 2.0513718482106924e-05, "learning_rate": 0.0001999149296335067, "loss": 46.0, "step": 29 }, { "epoch": 0.005062650297430705, "grad_norm": 2.3202137526823208e-05, "learning_rate": 0.00019990574065016677, "loss": 46.0, "step": 30 }, { "epoch": 0.0052314053073450615, "grad_norm": 2.1836229279870167e-05, "learning_rate": 0.00019989608074051489, "loss": 46.0, "step": 31 }, { "epoch": 0.005400160317259419, "grad_norm": 1.5274166798917577e-05, "learning_rate": 0.00019988594995008505, "loss": 46.0, "step": 32 }, { "epoch": 0.005568915327173776, "grad_norm": 1.919052010634914e-05, "learning_rate": 0.00019987534832663082, "loss": 46.0, "step": 33 }, { "epoch": 0.005737670337088132, "grad_norm": 1.5861112842685543e-05, "learning_rate": 0.0001998642759201251, "loss": 46.0, "step": 34 }, { "epoch": 0.005906425347002489, "grad_norm": 1.185925975732971e-05, "learning_rate": 0.00019985273278276, "loss": 46.0, "step": 35 }, { "epoch": 0.006075180356916846, "grad_norm": 1.6194346244446933e-05, "learning_rate": 0.00019984071896894646, "loss": 46.0, "step": 36 }, { "epoch": 0.0062439353668312025, "grad_norm": 1.961121779459063e-05, "learning_rate": 0.0001998282345353141, "loss": 46.0, "step": 37 }, { "epoch": 0.00641269037674556, "grad_norm": 2.1559202650678344e-05, "learning_rate": 0.0001998152795407108, "loss": 46.0, "step": 38 }, { "epoch": 0.006581445386659917, "grad_norm": 2.8157497581560165e-05, "learning_rate": 0.00019980185404620268, "loss": 46.0, "step": 39 }, { "epoch": 0.006750200396574273, "grad_norm": 2.1486024706973694e-05, "learning_rate": 0.00019978795811507354, "loss": 46.0, "step": 40 }, { "epoch": 0.00691895540648863, "grad_norm": 2.3358212274615653e-05, "learning_rate": 0.00019977359181282473, "loss": 46.0, "step": 41 }, { "epoch": 0.007087710416402987, "grad_norm": 1.6756239347159863e-05, "learning_rate": 0.00019975875520717479, "loss": 46.0, "step": 42 }, { "epoch": 0.0072564654263173435, "grad_norm": 1.7175016182591207e-05, "learning_rate": 0.00019974344836805905, "loss": 46.0, "step": 43 }, { "epoch": 0.007425220436231701, "grad_norm": 1.2200899618619587e-05, "learning_rate": 0.00019972767136762953, "loss": 46.0, "step": 44 }, { "epoch": 0.007593975446146058, "grad_norm": 1.7348913388559595e-05, "learning_rate": 0.00019971142428025433, "loss": 46.0, "step": 45 }, { "epoch": 0.007762730456060414, "grad_norm": 9.565149412082974e-06, "learning_rate": 0.00019969470718251748, "loss": 46.0, "step": 46 }, { "epoch": 0.007931485465974771, "grad_norm": 1.5444033124367706e-05, "learning_rate": 0.00019967752015321845, "loss": 46.0, "step": 47 }, { "epoch": 0.008100240475889128, "grad_norm": 1.9607326976256445e-05, "learning_rate": 0.00019965986327337185, "loss": 46.0, "step": 48 }, { "epoch": 0.008268995485803485, "grad_norm": 1.6280893760267645e-05, "learning_rate": 0.00019964173662620702, "loss": 46.0, "step": 49 }, { "epoch": 0.00843775049571784, "grad_norm": 2.2253505449043587e-05, "learning_rate": 0.00019962314029716766, "loss": 46.0, "step": 50 }, { "epoch": 0.008606505505632198, "grad_norm": 1.3872278032067697e-05, "learning_rate": 0.0001996040743739114, "loss": 46.0, "step": 51 }, { "epoch": 0.008775260515546555, "grad_norm": 3.644825119408779e-05, "learning_rate": 0.0001995845389463094, "loss": 46.0, "step": 52 }, { "epoch": 0.008944015525460912, "grad_norm": 1.858348332461901e-05, "learning_rate": 0.00019956453410644592, "loss": 46.0, "step": 53 }, { "epoch": 0.00911277053537527, "grad_norm": 1.7900563761941157e-05, "learning_rate": 0.0001995440599486179, "loss": 46.0, "step": 54 }, { "epoch": 0.009281525545289626, "grad_norm": 4.377702498459257e-05, "learning_rate": 0.0001995231165693345, "loss": 46.0, "step": 55 }, { "epoch": 0.009450280555203982, "grad_norm": 2.3857590349507518e-05, "learning_rate": 0.00019950170406731667, "loss": 46.0, "step": 56 }, { "epoch": 0.009619035565118339, "grad_norm": 1.2674429854087066e-05, "learning_rate": 0.00019947982254349666, "loss": 46.0, "step": 57 }, { "epoch": 0.009787790575032696, "grad_norm": 2.890539326472208e-05, "learning_rate": 0.00019945747210101754, "loss": 46.0, "step": 58 }, { "epoch": 0.009956545584947053, "grad_norm": 1.9787186829489656e-05, "learning_rate": 0.0001994346528452327, "loss": 46.0, "step": 59 }, { "epoch": 0.01012530059486141, "grad_norm": 2.427671461191494e-05, "learning_rate": 0.00019941136488370542, "loss": 46.0, "step": 60 }, { "epoch": 0.010294055604775768, "grad_norm": 1.2431184586603194e-05, "learning_rate": 0.00019938760832620834, "loss": 46.0, "step": 61 }, { "epoch": 0.010462810614690123, "grad_norm": 2.0812987713725306e-05, "learning_rate": 0.00019936338328472287, "loss": 46.0, "step": 62 }, { "epoch": 0.01063156562460448, "grad_norm": 2.8024591301800683e-05, "learning_rate": 0.00019933868987343875, "loss": 46.0, "step": 63 }, { "epoch": 0.010800320634518837, "grad_norm": 2.2569249267689884e-05, "learning_rate": 0.0001993135282087535, "loss": 46.0, "step": 64 }, { "epoch": 0.010969075644433194, "grad_norm": 1.9383338440093212e-05, "learning_rate": 0.0001992878984092717, "loss": 46.0, "step": 65 }, { "epoch": 0.011137830654347551, "grad_norm": 2.542112815717701e-05, "learning_rate": 0.00019926180059580482, "loss": 46.0, "step": 66 }, { "epoch": 0.011306585664261909, "grad_norm": 1.598012568138074e-05, "learning_rate": 0.00019923523489137024, "loss": 46.0, "step": 67 }, { "epoch": 0.011475340674176264, "grad_norm": 1.4789104170631617e-05, "learning_rate": 0.00019920820142119085, "loss": 46.0, "step": 68 }, { "epoch": 0.011644095684090621, "grad_norm": 1.9799528672592714e-05, "learning_rate": 0.00019918070031269453, "loss": 46.0, "step": 69 }, { "epoch": 0.011812850694004978, "grad_norm": 3.249241126468405e-05, "learning_rate": 0.00019915273169551342, "loss": 46.0, "step": 70 }, { "epoch": 0.011981605703919335, "grad_norm": 2.287813367729541e-05, "learning_rate": 0.00019912429570148339, "loss": 46.0, "step": 71 }, { "epoch": 0.012150360713833692, "grad_norm": 3.186216781614348e-05, "learning_rate": 0.0001990953924646433, "loss": 46.0, "step": 72 }, { "epoch": 0.01231911572374805, "grad_norm": 4.087354682269506e-05, "learning_rate": 0.00019906602212123455, "loss": 46.0, "step": 73 }, { "epoch": 0.012487870733662405, "grad_norm": 2.5896191800711676e-05, "learning_rate": 0.00019903618480970035, "loss": 46.0, "step": 74 }, { "epoch": 0.012656625743576762, "grad_norm": 2.6261466700816527e-05, "learning_rate": 0.00019900588067068493, "loss": 46.0, "step": 75 }, { "epoch": 0.01282538075349112, "grad_norm": 1.815898940549232e-05, "learning_rate": 0.0001989751098470332, "loss": 46.0, "step": 76 }, { "epoch": 0.012994135763405476, "grad_norm": 1.682456240814645e-05, "learning_rate": 0.0001989438724837897, "loss": 46.0, "step": 77 }, { "epoch": 0.013162890773319834, "grad_norm": 2.581811168056447e-05, "learning_rate": 0.00019891216872819825, "loss": 46.0, "step": 78 }, { "epoch": 0.013331645783234189, "grad_norm": 3.8674785173498094e-05, "learning_rate": 0.00019887999872970097, "loss": 46.0, "step": 79 }, { "epoch": 0.013500400793148546, "grad_norm": 3.877016933984123e-05, "learning_rate": 0.00019884736263993784, "loss": 46.0, "step": 80 }, { "epoch": 0.013669155803062903, "grad_norm": 1.5791521946084686e-05, "learning_rate": 0.0001988142606127458, "loss": 46.0, "step": 81 }, { "epoch": 0.01383791081297726, "grad_norm": 2.4946857593022287e-05, "learning_rate": 0.00019878069280415803, "loss": 46.0, "step": 82 }, { "epoch": 0.014006665822891617, "grad_norm": 4.878333129454404e-05, "learning_rate": 0.00019874665937240335, "loss": 46.0, "step": 83 }, { "epoch": 0.014175420832805975, "grad_norm": 2.5513558284728788e-05, "learning_rate": 0.00019871216047790538, "loss": 46.0, "step": 84 }, { "epoch": 0.01434417584272033, "grad_norm": 5.085681550554e-05, "learning_rate": 0.00019867719628328175, "loss": 46.0, "step": 85 }, { "epoch": 0.014512930852634687, "grad_norm": 4.014965088572353e-05, "learning_rate": 0.0001986417669533434, "loss": 46.0, "step": 86 }, { "epoch": 0.014681685862549044, "grad_norm": 2.0565448721754365e-05, "learning_rate": 0.0001986058726550938, "loss": 46.0, "step": 87 }, { "epoch": 0.014850440872463401, "grad_norm": 4.422978236107156e-05, "learning_rate": 0.00019856951355772814, "loss": 46.0, "step": 88 }, { "epoch": 0.015019195882377758, "grad_norm": 2.699590550037101e-05, "learning_rate": 0.00019853268983263244, "loss": 46.0, "step": 89 }, { "epoch": 0.015187950892292116, "grad_norm": 1.274027908948483e-05, "learning_rate": 0.000198495401653383, "loss": 46.0, "step": 90 }, { "epoch": 0.015356705902206471, "grad_norm": 4.096307384315878e-05, "learning_rate": 0.00019845764919574537, "loss": 46.0, "step": 91 }, { "epoch": 0.015525460912120828, "grad_norm": 3.1611998565495014e-05, "learning_rate": 0.00019841943263767346, "loss": 46.0, "step": 92 }, { "epoch": 0.015694215922035185, "grad_norm": 2.0112831407459453e-05, "learning_rate": 0.00019838075215930894, "loss": 46.0, "step": 93 }, { "epoch": 0.015862970931949542, "grad_norm": 6.471107190009207e-05, "learning_rate": 0.00019834160794298024, "loss": 46.0, "step": 94 }, { "epoch": 0.0160317259418639, "grad_norm": 3.331500920467079e-05, "learning_rate": 0.00019830200017320168, "loss": 46.0, "step": 95 }, { "epoch": 0.016200480951778257, "grad_norm": 3.4283220884390175e-05, "learning_rate": 0.0001982619290366726, "loss": 46.0, "step": 96 }, { "epoch": 0.016369235961692614, "grad_norm": 4.784906195709482e-05, "learning_rate": 0.00019822139472227665, "loss": 46.0, "step": 97 }, { "epoch": 0.01653799097160697, "grad_norm": 4.9155318265547976e-05, "learning_rate": 0.00019818039742108064, "loss": 46.0, "step": 98 }, { "epoch": 0.016706745981521328, "grad_norm": 4.837827509618364e-05, "learning_rate": 0.00019813893732633378, "loss": 46.0, "step": 99 }, { "epoch": 0.01687550099143568, "grad_norm": 4.3958363676210865e-05, "learning_rate": 0.00019809701463346683, "loss": 46.0, "step": 100 }, { "epoch": 0.01704425600135004, "grad_norm": 5.13470804435201e-05, "learning_rate": 0.000198054629540091, "loss": 46.0, "step": 101 }, { "epoch": 0.017213011011264396, "grad_norm": 3.207432382623665e-05, "learning_rate": 0.00019801178224599722, "loss": 46.0, "step": 102 }, { "epoch": 0.017381766021178753, "grad_norm": 3.24075881508179e-05, "learning_rate": 0.00019796847295315502, "loss": 46.0, "step": 103 }, { "epoch": 0.01755052103109311, "grad_norm": 5.3687395848101005e-05, "learning_rate": 0.00019792470186571167, "loss": 46.0, "step": 104 }, { "epoch": 0.017719276041007467, "grad_norm": 2.9774340873700567e-05, "learning_rate": 0.00019788046918999122, "loss": 46.0, "step": 105 }, { "epoch": 0.017888031050921824, "grad_norm": 7.565080159110948e-05, "learning_rate": 0.00019783577513449353, "loss": 46.0, "step": 106 }, { "epoch": 0.01805678606083618, "grad_norm": 5.2741965191671625e-05, "learning_rate": 0.0001977906199098932, "loss": 46.0, "step": 107 }, { "epoch": 0.01822554107075054, "grad_norm": 3.216122786398046e-05, "learning_rate": 0.0001977450037290388, "loss": 46.0, "step": 108 }, { "epoch": 0.018394296080664896, "grad_norm": 5.457305451272987e-05, "learning_rate": 0.00019769892680695147, "loss": 46.0, "step": 109 }, { "epoch": 0.018563051090579253, "grad_norm": 3.489471419015899e-05, "learning_rate": 0.00019765238936082438, "loss": 46.0, "step": 110 }, { "epoch": 0.01873180610049361, "grad_norm": 1.886937752715312e-05, "learning_rate": 0.00019760539161002135, "loss": 46.0, "step": 111 }, { "epoch": 0.018900561110407964, "grad_norm": 3.381875285413116e-05, "learning_rate": 0.00019755793377607597, "loss": 46.0, "step": 112 }, { "epoch": 0.01906931612032232, "grad_norm": 3.3628173696342856e-05, "learning_rate": 0.00019751001608269052, "loss": 46.0, "step": 113 }, { "epoch": 0.019238071130236678, "grad_norm": 5.391196464188397e-05, "learning_rate": 0.00019746163875573492, "loss": 46.0, "step": 114 }, { "epoch": 0.019406826140151035, "grad_norm": 4.308508141548373e-05, "learning_rate": 0.0001974128020232457, "loss": 46.0, "step": 115 }, { "epoch": 0.019575581150065392, "grad_norm": 0.00010606838623061776, "learning_rate": 0.00019736350611542487, "loss": 46.0, "step": 116 }, { "epoch": 0.01974433615997975, "grad_norm": 4.744268153444864e-05, "learning_rate": 0.00019731375126463886, "loss": 46.0, "step": 117 }, { "epoch": 0.019913091169894107, "grad_norm": 3.2672236557118595e-05, "learning_rate": 0.00019726353770541742, "loss": 46.0, "step": 118 }, { "epoch": 0.020081846179808464, "grad_norm": 1.8585633370094e-05, "learning_rate": 0.0001972128656744525, "loss": 46.0, "step": 119 }, { "epoch": 0.02025060118972282, "grad_norm": 3.3655844163149595e-05, "learning_rate": 0.0001971617354105972, "loss": 46.0, "step": 120 }, { "epoch": 0.020419356199637178, "grad_norm": 4.9799295084085315e-05, "learning_rate": 0.00019711014715486448, "loss": 46.0, "step": 121 }, { "epoch": 0.020588111209551535, "grad_norm": 7.914419256849214e-05, "learning_rate": 0.00019705810115042634, "loss": 46.0, "step": 122 }, { "epoch": 0.02075686621946589, "grad_norm": 4.802920011570677e-05, "learning_rate": 0.00019700559764261225, "loss": 46.0, "step": 123 }, { "epoch": 0.020925621229380246, "grad_norm": 3.76962598238606e-05, "learning_rate": 0.0001969526368789084, "loss": 46.0, "step": 124 }, { "epoch": 0.021094376239294603, "grad_norm": 3.57206336047966e-05, "learning_rate": 0.00019689921910895627, "loss": 46.0, "step": 125 }, { "epoch": 0.02126313124920896, "grad_norm": 0.0001358168519800529, "learning_rate": 0.00019684534458455145, "loss": 46.0, "step": 126 }, { "epoch": 0.021431886259123317, "grad_norm": 3.319705865578726e-05, "learning_rate": 0.0001967910135596427, "loss": 46.0, "step": 127 }, { "epoch": 0.021600641269037674, "grad_norm": 9.154703002423048e-05, "learning_rate": 0.0001967362262903305, "loss": 46.0, "step": 128 }, { "epoch": 0.02176939627895203, "grad_norm": 0.00012708237045444548, "learning_rate": 0.00019668098303486593, "loss": 46.0, "step": 129 }, { "epoch": 0.02193815128886639, "grad_norm": 5.1937749958597124e-05, "learning_rate": 0.00019662528405364947, "loss": 46.0, "step": 130 }, { "epoch": 0.022106906298780746, "grad_norm": 6.14839227637276e-05, "learning_rate": 0.00019656912960922974, "loss": 46.0, "step": 131 }, { "epoch": 0.022275661308695103, "grad_norm": 5.0448226829757914e-05, "learning_rate": 0.0001965125199663023, "loss": 46.0, "step": 132 }, { "epoch": 0.02244441631860946, "grad_norm": 0.00013035547453910112, "learning_rate": 0.0001964554553917084, "loss": 46.0, "step": 133 }, { "epoch": 0.022613171328523817, "grad_norm": 5.22616392117925e-05, "learning_rate": 0.00019639793615443366, "loss": 46.0, "step": 134 }, { "epoch": 0.02278192633843817, "grad_norm": 7.795252167852595e-05, "learning_rate": 0.00019633996252560687, "loss": 46.0, "step": 135 }, { "epoch": 0.022950681348352528, "grad_norm": 0.0001024070952553302, "learning_rate": 0.00019628153477849867, "loss": 46.0, "step": 136 }, { "epoch": 0.023119436358266885, "grad_norm": 6.724517152179033e-05, "learning_rate": 0.00019622265318852033, "loss": 46.0, "step": 137 }, { "epoch": 0.023288191368181242, "grad_norm": 6.319572275970131e-05, "learning_rate": 0.00019616331803322236, "loss": 46.0, "step": 138 }, { "epoch": 0.0234569463780956, "grad_norm": 6.086541907279752e-05, "learning_rate": 0.0001961035295922932, "loss": 46.0, "step": 139 }, { "epoch": 0.023625701388009956, "grad_norm": 5.147139745531604e-05, "learning_rate": 0.00019604328814755808, "loss": 46.0, "step": 140 }, { "epoch": 0.023794456397924314, "grad_norm": 6.334174395306036e-05, "learning_rate": 0.0001959825939829774, "loss": 46.0, "step": 141 }, { "epoch": 0.02396321140783867, "grad_norm": 8.245484787039459e-05, "learning_rate": 0.00019592144738464566, "loss": 46.0, "step": 142 }, { "epoch": 0.024131966417753028, "grad_norm": 3.768013630178757e-05, "learning_rate": 0.00019585984864078996, "loss": 46.0, "step": 143 }, { "epoch": 0.024300721427667385, "grad_norm": 5.31747609784361e-05, "learning_rate": 0.0001957977980417687, "loss": 46.0, "step": 144 }, { "epoch": 0.024469476437581742, "grad_norm": 6.340059917420149e-05, "learning_rate": 0.00019573529588007011, "loss": 46.0, "step": 145 }, { "epoch": 0.0246382314474961, "grad_norm": 8.484098361805081e-05, "learning_rate": 0.00019567234245031106, "loss": 46.0, "step": 146 }, { "epoch": 0.024806986457410453, "grad_norm": 7.689618360018358e-05, "learning_rate": 0.00019560893804923554, "loss": 46.0, "step": 147 }, { "epoch": 0.02497574146732481, "grad_norm": 6.701362144667655e-05, "learning_rate": 0.00019554508297571328, "loss": 46.0, "step": 148 }, { "epoch": 0.025144496477239167, "grad_norm": 4.832363629247993e-05, "learning_rate": 0.00019548077753073827, "loss": 46.0, "step": 149 }, { "epoch": 0.025313251487153524, "grad_norm": 0.00010464687511557713, "learning_rate": 0.00019541602201742755, "loss": 46.0, "step": 150 }, { "epoch": 0.02548200649706788, "grad_norm": 5.5594293371541426e-05, "learning_rate": 0.00019535081674101955, "loss": 46.0, "step": 151 }, { "epoch": 0.02565076150698224, "grad_norm": 0.00010436464071972296, "learning_rate": 0.0001952851620088728, "loss": 46.0, "step": 152 }, { "epoch": 0.025819516516896596, "grad_norm": 7.730885408818722e-05, "learning_rate": 0.00019521905813046445, "loss": 46.0, "step": 153 }, { "epoch": 0.025988271526810953, "grad_norm": 0.0002586382324807346, "learning_rate": 0.00019515250541738872, "loss": 46.0, "step": 154 }, { "epoch": 0.02615702653672531, "grad_norm": 5.2660256187664345e-05, "learning_rate": 0.00019508550418335555, "loss": 46.0, "step": 155 }, { "epoch": 0.026325781546639667, "grad_norm": 6.58825520076789e-05, "learning_rate": 0.00019501805474418912, "loss": 46.0, "step": 156 }, { "epoch": 0.026494536556554024, "grad_norm": 9.116072760662064e-05, "learning_rate": 0.00019495015741782622, "loss": 46.0, "step": 157 }, { "epoch": 0.026663291566468378, "grad_norm": 0.00010203113924944773, "learning_rate": 0.00019488181252431489, "loss": 46.0, "step": 158 }, { "epoch": 0.026832046576382735, "grad_norm": 9.367840539198369e-05, "learning_rate": 0.00019481302038581294, "loss": 46.0, "step": 159 }, { "epoch": 0.027000801586297092, "grad_norm": 5.867075742571615e-05, "learning_rate": 0.00019474378132658626, "loss": 46.0, "step": 160 }, { "epoch": 0.02716955659621145, "grad_norm": 0.0001331541279796511, "learning_rate": 0.00019467409567300745, "loss": 46.0, "step": 161 }, { "epoch": 0.027338311606125806, "grad_norm": 9.494357800576836e-05, "learning_rate": 0.0001946039637535542, "loss": 46.0, "step": 162 }, { "epoch": 0.027507066616040163, "grad_norm": 0.00018060464935842901, "learning_rate": 0.0001945333858988078, "loss": 46.0, "step": 163 }, { "epoch": 0.02767582162595452, "grad_norm": 9.109014354180545e-05, "learning_rate": 0.0001944623624414515, "loss": 46.0, "step": 164 }, { "epoch": 0.027844576635868878, "grad_norm": 0.00021458794071804732, "learning_rate": 0.00019439089371626903, "loss": 46.0, "step": 165 }, { "epoch": 0.028013331645783235, "grad_norm": 0.00023161491844803095, "learning_rate": 0.0001943189800601429, "loss": 46.0, "step": 166 }, { "epoch": 0.028182086655697592, "grad_norm": 0.0001091673257178627, "learning_rate": 0.00019424662181205307, "loss": 46.0, "step": 167 }, { "epoch": 0.02835084166561195, "grad_norm": 9.839528502197936e-05, "learning_rate": 0.00019417381931307497, "loss": 46.0, "step": 168 }, { "epoch": 0.028519596675526306, "grad_norm": 0.0001077549095498398, "learning_rate": 0.00019410057290637824, "loss": 46.0, "step": 169 }, { "epoch": 0.02868835168544066, "grad_norm": 0.00011080451076850295, "learning_rate": 0.0001940268829372249, "loss": 46.0, "step": 170 }, { "epoch": 0.028857106695355017, "grad_norm": 0.00010105837282026187, "learning_rate": 0.00019395274975296786, "loss": 46.0, "step": 171 }, { "epoch": 0.029025861705269374, "grad_norm": 0.00012236724433023483, "learning_rate": 0.0001938781737030491, "loss": 46.0, "step": 172 }, { "epoch": 0.02919461671518373, "grad_norm": 8.416602213401347e-05, "learning_rate": 0.00019380315513899826, "loss": 46.0, "step": 173 }, { "epoch": 0.02936337172509809, "grad_norm": 0.00017547875177115202, "learning_rate": 0.00019372769441443083, "loss": 46.0, "step": 174 }, { "epoch": 0.029532126735012446, "grad_norm": 0.00010037582251243293, "learning_rate": 0.00019365179188504647, "loss": 46.0, "step": 175 }, { "epoch": 0.029700881744926803, "grad_norm": 0.0001204924556077458, "learning_rate": 0.0001935754479086274, "loss": 46.0, "step": 176 }, { "epoch": 0.02986963675484116, "grad_norm": 0.00014140504936221987, "learning_rate": 0.00019349866284503674, "loss": 46.0, "step": 177 }, { "epoch": 0.030038391764755517, "grad_norm": 9.342600969830528e-05, "learning_rate": 0.00019342143705621662, "loss": 46.0, "step": 178 }, { "epoch": 0.030207146774669874, "grad_norm": 4.463369259610772e-05, "learning_rate": 0.00019334377090618682, "loss": 46.0, "step": 179 }, { "epoch": 0.03037590178458423, "grad_norm": 8.116533717839047e-05, "learning_rate": 0.00019326566476104274, "loss": 46.0, "step": 180 }, { "epoch": 0.03054465679449859, "grad_norm": 0.00013790494995191693, "learning_rate": 0.00019318711898895377, "loss": 46.0, "step": 181 }, { "epoch": 0.030713411804412942, "grad_norm": 0.0002199001028202474, "learning_rate": 0.00019310813396016162, "loss": 46.0, "step": 182 }, { "epoch": 0.0308821668143273, "grad_norm": 0.0002289148687850684, "learning_rate": 0.0001930287100469785, "loss": 46.0, "step": 183 }, { "epoch": 0.031050921824241656, "grad_norm": 0.00022609223378822207, "learning_rate": 0.00019294884762378547, "loss": 46.0, "step": 184 }, { "epoch": 0.031219676834156013, "grad_norm": 0.00014787810505367815, "learning_rate": 0.00019286854706703044, "loss": 46.0, "step": 185 }, { "epoch": 0.03138843184407037, "grad_norm": 0.00017034618940670043, "learning_rate": 0.00019278780875522667, "loss": 46.0, "step": 186 }, { "epoch": 0.03155718685398473, "grad_norm": 0.0001577001967234537, "learning_rate": 0.0001927066330689509, "loss": 46.0, "step": 187 }, { "epoch": 0.031725941863899085, "grad_norm": 0.0001635671651456505, "learning_rate": 0.0001926250203908414, "loss": 46.0, "step": 188 }, { "epoch": 0.03189469687381344, "grad_norm": 0.00011218619329156354, "learning_rate": 0.00019254297110559638, "loss": 46.0, "step": 189 }, { "epoch": 0.0320634518837278, "grad_norm": 0.0001787557266652584, "learning_rate": 0.0001924604855999721, "loss": 46.0, "step": 190 }, { "epoch": 0.03223220689364215, "grad_norm": 0.00014260809984989464, "learning_rate": 0.00019237756426278095, "loss": 46.0, "step": 191 }, { "epoch": 0.03240096190355651, "grad_norm": 0.00012893076927866787, "learning_rate": 0.00019229420748488978, "loss": 46.0, "step": 192 }, { "epoch": 0.03256971691347087, "grad_norm": 0.00022735691163688898, "learning_rate": 0.00019221041565921796, "loss": 46.0, "step": 193 }, { "epoch": 0.03273847192338523, "grad_norm": 0.00011990263010375202, "learning_rate": 0.0001921261891807355, "loss": 46.0, "step": 194 }, { "epoch": 0.03290722693329958, "grad_norm": 0.00017182013834826648, "learning_rate": 0.00019204152844646134, "loss": 46.0, "step": 195 }, { "epoch": 0.03307598194321394, "grad_norm": 0.00017154582019429654, "learning_rate": 0.00019195643385546126, "loss": 46.0, "step": 196 }, { "epoch": 0.033244736953128295, "grad_norm": 0.0001479845232097432, "learning_rate": 0.00019187090580884622, "loss": 46.0, "step": 197 }, { "epoch": 0.033413491963042656, "grad_norm": 0.00010758084681583568, "learning_rate": 0.00019178494470977023, "loss": 46.0, "step": 198 }, { "epoch": 0.03358224697295701, "grad_norm": 0.0001167198788607493, "learning_rate": 0.0001916985509634287, "loss": 46.0, "step": 199 }, { "epoch": 0.03375100198287136, "grad_norm": 0.00015376460214611143, "learning_rate": 0.00019161172497705637, "loss": 46.0, "step": 200 }, { "epoch": 0.033919756992785724, "grad_norm": 0.0001339185400865972, "learning_rate": 0.00019152446715992543, "loss": 46.0, "step": 201 }, { "epoch": 0.03408851200270008, "grad_norm": 0.00018876604735851288, "learning_rate": 0.0001914367779233436, "loss": 46.0, "step": 202 }, { "epoch": 0.03425726701261444, "grad_norm": 0.00017353007569909096, "learning_rate": 0.00019134865768065216, "loss": 46.0, "step": 203 }, { "epoch": 0.03442602202252879, "grad_norm": 0.00011807784903794527, "learning_rate": 0.00019126010684722406, "loss": 46.0, "step": 204 }, { "epoch": 0.03459477703244315, "grad_norm": 0.00010566677519818768, "learning_rate": 0.00019117112584046193, "loss": 46.0, "step": 205 }, { "epoch": 0.034763532042357506, "grad_norm": 0.0001312542735831812, "learning_rate": 0.00019108171507979606, "loss": 46.0, "step": 206 }, { "epoch": 0.03493228705227187, "grad_norm": 5.670605969498865e-05, "learning_rate": 0.00019099187498668256, "loss": 46.0, "step": 207 }, { "epoch": 0.03510104206218622, "grad_norm": 8.242291369242594e-05, "learning_rate": 0.0001909016059846012, "loss": 46.0, "step": 208 }, { "epoch": 0.03526979707210058, "grad_norm": 0.0001418525935150683, "learning_rate": 0.00019081090849905355, "loss": 46.0, "step": 209 }, { "epoch": 0.035438552082014935, "grad_norm": 0.0002694391005206853, "learning_rate": 0.00019071978295756087, "loss": 46.0, "step": 210 }, { "epoch": 0.03560730709192929, "grad_norm": 0.00015816248196642846, "learning_rate": 0.0001906282297896623, "loss": 46.0, "step": 211 }, { "epoch": 0.03577606210184365, "grad_norm": 0.00011155927495565265, "learning_rate": 0.00019053624942691247, "loss": 46.0, "step": 212 }, { "epoch": 0.035944817111758, "grad_norm": 0.00010293432569596916, "learning_rate": 0.0001904438423028798, "loss": 46.0, "step": 213 }, { "epoch": 0.03611357212167236, "grad_norm": 0.00017549478798173368, "learning_rate": 0.00019035100885314438, "loss": 46.0, "step": 214 }, { "epoch": 0.03628232713158672, "grad_norm": 8.048818563111126e-05, "learning_rate": 0.0001902577495152958, "loss": 46.0, "step": 215 }, { "epoch": 0.03645108214150108, "grad_norm": 7.043426012387499e-05, "learning_rate": 0.0001901640647289312, "loss": 46.0, "step": 216 }, { "epoch": 0.03661983715141543, "grad_norm": 0.00022185473062563688, "learning_rate": 0.00019006995493565305, "loss": 46.0, "step": 217 }, { "epoch": 0.03678859216132979, "grad_norm": 0.0002446068392600864, "learning_rate": 0.0001899754205790674, "loss": 46.0, "step": 218 }, { "epoch": 0.036957347171244145, "grad_norm": 0.0002539333945605904, "learning_rate": 0.00018988046210478132, "loss": 46.0, "step": 219 }, { "epoch": 0.037126102181158506, "grad_norm": 0.0001403048081556335, "learning_rate": 0.00018978507996040124, "loss": 46.0, "step": 220 }, { "epoch": 0.03729485719107286, "grad_norm": 0.00014873422333039343, "learning_rate": 0.00018968927459553055, "loss": 46.0, "step": 221 }, { "epoch": 0.03746361220098722, "grad_norm": 0.00015955405251588672, "learning_rate": 0.00018959304646176754, "loss": 46.0, "step": 222 }, { "epoch": 0.037632367210901574, "grad_norm": 0.0003200356150045991, "learning_rate": 0.00018949639601270347, "loss": 46.0, "step": 223 }, { "epoch": 0.03780112222081593, "grad_norm": 0.00014675638522021472, "learning_rate": 0.00018939932370392004, "loss": 46.0, "step": 224 }, { "epoch": 0.03796987723073029, "grad_norm": 0.00022176875791046768, "learning_rate": 0.00018930182999298768, "loss": 46.0, "step": 225 }, { "epoch": 0.03813863224064464, "grad_norm": 0.00029244759934954345, "learning_rate": 0.0001892039153394631, "loss": 46.0, "step": 226 }, { "epoch": 0.038307387250559, "grad_norm": 0.00019421910110395402, "learning_rate": 0.0001891055802048872, "loss": 46.0, "step": 227 }, { "epoch": 0.038476142260473356, "grad_norm": 0.00012899210560135543, "learning_rate": 0.00018900682505278287, "loss": 46.0, "step": 228 }, { "epoch": 0.03864489727038772, "grad_norm": 0.00016150598821695894, "learning_rate": 0.00018890765034865295, "loss": 46.0, "step": 229 }, { "epoch": 0.03881365228030207, "grad_norm": 0.0004213732318021357, "learning_rate": 0.00018880805655997784, "loss": 46.0, "step": 230 }, { "epoch": 0.03898240729021643, "grad_norm": 0.0001324907352682203, "learning_rate": 0.0001887080441562134, "loss": 46.0, "step": 231 }, { "epoch": 0.039151162300130785, "grad_norm": 0.00029545003781095147, "learning_rate": 0.0001886076136087887, "loss": 46.0, "step": 232 }, { "epoch": 0.039319917310045145, "grad_norm": 0.00018010212806984782, "learning_rate": 0.00018850676539110386, "loss": 46.0, "step": 233 }, { "epoch": 0.0394886723199595, "grad_norm": 0.00014782045036554337, "learning_rate": 0.00018840549997852776, "loss": 46.0, "step": 234 }, { "epoch": 0.03965742732987385, "grad_norm": 0.0002910486946348101, "learning_rate": 0.0001883038178483958, "loss": 46.0, "step": 235 }, { "epoch": 0.03982618233978821, "grad_norm": 0.00010300084977643564, "learning_rate": 0.00018820171948000764, "loss": 46.0, "step": 236 }, { "epoch": 0.03999493734970257, "grad_norm": 0.00029963982524350286, "learning_rate": 0.00018809920535462502, "loss": 46.0, "step": 237 }, { "epoch": 0.04016369235961693, "grad_norm": 0.0002475226647220552, "learning_rate": 0.00018799627595546942, "loss": 46.0, "step": 238 }, { "epoch": 0.04033244736953128, "grad_norm": 9.632138244342059e-05, "learning_rate": 0.00018789293176771978, "loss": 46.0, "step": 239 }, { "epoch": 0.04050120237944564, "grad_norm": 0.00018374540377408266, "learning_rate": 0.00018778917327851025, "loss": 46.0, "step": 240 }, { "epoch": 0.040669957389359995, "grad_norm": 0.0005576743860729039, "learning_rate": 0.00018768500097692784, "loss": 46.0, "step": 241 }, { "epoch": 0.040838712399274356, "grad_norm": 0.0002332236763322726, "learning_rate": 0.00018758041535401018, "loss": 46.0, "step": 242 }, { "epoch": 0.04100746740918871, "grad_norm": 0.00021743084653280675, "learning_rate": 0.00018747541690274325, "loss": 46.0, "step": 243 }, { "epoch": 0.04117622241910307, "grad_norm": 0.00037613531458191574, "learning_rate": 0.00018737000611805877, "loss": 46.0, "step": 244 }, { "epoch": 0.041344977429017424, "grad_norm": 0.00017969420878216624, "learning_rate": 0.00018726418349683231, "loss": 46.0, "step": 245 }, { "epoch": 0.04151373243893178, "grad_norm": 0.00021221920906100422, "learning_rate": 0.00018715794953788059, "loss": 46.0, "step": 246 }, { "epoch": 0.04168248744884614, "grad_norm": 0.0002182903845096007, "learning_rate": 0.0001870513047419593, "loss": 46.0, "step": 247 }, { "epoch": 0.04185124245876049, "grad_norm": 0.00023534568026661873, "learning_rate": 0.00018694424961176065, "loss": 46.0, "step": 248 }, { "epoch": 0.04201999746867485, "grad_norm": 0.00013651238987222314, "learning_rate": 0.00018683678465191108, "loss": 46.0, "step": 249 }, { "epoch": 0.042188752478589206, "grad_norm": 0.000251735036727041, "learning_rate": 0.00018672891036896884, "loss": 46.0, "step": 250 }, { "epoch": 0.04235750748850357, "grad_norm": 0.0005651676910929382, "learning_rate": 0.00018662062727142165, "loss": 46.0, "step": 251 }, { "epoch": 0.04252626249841792, "grad_norm": 0.00027543309261091053, "learning_rate": 0.00018651193586968417, "loss": 46.0, "step": 252 }, { "epoch": 0.04269501750833228, "grad_norm": 0.00025643999106250703, "learning_rate": 0.00018640283667609574, "loss": 46.0, "step": 253 }, { "epoch": 0.042863772518246634, "grad_norm": 0.00026899727527052164, "learning_rate": 0.00018629333020491796, "loss": 46.0, "step": 254 }, { "epoch": 0.043032527528160995, "grad_norm": 0.0002454131608828902, "learning_rate": 0.00018618341697233213, "loss": 46.0, "step": 255 }, { "epoch": 0.04320128253807535, "grad_norm": 0.00020008228602819145, "learning_rate": 0.0001860730974964369, "loss": 46.0, "step": 256 }, { "epoch": 0.04337003754798971, "grad_norm": 0.0003180755884386599, "learning_rate": 0.00018596237229724595, "loss": 46.0, "step": 257 }, { "epoch": 0.04353879255790406, "grad_norm": 0.00027137139113619924, "learning_rate": 0.0001858512418966853, "loss": 46.0, "step": 258 }, { "epoch": 0.04370754756781842, "grad_norm": 0.0003248886205255985, "learning_rate": 0.000185739706818591, "loss": 46.0, "step": 259 }, { "epoch": 0.04387630257773278, "grad_norm": 0.00022937578614801168, "learning_rate": 0.00018562776758870663, "loss": 46.0, "step": 260 }, { "epoch": 0.04404505758764713, "grad_norm": 0.00026010029250755906, "learning_rate": 0.0001855154247346809, "loss": 46.0, "step": 261 }, { "epoch": 0.04421381259756149, "grad_norm": 0.00012881477596238256, "learning_rate": 0.00018540267878606497, "loss": 46.0, "step": 262 }, { "epoch": 0.044382567607475845, "grad_norm": 0.0001669221237534657, "learning_rate": 0.0001852895302743101, "loss": 46.0, "step": 263 }, { "epoch": 0.044551322617390206, "grad_norm": 8.989863272290677e-05, "learning_rate": 0.0001851759797327652, "loss": 46.0, "step": 264 }, { "epoch": 0.04472007762730456, "grad_norm": 0.00043415901018306613, "learning_rate": 0.00018506202769667413, "loss": 46.0, "step": 265 }, { "epoch": 0.04488883263721892, "grad_norm": 0.00018308595463167876, "learning_rate": 0.00018494767470317333, "loss": 46.0, "step": 266 }, { "epoch": 0.045057587647133274, "grad_norm": 0.0005433953483588994, "learning_rate": 0.00018483292129128914, "loss": 46.0, "step": 267 }, { "epoch": 0.045226342657047634, "grad_norm": 0.0001997397339437157, "learning_rate": 0.00018471776800193553, "loss": 46.0, "step": 268 }, { "epoch": 0.04539509766696199, "grad_norm": 0.0002549294731579721, "learning_rate": 0.00018460221537791122, "loss": 46.0, "step": 269 }, { "epoch": 0.04556385267687634, "grad_norm": 0.0004634494544006884, "learning_rate": 0.00018448626396389738, "loss": 46.0, "step": 270 }, { "epoch": 0.0457326076867907, "grad_norm": 0.00015057041309773922, "learning_rate": 0.00018436991430645488, "loss": 46.0, "step": 271 }, { "epoch": 0.045901362696705056, "grad_norm": 0.00030916737159714103, "learning_rate": 0.00018425316695402181, "loss": 46.0, "step": 272 }, { "epoch": 0.046070117706619416, "grad_norm": 0.0003056859422940761, "learning_rate": 0.00018413602245691092, "loss": 46.0, "step": 273 }, { "epoch": 0.04623887271653377, "grad_norm": 0.00028438231674954295, "learning_rate": 0.00018401848136730698, "loss": 46.0, "step": 274 }, { "epoch": 0.04640762772644813, "grad_norm": 0.00034854307887144387, "learning_rate": 0.00018390054423926406, "loss": 46.0, "step": 275 }, { "epoch": 0.046576382736362484, "grad_norm": 0.00025173407630063593, "learning_rate": 0.00018378221162870326, "loss": 46.0, "step": 276 }, { "epoch": 0.046745137746276845, "grad_norm": 0.0006683963001705706, "learning_rate": 0.00018366348409340965, "loss": 46.0, "step": 277 }, { "epoch": 0.0469138927561912, "grad_norm": 0.00017825645045377314, "learning_rate": 0.00018354436219303, "loss": 46.0, "step": 278 }, { "epoch": 0.04708264776610556, "grad_norm": 0.00026973988860845566, "learning_rate": 0.00018342484648906996, "loss": 46.0, "step": 279 }, { "epoch": 0.04725140277601991, "grad_norm": 0.00027474435046315193, "learning_rate": 0.00018330493754489138, "loss": 46.0, "step": 280 }, { "epoch": 0.04742015778593427, "grad_norm": 0.00025235096109099686, "learning_rate": 0.00018318463592570988, "loss": 46.0, "step": 281 }, { "epoch": 0.04758891279584863, "grad_norm": 0.00020448811119422317, "learning_rate": 0.0001830639421985919, "loss": 46.0, "step": 282 }, { "epoch": 0.04775766780576298, "grad_norm": 0.00028648230363614857, "learning_rate": 0.00018294285693245223, "loss": 46.0, "step": 283 }, { "epoch": 0.04792642281567734, "grad_norm": 0.00027214092551730573, "learning_rate": 0.00018282138069805127, "loss": 46.0, "step": 284 }, { "epoch": 0.048095177825591695, "grad_norm": 0.00021831200865563005, "learning_rate": 0.00018269951406799223, "loss": 46.0, "step": 285 }, { "epoch": 0.048263932835506056, "grad_norm": 0.000336469296598807, "learning_rate": 0.00018257725761671866, "loss": 46.0, "step": 286 }, { "epoch": 0.04843268784542041, "grad_norm": 0.00034162108204327524, "learning_rate": 0.00018245461192051157, "loss": 46.0, "step": 287 }, { "epoch": 0.04860144285533477, "grad_norm": 0.00035298787406645715, "learning_rate": 0.00018233157755748669, "loss": 46.0, "step": 288 }, { "epoch": 0.048770197865249124, "grad_norm": 0.00015676271868869662, "learning_rate": 0.0001822081551075919, "loss": 46.0, "step": 289 }, { "epoch": 0.048938952875163484, "grad_norm": 0.00023883357062004507, "learning_rate": 0.0001820843451526044, "loss": 46.0, "step": 290 }, { "epoch": 0.04910770788507784, "grad_norm": 0.0002669495588634163, "learning_rate": 0.0001819601482761278, "loss": 46.0, "step": 291 }, { "epoch": 0.0492764628949922, "grad_norm": 0.00034910603426396847, "learning_rate": 0.0001818355650635899, "loss": 46.0, "step": 292 }, { "epoch": 0.04944521790490655, "grad_norm": 0.00024713424500077963, "learning_rate": 0.0001817105961022392, "loss": 46.0, "step": 293 }, { "epoch": 0.049613972914820906, "grad_norm": 0.0002389974833931774, "learning_rate": 0.00018158524198114278, "loss": 46.0, "step": 294 }, { "epoch": 0.049782727924735266, "grad_norm": 0.00028152199229225516, "learning_rate": 0.0001814595032911831, "loss": 46.0, "step": 295 }, { "epoch": 0.04995148293464962, "grad_norm": 0.0004107706481590867, "learning_rate": 0.00018133338062505534, "loss": 46.0, "step": 296 }, { "epoch": 0.05012023794456398, "grad_norm": 0.00032018640195019543, "learning_rate": 0.00018120687457726478, "loss": 46.0, "step": 297 }, { "epoch": 0.050288992954478334, "grad_norm": 0.0003432031662669033, "learning_rate": 0.00018107998574412376, "loss": 46.0, "step": 298 }, { "epoch": 0.050457747964392695, "grad_norm": 0.00042492407374083996, "learning_rate": 0.00018095271472374892, "loss": 46.0, "step": 299 }, { "epoch": 0.05062650297430705, "grad_norm": 0.0004913901793770492, "learning_rate": 0.00018082506211605852, "loss": 46.0, "step": 300 }, { "epoch": 0.05079525798422141, "grad_norm": 0.00045284689986146986, "learning_rate": 0.00018069702852276941, "loss": 46.0, "step": 301 }, { "epoch": 0.05096401299413576, "grad_norm": 0.0002827131829690188, "learning_rate": 0.00018056861454739432, "loss": 46.0, "step": 302 }, { "epoch": 0.05113276800405012, "grad_norm": 0.000371127447579056, "learning_rate": 0.00018043982079523905, "loss": 46.0, "step": 303 }, { "epoch": 0.05130152301396448, "grad_norm": 0.0001193537755170837, "learning_rate": 0.00018031064787339947, "loss": 46.0, "step": 304 }, { "epoch": 0.05147027802387883, "grad_norm": 0.0002617594145704061, "learning_rate": 0.00018018109639075886, "loss": 46.0, "step": 305 }, { "epoch": 0.05163903303379319, "grad_norm": 0.0003127566596958786, "learning_rate": 0.00018005116695798476, "loss": 46.0, "step": 306 }, { "epoch": 0.051807788043707545, "grad_norm": 0.0003426918410696089, "learning_rate": 0.00017992086018752638, "loss": 46.0, "step": 307 }, { "epoch": 0.051976543053621906, "grad_norm": 0.000528005592059344, "learning_rate": 0.0001797901766936116, "loss": 46.0, "step": 308 }, { "epoch": 0.05214529806353626, "grad_norm": 0.00044293675455264747, "learning_rate": 0.00017965911709224395, "loss": 46.0, "step": 309 }, { "epoch": 0.05231405307345062, "grad_norm": 0.0004309279902372509, "learning_rate": 0.00017952768200119992, "loss": 46.0, "step": 310 }, { "epoch": 0.052482808083364973, "grad_norm": 0.00038651516661047935, "learning_rate": 0.0001793958720400259, "loss": 46.0, "step": 311 }, { "epoch": 0.052651563093279334, "grad_norm": 0.000503813847899437, "learning_rate": 0.00017926368783003537, "loss": 46.0, "step": 312 }, { "epoch": 0.05282031810319369, "grad_norm": 0.0007580111850984395, "learning_rate": 0.00017913112999430584, "loss": 46.0, "step": 313 }, { "epoch": 0.05298907311310805, "grad_norm": 0.0003376993117853999, "learning_rate": 0.00017899819915767598, "loss": 46.0, "step": 314 }, { "epoch": 0.0531578281230224, "grad_norm": 0.0003070792299695313, "learning_rate": 0.00017886489594674273, "loss": 46.0, "step": 315 }, { "epoch": 0.053326583132936756, "grad_norm": 0.0006369905895553529, "learning_rate": 0.00017873122098985826, "loss": 46.0, "step": 316 }, { "epoch": 0.053495338142851116, "grad_norm": 0.0005446787690743804, "learning_rate": 0.00017859717491712707, "loss": 46.0, "step": 317 }, { "epoch": 0.05366409315276547, "grad_norm": 0.0003436711267568171, "learning_rate": 0.0001784627583604029, "loss": 46.0, "step": 318 }, { "epoch": 0.05383284816267983, "grad_norm": 0.0004495533648878336, "learning_rate": 0.000178327971953286, "loss": 46.0, "step": 319 }, { "epoch": 0.054001603172594184, "grad_norm": 0.0004997072974219918, "learning_rate": 0.00017819281633111984, "loss": 46.0, "step": 320 }, { "epoch": 0.054170358182508545, "grad_norm": 0.00040087226079776883, "learning_rate": 0.0001780572921309883, "loss": 46.0, "step": 321 }, { "epoch": 0.0543391131924229, "grad_norm": 0.0005316153983585536, "learning_rate": 0.0001779213999917127, "loss": 46.0, "step": 322 }, { "epoch": 0.05450786820233726, "grad_norm": 0.0006078218575567007, "learning_rate": 0.00017778514055384866, "loss": 46.0, "step": 323 }, { "epoch": 0.05467662321225161, "grad_norm": 0.0004828522796742618, "learning_rate": 0.00017764851445968308, "loss": 46.0, "step": 324 }, { "epoch": 0.05484537822216597, "grad_norm": 0.00032997396192513406, "learning_rate": 0.0001775115223532313, "loss": 46.0, "step": 325 }, { "epoch": 0.05501413323208033, "grad_norm": 0.00033607761724852026, "learning_rate": 0.00017737416488023384, "loss": 46.0, "step": 326 }, { "epoch": 0.05518288824199469, "grad_norm": 0.0004014720907434821, "learning_rate": 0.00017723644268815344, "loss": 46.0, "step": 327 }, { "epoch": 0.05535164325190904, "grad_norm": 0.000525740790180862, "learning_rate": 0.00017709835642617212, "loss": 46.0, "step": 328 }, { "epoch": 0.055520398261823395, "grad_norm": 0.0007568416767753661, "learning_rate": 0.00017695990674518788, "loss": 46.0, "step": 329 }, { "epoch": 0.055689153271737755, "grad_norm": 0.0004670285852625966, "learning_rate": 0.0001768210942978119, "loss": 46.0, "step": 330 }, { "epoch": 0.05585790828165211, "grad_norm": 0.0005972511135041714, "learning_rate": 0.00017668191973836529, "loss": 46.0, "step": 331 }, { "epoch": 0.05602666329156647, "grad_norm": 0.00048381276428699493, "learning_rate": 0.000176542383722876, "loss": 46.0, "step": 332 }, { "epoch": 0.05619541830148082, "grad_norm": 0.0007428377284668386, "learning_rate": 0.0001764024869090758, "loss": 46.0, "step": 333 }, { "epoch": 0.056364173311395184, "grad_norm": 0.000608109578024596, "learning_rate": 0.00017626222995639724, "loss": 46.0, "step": 334 }, { "epoch": 0.05653292832130954, "grad_norm": 0.0006571222911588848, "learning_rate": 0.00017612161352597032, "loss": 46.0, "step": 335 }, { "epoch": 0.0567016833312239, "grad_norm": 0.0007276976830326021, "learning_rate": 0.00017598063828061958, "loss": 46.0, "step": 336 }, { "epoch": 0.05687043834113825, "grad_norm": 0.0007646044250577688, "learning_rate": 0.000175839304884861, "loss": 46.0, "step": 337 }, { "epoch": 0.05703919335105261, "grad_norm": 0.0004487757687456906, "learning_rate": 0.00017569761400489862, "loss": 46.0, "step": 338 }, { "epoch": 0.057207948360966966, "grad_norm": 0.0004590119933709502, "learning_rate": 0.0001755555663086216, "loss": 46.0, "step": 339 }, { "epoch": 0.05737670337088132, "grad_norm": 0.00019634263298939914, "learning_rate": 0.0001754131624656011, "loss": 46.0, "step": 340 }, { "epoch": 0.05754545838079568, "grad_norm": 0.0008665765053592622, "learning_rate": 0.00017527040314708702, "loss": 46.0, "step": 341 }, { "epoch": 0.057714213390710034, "grad_norm": 0.0010710041970014572, "learning_rate": 0.0001751272890260048, "loss": 46.0, "step": 342 }, { "epoch": 0.057882968400624395, "grad_norm": 0.0010121267987415195, "learning_rate": 0.0001749838207769524, "loss": 46.0, "step": 343 }, { "epoch": 0.05805172341053875, "grad_norm": 0.0006279960507526994, "learning_rate": 0.00017483999907619695, "loss": 46.0, "step": 344 }, { "epoch": 0.05822047842045311, "grad_norm": 0.0006246848497539759, "learning_rate": 0.00017469582460167174, "loss": 46.0, "step": 345 }, { "epoch": 0.05838923343036746, "grad_norm": 0.000596629804931581, "learning_rate": 0.00017455129803297287, "loss": 46.0, "step": 346 }, { "epoch": 0.05855798844028182, "grad_norm": 0.0008878617081791162, "learning_rate": 0.00017440642005135614, "loss": 46.0, "step": 347 }, { "epoch": 0.05872674345019618, "grad_norm": 0.0003601356002036482, "learning_rate": 0.0001742611913397338, "loss": 46.0, "step": 348 }, { "epoch": 0.05889549846011054, "grad_norm": 0.0003498071455396712, "learning_rate": 0.00017411561258267127, "loss": 46.0, "step": 349 }, { "epoch": 0.05906425347002489, "grad_norm": 0.0007128751021809876, "learning_rate": 0.0001739696844663841, "loss": 46.0, "step": 350 }, { "epoch": 0.059233008479939245, "grad_norm": 0.00047719714348204434, "learning_rate": 0.0001738234076787346, "loss": 46.0, "step": 351 }, { "epoch": 0.059401763489853605, "grad_norm": 0.0006972616538405418, "learning_rate": 0.00017367678290922852, "loss": 46.0, "step": 352 }, { "epoch": 0.05957051849976796, "grad_norm": 0.0005275082658044994, "learning_rate": 0.00017352981084901194, "loss": 46.0, "step": 353 }, { "epoch": 0.05973927350968232, "grad_norm": 0.001250717556104064, "learning_rate": 0.000173382492190868, "loss": 46.0, "step": 354 }, { "epoch": 0.05990802851959667, "grad_norm": 0.0008419329533353448, "learning_rate": 0.00017323482762921354, "loss": 46.0, "step": 355 }, { "epoch": 0.060076783529511034, "grad_norm": 0.0008670453680679202, "learning_rate": 0.000173086817860096, "loss": 46.0, "step": 356 }, { "epoch": 0.06024553853942539, "grad_norm": 0.0014900369569659233, "learning_rate": 0.00017293846358118988, "loss": 46.0, "step": 357 }, { "epoch": 0.06041429354933975, "grad_norm": 0.001586690079420805, "learning_rate": 0.0001727897654917937, "loss": 46.0, "step": 358 }, { "epoch": 0.0605830485592541, "grad_norm": 0.0009236481855623424, "learning_rate": 0.00017264072429282656, "loss": 46.0, "step": 359 }, { "epoch": 0.06075180356916846, "grad_norm": 0.0011958472896367311, "learning_rate": 0.00017249134068682487, "loss": 46.0, "step": 360 }, { "epoch": 0.060920558579082816, "grad_norm": 0.0006783442222513258, "learning_rate": 0.00017234161537793913, "loss": 46.0, "step": 361 }, { "epoch": 0.06108931358899718, "grad_norm": 0.0010664722649380565, "learning_rate": 0.0001721915490719304, "loss": 46.0, "step": 362 }, { "epoch": 0.06125806859891153, "grad_norm": 0.0012871964136138558, "learning_rate": 0.00017204114247616715, "loss": 46.0, "step": 363 }, { "epoch": 0.061426823608825884, "grad_norm": 0.0006326769944280386, "learning_rate": 0.00017189039629962193, "loss": 46.0, "step": 364 }, { "epoch": 0.061595578618740245, "grad_norm": 0.0005951938219368458, "learning_rate": 0.00017173931125286792, "loss": 46.0, "step": 365 }, { "epoch": 0.061595578618740245, "eval_loss": 11.5, "eval_runtime": 14.8849, "eval_samples_per_second": 167.619, "eval_steps_per_second": 83.843, "step": 365 }, { "epoch": 0.0617643336286546, "grad_norm": 0.000900912971701473, "learning_rate": 0.00017158788804807565, "loss": 46.0, "step": 366 }, { "epoch": 0.06193308863856896, "grad_norm": 0.0008746191742829978, "learning_rate": 0.00017143612739900963, "loss": 46.0, "step": 367 }, { "epoch": 0.06210184364848331, "grad_norm": 0.0008302325149998069, "learning_rate": 0.00017128403002102493, "loss": 46.0, "step": 368 }, { "epoch": 0.06227059865839767, "grad_norm": 0.0016092363512143493, "learning_rate": 0.00017113159663106396, "loss": 46.0, "step": 369 }, { "epoch": 0.06243935366831203, "grad_norm": 0.0010375389829277992, "learning_rate": 0.00017097882794765292, "loss": 46.0, "step": 370 }, { "epoch": 0.06260810867822639, "grad_norm": 0.0010077969636768103, "learning_rate": 0.00017082572469089845, "loss": 46.0, "step": 371 }, { "epoch": 0.06277686368814074, "grad_norm": 0.0014613711973652244, "learning_rate": 0.00017067228758248443, "loss": 46.0, "step": 372 }, { "epoch": 0.0629456186980551, "grad_norm": 0.0010287724435329437, "learning_rate": 0.00017051851734566827, "loss": 46.0, "step": 373 }, { "epoch": 0.06311437370796946, "grad_norm": 0.0012760079698637128, "learning_rate": 0.00017036441470527768, "loss": 46.0, "step": 374 }, { "epoch": 0.06328312871788382, "grad_norm": 0.001163105363957584, "learning_rate": 0.00017020998038770724, "loss": 46.0, "step": 375 }, { "epoch": 0.06345188372779817, "grad_norm": 0.0008599930442869663, "learning_rate": 0.00017005521512091493, "loss": 46.0, "step": 376 }, { "epoch": 0.06362063873771252, "grad_norm": 0.001305455924011767, "learning_rate": 0.0001699001196344188, "loss": 46.0, "step": 377 }, { "epoch": 0.06378939374762688, "grad_norm": 0.0006688539870083332, "learning_rate": 0.00016974469465929338, "loss": 46.0, "step": 378 }, { "epoch": 0.06395814875754124, "grad_norm": 0.0010529232677072287, "learning_rate": 0.00016958894092816636, "loss": 46.0, "step": 379 }, { "epoch": 0.0641269037674556, "grad_norm": 0.00040389568312093616, "learning_rate": 0.00016943285917521506, "loss": 46.0, "step": 380 }, { "epoch": 0.06429565877736995, "grad_norm": 0.0007607465959154069, "learning_rate": 0.00016927645013616301, "loss": 46.0, "step": 381 }, { "epoch": 0.0644644137872843, "grad_norm": 0.0007972440216690302, "learning_rate": 0.0001691197145482765, "loss": 46.0, "step": 382 }, { "epoch": 0.06463316879719867, "grad_norm": 0.0007731697405688465, "learning_rate": 0.00016896265315036098, "loss": 46.0, "step": 383 }, { "epoch": 0.06480192380711303, "grad_norm": 0.0010464886436238885, "learning_rate": 0.00016880526668275783, "loss": 46.0, "step": 384 }, { "epoch": 0.06497067881702738, "grad_norm": 0.0012620363850146532, "learning_rate": 0.00016864755588734057, "loss": 46.0, "step": 385 }, { "epoch": 0.06513943382694173, "grad_norm": 0.0008887553703971207, "learning_rate": 0.00016848952150751154, "loss": 46.0, "step": 386 }, { "epoch": 0.06530818883685609, "grad_norm": 0.0008710179827176034, "learning_rate": 0.0001683311642881984, "loss": 46.0, "step": 387 }, { "epoch": 0.06547694384677046, "grad_norm": 0.0006600015913136303, "learning_rate": 0.0001681724849758506, "loss": 46.0, "step": 388 }, { "epoch": 0.06564569885668481, "grad_norm": 0.0011270915856584907, "learning_rate": 0.00016801348431843573, "loss": 46.0, "step": 389 }, { "epoch": 0.06581445386659916, "grad_norm": 0.0004072414885740727, "learning_rate": 0.00016785416306543626, "loss": 46.0, "step": 390 }, { "epoch": 0.06598320887651352, "grad_norm": 0.0013463982613757253, "learning_rate": 0.0001676945219678457, "loss": 46.0, "step": 391 }, { "epoch": 0.06615196388642788, "grad_norm": 0.0008421620586887002, "learning_rate": 0.00016753456177816535, "loss": 46.0, "step": 392 }, { "epoch": 0.06632071889634224, "grad_norm": 0.0011627651983872056, "learning_rate": 0.00016737428325040048, "loss": 46.0, "step": 393 }, { "epoch": 0.06648947390625659, "grad_norm": 0.0010495700407773256, "learning_rate": 0.00016721368714005705, "loss": 46.0, "step": 394 }, { "epoch": 0.06665822891617094, "grad_norm": 0.001558567862957716, "learning_rate": 0.0001670527742041379, "loss": 46.0, "step": 395 }, { "epoch": 0.06682698392608531, "grad_norm": 0.0007277853437699378, "learning_rate": 0.0001668915452011394, "loss": 46.0, "step": 396 }, { "epoch": 0.06699573893599967, "grad_norm": 0.0010374211706221104, "learning_rate": 0.0001667300008910476, "loss": 46.0, "step": 397 }, { "epoch": 0.06716449394591402, "grad_norm": 0.0010681662242859602, "learning_rate": 0.000166568142035335, "loss": 46.0, "step": 398 }, { "epoch": 0.06733324895582837, "grad_norm": 0.0012568504316732287, "learning_rate": 0.00016640596939695673, "loss": 46.0, "step": 399 }, { "epoch": 0.06750200396574273, "grad_norm": 0.0010285298340022564, "learning_rate": 0.00016624348374034685, "loss": 46.0, "step": 400 }, { "epoch": 0.0676707589756571, "grad_norm": 0.0012999747414141893, "learning_rate": 0.0001660806858314151, "loss": 46.0, "step": 401 }, { "epoch": 0.06783951398557145, "grad_norm": 0.0010224470170214772, "learning_rate": 0.000165917576437543, "loss": 46.0, "step": 402 }, { "epoch": 0.0680082689954858, "grad_norm": 0.001064454554580152, "learning_rate": 0.00016575415632758027, "loss": 46.0, "step": 403 }, { "epoch": 0.06817702400540016, "grad_norm": 0.0012302878312766552, "learning_rate": 0.0001655904262718413, "loss": 46.0, "step": 404 }, { "epoch": 0.06834577901531452, "grad_norm": 0.0010292811784893274, "learning_rate": 0.00016542638704210153, "loss": 46.0, "step": 405 }, { "epoch": 0.06851453402522888, "grad_norm": 0.0017992197535932064, "learning_rate": 0.00016526203941159365, "loss": 46.0, "step": 406 }, { "epoch": 0.06868328903514323, "grad_norm": 0.0012390941847115755, "learning_rate": 0.00016509738415500412, "loss": 46.0, "step": 407 }, { "epoch": 0.06885204404505758, "grad_norm": 0.0012958311708644032, "learning_rate": 0.00016493242204846945, "loss": 46.0, "step": 408 }, { "epoch": 0.06902079905497195, "grad_norm": 0.0008734130533412099, "learning_rate": 0.00016476715386957256, "loss": 46.0, "step": 409 }, { "epoch": 0.0691895540648863, "grad_norm": 0.0009476335253566504, "learning_rate": 0.00016460158039733908, "loss": 46.0, "step": 410 }, { "epoch": 0.06935830907480066, "grad_norm": 0.0008190583321265876, "learning_rate": 0.0001644357024122337, "loss": 46.0, "step": 411 }, { "epoch": 0.06952706408471501, "grad_norm": 0.0010068168630823493, "learning_rate": 0.00016426952069615656, "loss": 46.0, "step": 412 }, { "epoch": 0.06969581909462937, "grad_norm": 0.0009863304439932108, "learning_rate": 0.00016410303603243943, "loss": 46.0, "step": 413 }, { "epoch": 0.06986457410454373, "grad_norm": 0.0013538243947550654, "learning_rate": 0.0001639362492058421, "loss": 46.0, "step": 414 }, { "epoch": 0.07003332911445809, "grad_norm": 0.001239404664374888, "learning_rate": 0.00016376916100254864, "loss": 46.0, "step": 415 }, { "epoch": 0.07020208412437244, "grad_norm": 0.0022674521896988153, "learning_rate": 0.0001636017722101638, "loss": 46.0, "step": 416 }, { "epoch": 0.0703708391342868, "grad_norm": 0.0006947132642380893, "learning_rate": 0.00016343408361770915, "loss": 46.0, "step": 417 }, { "epoch": 0.07053959414420116, "grad_norm": 0.0007783303153701127, "learning_rate": 0.00016326609601561952, "loss": 46.0, "step": 418 }, { "epoch": 0.07070834915411552, "grad_norm": 0.0016195345669984818, "learning_rate": 0.000163097810195739, "loss": 46.0, "step": 419 }, { "epoch": 0.07087710416402987, "grad_norm": 0.000915382755920291, "learning_rate": 0.00016292922695131755, "loss": 46.0, "step": 420 }, { "epoch": 0.07104585917394422, "grad_norm": 0.001205647480674088, "learning_rate": 0.00016276034707700713, "loss": 46.0, "step": 421 }, { "epoch": 0.07121461418385858, "grad_norm": 0.0012716164346784353, "learning_rate": 0.0001625911713688578, "loss": 46.0, "step": 422 }, { "epoch": 0.07138336919377294, "grad_norm": 0.0010019976180046797, "learning_rate": 0.0001624217006243141, "loss": 46.0, "step": 423 }, { "epoch": 0.0715521242036873, "grad_norm": 0.0013898113975301385, "learning_rate": 0.00016225193564221142, "loss": 46.0, "step": 424 }, { "epoch": 0.07172087921360165, "grad_norm": 0.0012531159445643425, "learning_rate": 0.000162081877222772, "loss": 46.0, "step": 425 }, { "epoch": 0.071889634223516, "grad_norm": 0.0008526226156391203, "learning_rate": 0.0001619115261676012, "loss": 46.0, "step": 426 }, { "epoch": 0.07205838923343037, "grad_norm": 0.0010567533317953348, "learning_rate": 0.00016174088327968394, "loss": 46.0, "step": 427 }, { "epoch": 0.07222714424334473, "grad_norm": 0.0010411246912553906, "learning_rate": 0.0001615699493633806, "loss": 46.0, "step": 428 }, { "epoch": 0.07239589925325908, "grad_norm": 0.001222698949277401, "learning_rate": 0.00016139872522442347, "loss": 46.0, "step": 429 }, { "epoch": 0.07256465426317343, "grad_norm": 0.0014631192898377776, "learning_rate": 0.00016122721166991286, "loss": 46.0, "step": 430 }, { "epoch": 0.0727334092730878, "grad_norm": 0.0016150050796568394, "learning_rate": 0.00016105540950831323, "loss": 46.0, "step": 431 }, { "epoch": 0.07290216428300215, "grad_norm": 0.0011149218771606684, "learning_rate": 0.00016088331954944952, "loss": 46.0, "step": 432 }, { "epoch": 0.07307091929291651, "grad_norm": 0.0016063664807006717, "learning_rate": 0.00016071094260450327, "loss": 46.0, "step": 433 }, { "epoch": 0.07323967430283086, "grad_norm": 0.0010584626579657197, "learning_rate": 0.00016053827948600872, "loss": 46.0, "step": 434 }, { "epoch": 0.07340842931274522, "grad_norm": 0.0011532072676345706, "learning_rate": 0.00016036533100784913, "loss": 46.0, "step": 435 }, { "epoch": 0.07357718432265958, "grad_norm": 0.0015770825557410717, "learning_rate": 0.00016019209798525278, "loss": 46.0, "step": 436 }, { "epoch": 0.07374593933257394, "grad_norm": 0.0012206478277221322, "learning_rate": 0.00016001858123478927, "loss": 46.0, "step": 437 }, { "epoch": 0.07391469434248829, "grad_norm": 0.0011760505149140954, "learning_rate": 0.00015984478157436558, "loss": 46.0, "step": 438 }, { "epoch": 0.07408344935240264, "grad_norm": 0.0006681337836198509, "learning_rate": 0.00015967069982322223, "loss": 46.0, "step": 439 }, { "epoch": 0.07425220436231701, "grad_norm": 0.0010028331307694316, "learning_rate": 0.0001594963368019295, "loss": 46.0, "step": 440 }, { "epoch": 0.07442095937223137, "grad_norm": 0.0014602706069126725, "learning_rate": 0.0001593216933323834, "loss": 46.0, "step": 441 }, { "epoch": 0.07458971438214572, "grad_norm": 0.00084131263429299, "learning_rate": 0.000159146770237802, "loss": 46.0, "step": 442 }, { "epoch": 0.07475846939206007, "grad_norm": 0.0013664717553183436, "learning_rate": 0.00015897156834272132, "loss": 46.0, "step": 443 }, { "epoch": 0.07492722440197444, "grad_norm": 0.0013984786346554756, "learning_rate": 0.00015879608847299163, "loss": 46.0, "step": 444 }, { "epoch": 0.0750959794118888, "grad_norm": 0.0006391989882104099, "learning_rate": 0.00015862033145577352, "loss": 46.0, "step": 445 }, { "epoch": 0.07526473442180315, "grad_norm": 0.0014059271197766066, "learning_rate": 0.00015844429811953393, "loss": 46.0, "step": 446 }, { "epoch": 0.0754334894317175, "grad_norm": 0.0005209510563872755, "learning_rate": 0.00015826798929404227, "loss": 46.0, "step": 447 }, { "epoch": 0.07560224444163186, "grad_norm": 0.0006487764185294509, "learning_rate": 0.00015809140581036658, "loss": 46.0, "step": 448 }, { "epoch": 0.07577099945154622, "grad_norm": 0.000935686519369483, "learning_rate": 0.0001579145485008695, "loss": 46.0, "step": 449 }, { "epoch": 0.07593975446146058, "grad_norm": 0.001003348152153194, "learning_rate": 0.00015773741819920446, "loss": 46.0, "step": 450 }, { "epoch": 0.07610850947137493, "grad_norm": 0.0011402338277548552, "learning_rate": 0.00015756001574031162, "loss": 46.0, "step": 451 }, { "epoch": 0.07627726448128928, "grad_norm": 0.0009165026131086051, "learning_rate": 0.00015738234196041408, "loss": 46.0, "step": 452 }, { "epoch": 0.07644601949120365, "grad_norm": 0.0012046222109347582, "learning_rate": 0.00015720439769701384, "loss": 46.0, "step": 453 }, { "epoch": 0.076614774501118, "grad_norm": 0.0013616887154057622, "learning_rate": 0.00015702618378888791, "loss": 46.0, "step": 454 }, { "epoch": 0.07678352951103236, "grad_norm": 0.0010609148303046823, "learning_rate": 0.00015684770107608428, "loss": 46.0, "step": 455 }, { "epoch": 0.07695228452094671, "grad_norm": 0.0010137903736904263, "learning_rate": 0.000156668950399918, "loss": 46.0, "step": 456 }, { "epoch": 0.07712103953086107, "grad_norm": 0.0008188852807506919, "learning_rate": 0.0001564899326029673, "loss": 46.0, "step": 457 }, { "epoch": 0.07728979454077543, "grad_norm": 0.001042909687384963, "learning_rate": 0.00015631064852906942, "loss": 46.0, "step": 458 }, { "epoch": 0.07745854955068979, "grad_norm": 0.001120585948228836, "learning_rate": 0.00015613109902331686, "loss": 46.0, "step": 459 }, { "epoch": 0.07762730456060414, "grad_norm": 0.0009330071043223143, "learning_rate": 0.00015595128493205325, "loss": 46.0, "step": 460 }, { "epoch": 0.0777960595705185, "grad_norm": 0.0011058711679652333, "learning_rate": 0.00015577120710286933, "loss": 46.0, "step": 461 }, { "epoch": 0.07796481458043286, "grad_norm": 0.0009043689933605492, "learning_rate": 0.00015559086638459917, "loss": 46.0, "step": 462 }, { "epoch": 0.07813356959034722, "grad_norm": 0.0009576305164955556, "learning_rate": 0.00015541026362731586, "loss": 46.0, "step": 463 }, { "epoch": 0.07830232460026157, "grad_norm": 0.0006179432384669781, "learning_rate": 0.0001552293996823278, "loss": 46.0, "step": 464 }, { "epoch": 0.07847107961017592, "grad_norm": 0.0009595199953764677, "learning_rate": 0.00015504827540217445, "loss": 46.0, "step": 465 }, { "epoch": 0.07863983462009029, "grad_norm": 0.0005762246437370777, "learning_rate": 0.00015486689164062248, "loss": 46.0, "step": 466 }, { "epoch": 0.07880858963000464, "grad_norm": 0.0010678599355742335, "learning_rate": 0.0001546852492526617, "loss": 46.0, "step": 467 }, { "epoch": 0.078977344639919, "grad_norm": 0.001243466162122786, "learning_rate": 0.00015450334909450087, "loss": 46.0, "step": 468 }, { "epoch": 0.07914609964983335, "grad_norm": 0.0010546196717768908, "learning_rate": 0.00015432119202356398, "loss": 46.0, "step": 469 }, { "epoch": 0.0793148546597477, "grad_norm": 0.0006426689797081053, "learning_rate": 0.0001541387788984859, "loss": 46.0, "step": 470 }, { "epoch": 0.07948360966966207, "grad_norm": 0.0011058398522436619, "learning_rate": 0.00015395611057910852, "loss": 46.0, "step": 471 }, { "epoch": 0.07965236467957643, "grad_norm": 0.0012805294245481491, "learning_rate": 0.0001537731879264767, "loss": 46.0, "step": 472 }, { "epoch": 0.07982111968949078, "grad_norm": 0.001057624351233244, "learning_rate": 0.000153590011802834, "loss": 46.0, "step": 473 }, { "epoch": 0.07998987469940513, "grad_norm": 0.0009916762355715036, "learning_rate": 0.00015340658307161885, "loss": 46.0, "step": 474 }, { "epoch": 0.0801586297093195, "grad_norm": 0.0011284681968390942, "learning_rate": 0.0001532229025974605, "loss": 46.0, "step": 475 }, { "epoch": 0.08032738471923385, "grad_norm": 0.0010007736273109913, "learning_rate": 0.00015303897124617467, "loss": 46.0, "step": 476 }, { "epoch": 0.08049613972914821, "grad_norm": 0.001305941492319107, "learning_rate": 0.00015285478988475972, "loss": 46.0, "step": 477 }, { "epoch": 0.08066489473906256, "grad_norm": 0.0006156846648082137, "learning_rate": 0.00015267035938139252, "loss": 46.0, "step": 478 }, { "epoch": 0.08083364974897693, "grad_norm": 0.0009203223744407296, "learning_rate": 0.00015248568060542423, "loss": 46.0, "step": 479 }, { "epoch": 0.08100240475889128, "grad_norm": 0.0008335929596796632, "learning_rate": 0.00015230075442737638, "loss": 46.0, "step": 480 }, { "epoch": 0.08117115976880564, "grad_norm": 0.001341037219390273, "learning_rate": 0.00015211558171893664, "loss": 46.0, "step": 481 }, { "epoch": 0.08133991477871999, "grad_norm": 0.0010233522625640035, "learning_rate": 0.00015193016335295477, "loss": 46.0, "step": 482 }, { "epoch": 0.08150866978863434, "grad_norm": 0.000823578389827162, "learning_rate": 0.00015174450020343842, "loss": 46.0, "step": 483 }, { "epoch": 0.08167742479854871, "grad_norm": 0.00170811521820724, "learning_rate": 0.00015155859314554924, "loss": 46.0, "step": 484 }, { "epoch": 0.08184617980846307, "grad_norm": 0.0008304553339257836, "learning_rate": 0.00015137244305559838, "loss": 46.0, "step": 485 }, { "epoch": 0.08201493481837742, "grad_norm": 0.0009338143863715231, "learning_rate": 0.00015118605081104275, "loss": 46.0, "step": 486 }, { "epoch": 0.08218368982829177, "grad_norm": 0.0007202426786534488, "learning_rate": 0.0001509994172904806, "loss": 46.0, "step": 487 }, { "epoch": 0.08235244483820614, "grad_norm": 0.0007982858805917203, "learning_rate": 0.00015081254337364754, "loss": 46.0, "step": 488 }, { "epoch": 0.0825211998481205, "grad_norm": 0.0007698725094087422, "learning_rate": 0.00015062542994141232, "loss": 46.0, "step": 489 }, { "epoch": 0.08268995485803485, "grad_norm": 0.0010482663055881858, "learning_rate": 0.00015043807787577262, "loss": 46.0, "step": 490 }, { "epoch": 0.0828587098679492, "grad_norm": 0.0014217188581824303, "learning_rate": 0.00015025048805985113, "loss": 46.0, "step": 491 }, { "epoch": 0.08302746487786355, "grad_norm": 0.0014352472499012947, "learning_rate": 0.00015006266137789108, "loss": 46.0, "step": 492 }, { "epoch": 0.08319621988777792, "grad_norm": 0.001194497337564826, "learning_rate": 0.0001498745987152523, "loss": 46.0, "step": 493 }, { "epoch": 0.08336497489769228, "grad_norm": 0.0008624936453998089, "learning_rate": 0.0001496863009584069, "loss": 46.0, "step": 494 }, { "epoch": 0.08353372990760663, "grad_norm": 0.001185000641271472, "learning_rate": 0.00014949776899493523, "loss": 46.0, "step": 495 }, { "epoch": 0.08370248491752098, "grad_norm": 0.0006659825448878109, "learning_rate": 0.00014930900371352157, "loss": 46.0, "step": 496 }, { "epoch": 0.08387123992743535, "grad_norm": 0.0014636954292654991, "learning_rate": 0.00014912000600394995, "loss": 46.0, "step": 497 }, { "epoch": 0.0840399949373497, "grad_norm": 0.0007161729736253619, "learning_rate": 0.00014893077675710013, "loss": 46.0, "step": 498 }, { "epoch": 0.08420874994726406, "grad_norm": 0.0008941322448663414, "learning_rate": 0.00014874131686494314, "loss": 46.0, "step": 499 }, { "epoch": 0.08437750495717841, "grad_norm": 0.0007309920038096607, "learning_rate": 0.00014855162722053725, "loss": 46.0, "step": 500 }, { "epoch": 0.08454625996709278, "grad_norm": 0.0008992912480607629, "learning_rate": 0.00014836170871802379, "loss": 46.0, "step": 501 }, { "epoch": 0.08471501497700713, "grad_norm": 0.001996345119550824, "learning_rate": 0.0001481715622526227, "loss": 46.0, "step": 502 }, { "epoch": 0.08488376998692149, "grad_norm": 0.001881232368759811, "learning_rate": 0.00014798118872062855, "loss": 46.0, "step": 503 }, { "epoch": 0.08505252499683584, "grad_norm": 0.00150417466647923, "learning_rate": 0.00014779058901940635, "loss": 46.0, "step": 504 }, { "epoch": 0.0852212800067502, "grad_norm": 0.0018176480662077665, "learning_rate": 0.000147599764047387, "loss": 46.0, "step": 505 }, { "epoch": 0.08539003501666456, "grad_norm": 0.0010128653375431895, "learning_rate": 0.00014740871470406342, "loss": 46.0, "step": 506 }, { "epoch": 0.08555879002657892, "grad_norm": 0.0005888533196412027, "learning_rate": 0.00014721744188998605, "loss": 46.0, "step": 507 }, { "epoch": 0.08572754503649327, "grad_norm": 0.0010933991288766265, "learning_rate": 0.00014702594650675872, "loss": 46.0, "step": 508 }, { "epoch": 0.08589630004640762, "grad_norm": 0.0009146520169451833, "learning_rate": 0.00014683422945703452, "loss": 46.0, "step": 509 }, { "epoch": 0.08606505505632199, "grad_norm": 0.0010995094198733568, "learning_rate": 0.0001466422916445112, "loss": 46.0, "step": 510 }, { "epoch": 0.08623381006623634, "grad_norm": 0.001196429249830544, "learning_rate": 0.00014645013397392723, "loss": 46.0, "step": 511 }, { "epoch": 0.0864025650761507, "grad_norm": 0.0016038153553381562, "learning_rate": 0.00014625775735105744, "loss": 46.0, "step": 512 }, { "epoch": 0.08657132008606505, "grad_norm": 0.00237817014567554, "learning_rate": 0.00014606516268270874, "loss": 46.0, "step": 513 }, { "epoch": 0.08674007509597942, "grad_norm": 0.0008629761869087815, "learning_rate": 0.00014587235087671577, "loss": 46.0, "step": 514 }, { "epoch": 0.08690883010589377, "grad_norm": 0.0009417358669452369, "learning_rate": 0.0001456793228419368, "loss": 46.0, "step": 515 }, { "epoch": 0.08707758511580813, "grad_norm": 0.0004477721522562206, "learning_rate": 0.0001454860794882492, "loss": 46.0, "step": 516 }, { "epoch": 0.08724634012572248, "grad_norm": 0.001148428302258253, "learning_rate": 0.00014529262172654545, "loss": 46.0, "step": 517 }, { "epoch": 0.08741509513563683, "grad_norm": 0.001068097772076726, "learning_rate": 0.00014509895046872854, "loss": 46.0, "step": 518 }, { "epoch": 0.0875838501455512, "grad_norm": 0.0017000689404085279, "learning_rate": 0.00014490506662770796, "loss": 46.0, "step": 519 }, { "epoch": 0.08775260515546555, "grad_norm": 0.0009183180518448353, "learning_rate": 0.0001447109711173951, "loss": 46.0, "step": 520 }, { "epoch": 0.08792136016537991, "grad_norm": 0.0010085629764944315, "learning_rate": 0.0001445166648526992, "loss": 46.0, "step": 521 }, { "epoch": 0.08809011517529426, "grad_norm": 0.0015139034949243069, "learning_rate": 0.00014432214874952296, "loss": 46.0, "step": 522 }, { "epoch": 0.08825887018520863, "grad_norm": 0.001635671011172235, "learning_rate": 0.00014412742372475808, "loss": 46.0, "step": 523 }, { "epoch": 0.08842762519512298, "grad_norm": 0.0006598219624720514, "learning_rate": 0.00014393249069628112, "loss": 46.0, "step": 524 }, { "epoch": 0.08859638020503734, "grad_norm": 0.0005806029075756669, "learning_rate": 0.00014373735058294918, "loss": 46.0, "step": 525 }, { "epoch": 0.08876513521495169, "grad_norm": 0.0013370051747187972, "learning_rate": 0.00014354200430459537, "loss": 46.0, "step": 526 }, { "epoch": 0.08893389022486604, "grad_norm": 0.001640844508074224, "learning_rate": 0.0001433464527820247, "loss": 46.0, "step": 527 }, { "epoch": 0.08910264523478041, "grad_norm": 0.0014307728270068765, "learning_rate": 0.00014315069693700955, "loss": 46.0, "step": 528 }, { "epoch": 0.08927140024469477, "grad_norm": 0.0017885541310533881, "learning_rate": 0.00014295473769228547, "loss": 46.0, "step": 529 }, { "epoch": 0.08944015525460912, "grad_norm": 0.0008539075497537851, "learning_rate": 0.00014275857597154684, "loss": 46.0, "step": 530 }, { "epoch": 0.08960891026452347, "grad_norm": 0.0015051423106342554, "learning_rate": 0.0001425622126994423, "loss": 46.0, "step": 531 }, { "epoch": 0.08977766527443784, "grad_norm": 0.0010912073776125908, "learning_rate": 0.0001423656488015707, "loss": 46.0, "step": 532 }, { "epoch": 0.0899464202843522, "grad_norm": 0.002204689895734191, "learning_rate": 0.00014216888520447648, "loss": 46.0, "step": 533 }, { "epoch": 0.09011517529426655, "grad_norm": 0.0009545715292915702, "learning_rate": 0.00014197192283564542, "loss": 46.0, "step": 534 }, { "epoch": 0.0902839303041809, "grad_norm": 0.0008314741426147521, "learning_rate": 0.00014177476262350027, "loss": 46.0, "step": 535 }, { "epoch": 0.09045268531409527, "grad_norm": 0.0015365943545475602, "learning_rate": 0.00014157740549739637, "loss": 46.0, "step": 536 }, { "epoch": 0.09062144032400962, "grad_norm": 0.0020499713718891144, "learning_rate": 0.00014137985238761718, "loss": 46.0, "step": 537 }, { "epoch": 0.09079019533392398, "grad_norm": 0.0008071595220826566, "learning_rate": 0.0001411821042253701, "loss": 46.0, "step": 538 }, { "epoch": 0.09095895034383833, "grad_norm": 0.0009972534608095884, "learning_rate": 0.0001409841619427818, "loss": 46.0, "step": 539 }, { "epoch": 0.09112770535375268, "grad_norm": 0.000979638658463955, "learning_rate": 0.00014078602647289414, "loss": 46.0, "step": 540 }, { "epoch": 0.09129646036366705, "grad_norm": 0.0021402165293693542, "learning_rate": 0.00014058769874965944, "loss": 46.0, "step": 541 }, { "epoch": 0.0914652153735814, "grad_norm": 0.001012645079754293, "learning_rate": 0.00014038917970793634, "loss": 46.0, "step": 542 }, { "epoch": 0.09163397038349576, "grad_norm": 0.0011463590199127793, "learning_rate": 0.0001401904702834853, "loss": 46.0, "step": 543 }, { "epoch": 0.09180272539341011, "grad_norm": 0.0007347980281338096, "learning_rate": 0.00013999157141296418, "loss": 46.0, "step": 544 }, { "epoch": 0.09197148040332448, "grad_norm": 0.001035605208016932, "learning_rate": 0.0001397924840339238, "loss": 46.0, "step": 545 }, { "epoch": 0.09214023541323883, "grad_norm": 0.0019607653375715017, "learning_rate": 0.0001395932090848036, "loss": 46.0, "step": 546 }, { "epoch": 0.09230899042315319, "grad_norm": 0.0016435689758509398, "learning_rate": 0.0001393937475049271, "loss": 46.0, "step": 547 }, { "epoch": 0.09247774543306754, "grad_norm": 0.0011168678756803274, "learning_rate": 0.00013919410023449769, "loss": 46.0, "step": 548 }, { "epoch": 0.09264650044298191, "grad_norm": 0.000912736461032182, "learning_rate": 0.00013899426821459385, "loss": 46.0, "step": 549 }, { "epoch": 0.09281525545289626, "grad_norm": 0.0017240199958905578, "learning_rate": 0.00013879425238716504, "loss": 46.0, "step": 550 }, { "epoch": 0.09298401046281062, "grad_norm": 0.00043686252320185304, "learning_rate": 0.00013859405369502718, "loss": 46.0, "step": 551 }, { "epoch": 0.09315276547272497, "grad_norm": 0.0015362072736024857, "learning_rate": 0.000138393673081858, "loss": 46.0, "step": 552 }, { "epoch": 0.09332152048263932, "grad_norm": 0.0010787771316245198, "learning_rate": 0.0001381931114921929, "loss": 46.0, "step": 553 }, { "epoch": 0.09349027549255369, "grad_norm": 0.0015809914330020547, "learning_rate": 0.00013799236987142028, "loss": 46.0, "step": 554 }, { "epoch": 0.09365903050246804, "grad_norm": 0.0007688051555305719, "learning_rate": 0.00013779144916577717, "loss": 46.0, "step": 555 }, { "epoch": 0.0938277855123824, "grad_norm": 0.0016322305891662836, "learning_rate": 0.00013759035032234474, "loss": 46.0, "step": 556 }, { "epoch": 0.09399654052229675, "grad_norm": 0.0008620574371889234, "learning_rate": 0.00013738907428904388, "loss": 46.0, "step": 557 }, { "epoch": 0.09416529553221112, "grad_norm": 0.0013546262634918094, "learning_rate": 0.00013718762201463068, "loss": 46.0, "step": 558 }, { "epoch": 0.09433405054212547, "grad_norm": 0.0010162482503801584, "learning_rate": 0.000136985994448692, "loss": 46.0, "step": 559 }, { "epoch": 0.09450280555203983, "grad_norm": 0.0009128018864430487, "learning_rate": 0.00013678419254164084, "loss": 46.0, "step": 560 }, { "epoch": 0.09467156056195418, "grad_norm": 0.001009635510854423, "learning_rate": 0.00013658221724471227, "loss": 46.0, "step": 561 }, { "epoch": 0.09484031557186853, "grad_norm": 0.001470302464440465, "learning_rate": 0.0001363800695099584, "loss": 46.0, "step": 562 }, { "epoch": 0.0950090705817829, "grad_norm": 0.0010980793740600348, "learning_rate": 0.0001361777502902443, "loss": 46.0, "step": 563 }, { "epoch": 0.09517782559169725, "grad_norm": 0.0007694049854762852, "learning_rate": 0.00013597526053924335, "loss": 46.0, "step": 564 }, { "epoch": 0.09534658060161161, "grad_norm": 0.0008336540777236223, "learning_rate": 0.00013577260121143268, "loss": 46.0, "step": 565 }, { "epoch": 0.09551533561152596, "grad_norm": 0.0015536536229774356, "learning_rate": 0.0001355697732620889, "loss": 46.0, "step": 566 }, { "epoch": 0.09568409062144033, "grad_norm": 0.001046131830662489, "learning_rate": 0.00013536677764728336, "loss": 46.0, "step": 567 }, { "epoch": 0.09585284563135468, "grad_norm": 0.0007584646809846163, "learning_rate": 0.00013516361532387773, "loss": 46.0, "step": 568 }, { "epoch": 0.09602160064126904, "grad_norm": 0.0009031200897879899, "learning_rate": 0.00013496028724951958, "loss": 46.0, "step": 569 }, { "epoch": 0.09619035565118339, "grad_norm": 0.00145262002479285, "learning_rate": 0.00013475679438263764, "loss": 46.0, "step": 570 }, { "epoch": 0.09635911066109776, "grad_norm": 0.0011636154958978295, "learning_rate": 0.00013455313768243756, "loss": 46.0, "step": 571 }, { "epoch": 0.09652786567101211, "grad_norm": 0.0007297865231521428, "learning_rate": 0.00013434931810889719, "loss": 46.0, "step": 572 }, { "epoch": 0.09669662068092647, "grad_norm": 0.0012997461017221212, "learning_rate": 0.00013414533662276207, "loss": 46.0, "step": 573 }, { "epoch": 0.09686537569084082, "grad_norm": 0.0010920744389295578, "learning_rate": 0.00013394119418554107, "loss": 46.0, "step": 574 }, { "epoch": 0.09703413070075517, "grad_norm": 0.0009875032119452953, "learning_rate": 0.00013373689175950163, "loss": 46.0, "step": 575 }, { "epoch": 0.09720288571066954, "grad_norm": 0.0005962883587926626, "learning_rate": 0.0001335324303076654, "loss": 46.0, "step": 576 }, { "epoch": 0.0973716407205839, "grad_norm": 0.0010888517135754228, "learning_rate": 0.00013332781079380358, "loss": 46.0, "step": 577 }, { "epoch": 0.09754039573049825, "grad_norm": 0.0007129855803214014, "learning_rate": 0.0001331230341824324, "loss": 46.0, "step": 578 }, { "epoch": 0.0977091507404126, "grad_norm": 0.000518136948812753, "learning_rate": 0.0001329181014388087, "loss": 46.0, "step": 579 }, { "epoch": 0.09787790575032697, "grad_norm": 0.0012831081403419375, "learning_rate": 0.00013271301352892528, "loss": 46.0, "step": 580 }, { "epoch": 0.09804666076024132, "grad_norm": 0.0007919691852293909, "learning_rate": 0.00013250777141950618, "loss": 46.0, "step": 581 }, { "epoch": 0.09821541577015568, "grad_norm": 0.001164072658866644, "learning_rate": 0.0001323023760780025, "loss": 46.0, "step": 582 }, { "epoch": 0.09838417078007003, "grad_norm": 0.0008439416997134686, "learning_rate": 0.00013209682847258752, "loss": 46.0, "step": 583 }, { "epoch": 0.0985529257899844, "grad_norm": 0.0015564275672659278, "learning_rate": 0.00013189112957215227, "loss": 46.0, "step": 584 }, { "epoch": 0.09872168079989875, "grad_norm": 0.001126340590417385, "learning_rate": 0.0001316852803463009, "loss": 46.0, "step": 585 }, { "epoch": 0.0988904358098131, "grad_norm": 0.00132976402528584, "learning_rate": 0.0001314792817653462, "loss": 46.0, "step": 586 }, { "epoch": 0.09905919081972746, "grad_norm": 0.0006973096751607955, "learning_rate": 0.000131273134800305, "loss": 46.0, "step": 587 }, { "epoch": 0.09922794582964181, "grad_norm": 0.0008691195980645716, "learning_rate": 0.00013106684042289352, "loss": 46.0, "step": 588 }, { "epoch": 0.09939670083955618, "grad_norm": 0.0007818661979399621, "learning_rate": 0.00013086039960552283, "loss": 46.0, "step": 589 }, { "epoch": 0.09956545584947053, "grad_norm": 0.0013533838791772723, "learning_rate": 0.00013065381332129434, "loss": 46.0, "step": 590 }, { "epoch": 0.09973421085938489, "grad_norm": 0.0008758413605391979, "learning_rate": 0.00013044708254399503, "loss": 46.0, "step": 591 }, { "epoch": 0.09990296586929924, "grad_norm": 0.001231315778568387, "learning_rate": 0.00013024020824809317, "loss": 46.0, "step": 592 }, { "epoch": 0.10007172087921361, "grad_norm": 0.0004609159368555993, "learning_rate": 0.00013003319140873328, "loss": 46.0, "step": 593 }, { "epoch": 0.10024047588912796, "grad_norm": 0.0009504602057859302, "learning_rate": 0.000129826033001732, "loss": 46.0, "step": 594 }, { "epoch": 0.10040923089904231, "grad_norm": 0.001184243243187666, "learning_rate": 0.0001296187340035732, "loss": 46.0, "step": 595 }, { "epoch": 0.10057798590895667, "grad_norm": 0.0015000102575868368, "learning_rate": 0.00012941129539140346, "loss": 46.0, "step": 596 }, { "epoch": 0.10074674091887102, "grad_norm": 0.0008899506065063179, "learning_rate": 0.00012920371814302743, "loss": 46.0, "step": 597 }, { "epoch": 0.10091549592878539, "grad_norm": 0.0013121910160407424, "learning_rate": 0.00012899600323690332, "loss": 46.0, "step": 598 }, { "epoch": 0.10108425093869974, "grad_norm": 0.0006298114312812686, "learning_rate": 0.00012878815165213813, "loss": 46.0, "step": 599 }, { "epoch": 0.1012530059486141, "grad_norm": 0.001439255429431796, "learning_rate": 0.00012858016436848323, "loss": 46.0, "step": 600 }, { "epoch": 0.10142176095852845, "grad_norm": 0.001775096170604229, "learning_rate": 0.00012837204236632952, "loss": 46.0, "step": 601 }, { "epoch": 0.10159051596844282, "grad_norm": 0.0008805092074908316, "learning_rate": 0.000128163786626703, "loss": 46.0, "step": 602 }, { "epoch": 0.10175927097835717, "grad_norm": 0.0007393642445094883, "learning_rate": 0.00012795539813126005, "loss": 46.0, "step": 603 }, { "epoch": 0.10192802598827153, "grad_norm": 0.0013515629107132554, "learning_rate": 0.0001277468778622828, "loss": 46.0, "step": 604 }, { "epoch": 0.10209678099818588, "grad_norm": 0.0010095036122947931, "learning_rate": 0.00012753822680267458, "loss": 46.0, "step": 605 }, { "epoch": 0.10226553600810025, "grad_norm": 0.001216367818415165, "learning_rate": 0.00012732944593595515, "loss": 46.0, "step": 606 }, { "epoch": 0.1024342910180146, "grad_norm": 0.0010075365426018834, "learning_rate": 0.00012712053624625618, "loss": 46.0, "step": 607 }, { "epoch": 0.10260304602792895, "grad_norm": 0.00094554980751127, "learning_rate": 0.0001269114987183166, "loss": 46.0, "step": 608 }, { "epoch": 0.10277180103784331, "grad_norm": 0.0011870136950165033, "learning_rate": 0.0001267023343374779, "loss": 46.0, "step": 609 }, { "epoch": 0.10294055604775766, "grad_norm": 0.00031608124845661223, "learning_rate": 0.00012649304408967954, "loss": 46.0, "step": 610 }, { "epoch": 0.10310931105767203, "grad_norm": 0.0017526588635519147, "learning_rate": 0.00012628362896145422, "loss": 46.0, "step": 611 }, { "epoch": 0.10327806606758638, "grad_norm": 0.0016203138511627913, "learning_rate": 0.00012607408993992335, "loss": 46.0, "step": 612 }, { "epoch": 0.10344682107750074, "grad_norm": 0.0017116809030994773, "learning_rate": 0.0001258644280127924, "loss": 46.0, "step": 613 }, { "epoch": 0.10361557608741509, "grad_norm": 0.0010332480305805802, "learning_rate": 0.00012565464416834597, "loss": 46.0, "step": 614 }, { "epoch": 0.10378433109732946, "grad_norm": 0.0016215546056628227, "learning_rate": 0.00012544473939544355, "loss": 46.0, "step": 615 }, { "epoch": 0.10395308610724381, "grad_norm": 0.0011419228976592422, "learning_rate": 0.00012523471468351457, "loss": 46.0, "step": 616 }, { "epoch": 0.10412184111715816, "grad_norm": 0.001963686663657427, "learning_rate": 0.00012502457102255381, "loss": 46.0, "step": 617 }, { "epoch": 0.10429059612707252, "grad_norm": 0.0018488739151507616, "learning_rate": 0.0001248143094031168, "loss": 46.0, "step": 618 }, { "epoch": 0.10445935113698689, "grad_norm": 0.0009021925507113338, "learning_rate": 0.00012460393081631494, "loss": 46.0, "step": 619 }, { "epoch": 0.10462810614690124, "grad_norm": 0.000811565259937197, "learning_rate": 0.00012439343625381117, "loss": 46.0, "step": 620 }, { "epoch": 0.1047968611568156, "grad_norm": 0.0009416831890121102, "learning_rate": 0.00012418282670781502, "loss": 46.0, "step": 621 }, { "epoch": 0.10496561616672995, "grad_norm": 0.0011268195230513811, "learning_rate": 0.000123972103171078, "loss": 46.0, "step": 622 }, { "epoch": 0.1051343711766443, "grad_norm": 0.0008039239328354597, "learning_rate": 0.00012376126663688893, "loss": 46.0, "step": 623 }, { "epoch": 0.10530312618655867, "grad_norm": 0.001955381128937006, "learning_rate": 0.00012355031809906935, "loss": 46.0, "step": 624 }, { "epoch": 0.10547188119647302, "grad_norm": 0.0006566803785972297, "learning_rate": 0.00012333925855196863, "loss": 46.0, "step": 625 }, { "epoch": 0.10564063620638738, "grad_norm": 0.0007125309784896672, "learning_rate": 0.00012312808899045955, "loss": 46.0, "step": 626 }, { "epoch": 0.10580939121630173, "grad_norm": 0.0008565658936277032, "learning_rate": 0.00012291681040993332, "loss": 46.0, "step": 627 }, { "epoch": 0.1059781462262161, "grad_norm": 0.0012913001701235771, "learning_rate": 0.00012270542380629515, "loss": 46.0, "step": 628 }, { "epoch": 0.10614690123613045, "grad_norm": 0.0005761163774877787, "learning_rate": 0.00012249393017595936, "loss": 46.0, "step": 629 }, { "epoch": 0.1063156562460448, "grad_norm": 0.0014059852110221982, "learning_rate": 0.0001222823305158448, "loss": 46.0, "step": 630 }, { "epoch": 0.10648441125595916, "grad_norm": 0.0009610773995518684, "learning_rate": 0.0001220706258233701, "loss": 46.0, "step": 631 }, { "epoch": 0.10665316626587351, "grad_norm": 0.00141863152384758, "learning_rate": 0.000121858817096449, "loss": 46.0, "step": 632 }, { "epoch": 0.10682192127578788, "grad_norm": 0.0008159265271387994, "learning_rate": 0.00012164690533348557, "loss": 46.0, "step": 633 }, { "epoch": 0.10699067628570223, "grad_norm": 0.0004636669473256916, "learning_rate": 0.00012143489153336972, "loss": 46.0, "step": 634 }, { "epoch": 0.10715943129561659, "grad_norm": 0.002586417132988572, "learning_rate": 0.00012122277669547208, "loss": 46.0, "step": 635 }, { "epoch": 0.10732818630553094, "grad_norm": 0.0007418668828904629, "learning_rate": 0.00012101056181963977, "loss": 46.0, "step": 636 }, { "epoch": 0.10749694131544531, "grad_norm": 0.0007878371980041265, "learning_rate": 0.00012079824790619136, "loss": 46.0, "step": 637 }, { "epoch": 0.10766569632535966, "grad_norm": 0.0008747411775402725, "learning_rate": 0.00012058583595591227, "loss": 46.0, "step": 638 }, { "epoch": 0.10783445133527401, "grad_norm": 0.0004629637114703655, "learning_rate": 0.00012037332697004999, "loss": 46.0, "step": 639 }, { "epoch": 0.10800320634518837, "grad_norm": 0.0013667724560946226, "learning_rate": 0.0001201607219503095, "loss": 46.0, "step": 640 }, { "epoch": 0.10817196135510274, "grad_norm": 0.000639776058960706, "learning_rate": 0.00011994802189884833, "loss": 46.0, "step": 641 }, { "epoch": 0.10834071636501709, "grad_norm": 0.002615303033962846, "learning_rate": 0.00011973522781827217, "loss": 46.0, "step": 642 }, { "epoch": 0.10850947137493144, "grad_norm": 0.0006569072720594704, "learning_rate": 0.00011952234071162967, "loss": 46.0, "step": 643 }, { "epoch": 0.1086782263848458, "grad_norm": 0.0020576249808073044, "learning_rate": 0.0001193093615824082, "loss": 46.0, "step": 644 }, { "epoch": 0.10884698139476015, "grad_norm": 0.0006775553338229656, "learning_rate": 0.00011909629143452876, "loss": 46.0, "step": 645 }, { "epoch": 0.10901573640467452, "grad_norm": 0.0010402743937447667, "learning_rate": 0.00011888313127234145, "loss": 46.0, "step": 646 }, { "epoch": 0.10918449141458887, "grad_norm": 0.0009751408942975104, "learning_rate": 0.00011866988210062064, "loss": 46.0, "step": 647 }, { "epoch": 0.10935324642450323, "grad_norm": 0.0006023211753927171, "learning_rate": 0.00011845654492456031, "loss": 46.0, "step": 648 }, { "epoch": 0.10952200143441758, "grad_norm": 0.002933788113296032, "learning_rate": 0.00011824312074976919, "loss": 46.0, "step": 649 }, { "epoch": 0.10969075644433195, "grad_norm": 0.00047651128261350095, "learning_rate": 0.0001180296105822662, "loss": 46.0, "step": 650 }, { "epoch": 0.1098595114542463, "grad_norm": 0.0009603831567801535, "learning_rate": 0.00011781601542847548, "loss": 46.0, "step": 651 }, { "epoch": 0.11002826646416065, "grad_norm": 0.0006936375284567475, "learning_rate": 0.0001176023362952219, "loss": 46.0, "step": 652 }, { "epoch": 0.11019702147407501, "grad_norm": 0.0005456854123622179, "learning_rate": 0.00011738857418972609, "loss": 46.0, "step": 653 }, { "epoch": 0.11036577648398938, "grad_norm": 0.0012621437199413776, "learning_rate": 0.00011717473011959979, "loss": 46.0, "step": 654 }, { "epoch": 0.11053453149390373, "grad_norm": 0.0023074436467140913, "learning_rate": 0.0001169608050928412, "loss": 46.0, "step": 655 }, { "epoch": 0.11070328650381808, "grad_norm": 0.0011624132748693228, "learning_rate": 0.00011674680011782997, "loss": 46.0, "step": 656 }, { "epoch": 0.11087204151373244, "grad_norm": 0.0004293357487767935, "learning_rate": 0.00011653271620332274, "loss": 46.0, "step": 657 }, { "epoch": 0.11104079652364679, "grad_norm": 0.002472059801220894, "learning_rate": 0.00011631855435844816, "loss": 46.0, "step": 658 }, { "epoch": 0.11120955153356116, "grad_norm": 0.0008103114669211209, "learning_rate": 0.00011610431559270228, "loss": 46.0, "step": 659 }, { "epoch": 0.11137830654347551, "grad_norm": 0.0008554041851311922, "learning_rate": 0.00011589000091594369, "loss": 46.0, "step": 660 }, { "epoch": 0.11154706155338986, "grad_norm": 0.0012236748589202762, "learning_rate": 0.00011567561133838877, "loss": 46.0, "step": 661 }, { "epoch": 0.11171581656330422, "grad_norm": 0.0010636880761012435, "learning_rate": 0.00011546114787060709, "loss": 46.0, "step": 662 }, { "epoch": 0.11188457157321859, "grad_norm": 0.001079093781299889, "learning_rate": 0.00011524661152351641, "loss": 46.0, "step": 663 }, { "epoch": 0.11205332658313294, "grad_norm": 0.0013255566591396928, "learning_rate": 0.000115032003308378, "loss": 46.0, "step": 664 }, { "epoch": 0.1122220815930473, "grad_norm": 0.0007876198505982757, "learning_rate": 0.00011481732423679197, "loss": 46.0, "step": 665 }, { "epoch": 0.11239083660296165, "grad_norm": 0.0007665604935027659, "learning_rate": 0.00011460257532069241, "loss": 46.0, "step": 666 }, { "epoch": 0.112559591612876, "grad_norm": 0.000991009990684688, "learning_rate": 0.00011438775757234261, "loss": 46.0, "step": 667 }, { "epoch": 0.11272834662279037, "grad_norm": 0.0008777152397669852, "learning_rate": 0.00011417287200433033, "loss": 46.0, "step": 668 }, { "epoch": 0.11289710163270472, "grad_norm": 0.0004085569526068866, "learning_rate": 0.00011395791962956299, "loss": 46.0, "step": 669 }, { "epoch": 0.11306585664261908, "grad_norm": 0.0023701228201389313, "learning_rate": 0.00011374290146126299, "loss": 46.0, "step": 670 }, { "epoch": 0.11323461165253343, "grad_norm": 0.0006865113973617554, "learning_rate": 0.00011352781851296277, "loss": 46.0, "step": 671 }, { "epoch": 0.1134033666624478, "grad_norm": 0.0007230490446090698, "learning_rate": 0.00011331267179850016, "loss": 46.0, "step": 672 }, { "epoch": 0.11357212167236215, "grad_norm": 0.0006796296802349389, "learning_rate": 0.00011309746233201357, "loss": 46.0, "step": 673 }, { "epoch": 0.1137408766822765, "grad_norm": 0.0009500381420366466, "learning_rate": 0.00011288219112793722, "loss": 46.0, "step": 674 }, { "epoch": 0.11390963169219086, "grad_norm": 0.0005749015836045146, "learning_rate": 0.00011266685920099632, "loss": 46.0, "step": 675 }, { "epoch": 0.11407838670210523, "grad_norm": 0.0008176440605893731, "learning_rate": 0.00011245146756620233, "loss": 46.0, "step": 676 }, { "epoch": 0.11424714171201958, "grad_norm": 0.0012956701684743166, "learning_rate": 0.00011223601723884807, "loss": 46.0, "step": 677 }, { "epoch": 0.11441589672193393, "grad_norm": 0.0012071337550878525, "learning_rate": 0.00011202050923450317, "loss": 46.0, "step": 678 }, { "epoch": 0.11458465173184829, "grad_norm": 0.0008238774607889354, "learning_rate": 0.00011180494456900903, "loss": 46.0, "step": 679 }, { "epoch": 0.11475340674176264, "grad_norm": 0.000518804183229804, "learning_rate": 0.00011158932425847415, "loss": 46.0, "step": 680 }, { "epoch": 0.11492216175167701, "grad_norm": 0.0009683236130513251, "learning_rate": 0.00011137364931926932, "loss": 46.0, "step": 681 }, { "epoch": 0.11509091676159136, "grad_norm": 0.0006015019025653601, "learning_rate": 0.00011115792076802286, "loss": 46.0, "step": 682 }, { "epoch": 0.11525967177150571, "grad_norm": 0.0009642652003094554, "learning_rate": 0.00011094213962161576, "loss": 46.0, "step": 683 }, { "epoch": 0.11542842678142007, "grad_norm": 0.0013890763511881232, "learning_rate": 0.000110726306897177, "loss": 46.0, "step": 684 }, { "epoch": 0.11559718179133444, "grad_norm": 0.0007178309024311602, "learning_rate": 0.00011051042361207861, "loss": 46.0, "step": 685 }, { "epoch": 0.11576593680124879, "grad_norm": 0.00312964734621346, "learning_rate": 0.00011029449078393098, "loss": 46.0, "step": 686 }, { "epoch": 0.11593469181116314, "grad_norm": 0.0011070105247199535, "learning_rate": 0.000110078509430578, "loss": 46.0, "step": 687 }, { "epoch": 0.1161034468210775, "grad_norm": 0.002353600226342678, "learning_rate": 0.00010986248057009239, "loss": 46.0, "step": 688 }, { "epoch": 0.11627220183099186, "grad_norm": 0.0012018800480291247, "learning_rate": 0.00010964640522077065, "loss": 46.0, "step": 689 }, { "epoch": 0.11644095684090622, "grad_norm": 0.0008240241440944374, "learning_rate": 0.00010943028440112854, "loss": 46.0, "step": 690 }, { "epoch": 0.11660971185082057, "grad_norm": 0.000862058368511498, "learning_rate": 0.00010921411912989615, "loss": 46.0, "step": 691 }, { "epoch": 0.11677846686073493, "grad_norm": 0.0008431962342001498, "learning_rate": 0.000108997910426013, "loss": 46.0, "step": 692 }, { "epoch": 0.11694722187064928, "grad_norm": 0.0010532697197049856, "learning_rate": 0.00010878165930862343, "loss": 46.0, "step": 693 }, { "epoch": 0.11711597688056365, "grad_norm": 0.0008627737988717854, "learning_rate": 0.0001085653667970717, "loss": 46.0, "step": 694 }, { "epoch": 0.117284731890478, "grad_norm": 0.0010250735795125365, "learning_rate": 0.00010834903391089712, "loss": 46.0, "step": 695 }, { "epoch": 0.11745348690039235, "grad_norm": 0.0015792236663401127, "learning_rate": 0.00010813266166982946, "loss": 46.0, "step": 696 }, { "epoch": 0.11762224191030671, "grad_norm": 0.0014957215171307325, "learning_rate": 0.00010791625109378376, "loss": 46.0, "step": 697 }, { "epoch": 0.11779099692022107, "grad_norm": 0.0006147067178972065, "learning_rate": 0.00010769980320285599, "loss": 46.0, "step": 698 }, { "epoch": 0.11795975193013543, "grad_norm": 0.0004508892016019672, "learning_rate": 0.00010748331901731793, "loss": 46.0, "step": 699 }, { "epoch": 0.11812850694004978, "grad_norm": 0.0005747199174948037, "learning_rate": 0.00010726679955761238, "loss": 46.0, "step": 700 }, { "epoch": 0.11829726194996414, "grad_norm": 0.0018676796462386847, "learning_rate": 0.00010705024584434853, "loss": 46.0, "step": 701 }, { "epoch": 0.11846601695987849, "grad_norm": 0.0008204419864341617, "learning_rate": 0.00010683365889829692, "loss": 46.0, "step": 702 }, { "epoch": 0.11863477196979286, "grad_norm": 0.0007434524013660848, "learning_rate": 0.0001066170397403848, "loss": 46.0, "step": 703 }, { "epoch": 0.11880352697970721, "grad_norm": 0.0006690184236504138, "learning_rate": 0.00010640038939169124, "loss": 46.0, "step": 704 }, { "epoch": 0.11897228198962156, "grad_norm": 0.0015682928496971726, "learning_rate": 0.00010618370887344234, "loss": 46.0, "step": 705 }, { "epoch": 0.11914103699953592, "grad_norm": 0.000833112804684788, "learning_rate": 0.0001059669992070064, "loss": 46.0, "step": 706 }, { "epoch": 0.11930979200945029, "grad_norm": 0.0005185617483220994, "learning_rate": 0.00010575026141388915, "loss": 46.0, "step": 707 }, { "epoch": 0.11947854701936464, "grad_norm": 0.001206497778184712, "learning_rate": 0.00010553349651572881, "loss": 46.0, "step": 708 }, { "epoch": 0.11964730202927899, "grad_norm": 0.0009608225082047284, "learning_rate": 0.00010531670553429156, "loss": 46.0, "step": 709 }, { "epoch": 0.11981605703919335, "grad_norm": 0.0008177707786671817, "learning_rate": 0.00010509988949146627, "loss": 46.0, "step": 710 }, { "epoch": 0.11998481204910771, "grad_norm": 0.0021591701079159975, "learning_rate": 0.00010488304940926012, "loss": 46.0, "step": 711 }, { "epoch": 0.12015356705902207, "grad_norm": 0.002416484523564577, "learning_rate": 0.00010466618630979357, "loss": 46.0, "step": 712 }, { "epoch": 0.12032232206893642, "grad_norm": 0.0010555331828072667, "learning_rate": 0.00010444930121529555, "loss": 46.0, "step": 713 }, { "epoch": 0.12049107707885078, "grad_norm": 0.00034197320928797126, "learning_rate": 0.00010423239514809866, "loss": 46.0, "step": 714 }, { "epoch": 0.12065983208876513, "grad_norm": 0.0006457001436501741, "learning_rate": 0.00010401546913063442, "loss": 46.0, "step": 715 }, { "epoch": 0.1208285870986795, "grad_norm": 0.0006262487731873989, "learning_rate": 0.0001037985241854283, "loss": 46.0, "step": 716 }, { "epoch": 0.12099734210859385, "grad_norm": 0.0030058289412409067, "learning_rate": 0.00010358156133509513, "loss": 46.0, "step": 717 }, { "epoch": 0.1211660971185082, "grad_norm": 0.0007620776887051761, "learning_rate": 0.00010336458160233393, "loss": 46.0, "step": 718 }, { "epoch": 0.12133485212842256, "grad_norm": 0.0007673231884837151, "learning_rate": 0.0001031475860099235, "loss": 46.0, "step": 719 }, { "epoch": 0.12150360713833692, "grad_norm": 0.0008117277757264674, "learning_rate": 0.00010293057558071728, "loss": 46.0, "step": 720 }, { "epoch": 0.12167236214825128, "grad_norm": 0.0005056714289821684, "learning_rate": 0.00010271355133763869, "loss": 46.0, "step": 721 }, { "epoch": 0.12184111715816563, "grad_norm": 0.0008250874234363437, "learning_rate": 0.00010249651430367628, "loss": 46.0, "step": 722 }, { "epoch": 0.12200987216807999, "grad_norm": 0.0010816901922225952, "learning_rate": 0.00010227946550187884, "loss": 46.0, "step": 723 }, { "epoch": 0.12217862717799435, "grad_norm": 0.0008268383098766208, "learning_rate": 0.00010206240595535063, "loss": 46.0, "step": 724 }, { "epoch": 0.12234738218790871, "grad_norm": 0.0011831193696707487, "learning_rate": 0.00010184533668724667, "loss": 46.0, "step": 725 }, { "epoch": 0.12251613719782306, "grad_norm": 0.003153022611513734, "learning_rate": 0.00010162825872076767, "loss": 46.0, "step": 726 }, { "epoch": 0.12268489220773741, "grad_norm": 0.00038787827361375093, "learning_rate": 0.00010141117307915537, "loss": 46.0, "step": 727 }, { "epoch": 0.12285364721765177, "grad_norm": 0.0009175827144645154, "learning_rate": 0.00010119408078568774, "loss": 46.0, "step": 728 }, { "epoch": 0.12302240222756614, "grad_norm": 0.0005741899949498475, "learning_rate": 0.00010097698286367409, "loss": 46.0, "step": 729 }, { "epoch": 0.12319115723748049, "grad_norm": 0.0005662553012371063, "learning_rate": 0.00010075988033645025, "loss": 46.0, "step": 730 }, { "epoch": 0.12319115723748049, "eval_loss": 11.5, "eval_runtime": 14.1785, "eval_samples_per_second": 175.97, "eval_steps_per_second": 88.021, "step": 730 }, { "epoch": 0.12335991224739484, "grad_norm": 0.0005169582436792552, "learning_rate": 0.00010054277422737369, "loss": 46.0, "step": 731 }, { "epoch": 0.1235286672573092, "grad_norm": 0.00028787710471078753, "learning_rate": 0.0001003256655598189, "loss": 46.0, "step": 732 }, { "epoch": 0.12369742226722356, "grad_norm": 0.0016796085983514786, "learning_rate": 0.00010010855535717232, "loss": 46.0, "step": 733 }, { "epoch": 0.12386617727713792, "grad_norm": 0.0016076072352007031, "learning_rate": 9.98914446428277e-05, "loss": 46.0, "step": 734 }, { "epoch": 0.12403493228705227, "grad_norm": 0.0008219809387810528, "learning_rate": 9.967433444018111e-05, "loss": 46.0, "step": 735 }, { "epoch": 0.12420368729696663, "grad_norm": 0.002049337374046445, "learning_rate": 9.945722577262632e-05, "loss": 46.0, "step": 736 }, { "epoch": 0.12437244230688098, "grad_norm": 0.0005754379089921713, "learning_rate": 9.92401196635498e-05, "loss": 46.0, "step": 737 }, { "epoch": 0.12454119731679535, "grad_norm": 0.0013358064461499453, "learning_rate": 9.902301713632592e-05, "loss": 46.0, "step": 738 }, { "epoch": 0.1247099523267097, "grad_norm": 0.0006682523526251316, "learning_rate": 9.880591921431227e-05, "loss": 46.0, "step": 739 }, { "epoch": 0.12487870733662405, "grad_norm": 0.00043337634997442365, "learning_rate": 9.858882692084466e-05, "loss": 46.0, "step": 740 }, { "epoch": 0.1250474623465384, "grad_norm": 0.0005919807590544224, "learning_rate": 9.837174127923237e-05, "loss": 46.0, "step": 741 }, { "epoch": 0.12521621735645277, "grad_norm": 0.0013382198521867394, "learning_rate": 9.815466331275335e-05, "loss": 46.0, "step": 742 }, { "epoch": 0.12538497236636711, "grad_norm": 0.000782837625592947, "learning_rate": 9.793759404464936e-05, "loss": 46.0, "step": 743 }, { "epoch": 0.12555372737628148, "grad_norm": 0.0009372693602927029, "learning_rate": 9.772053449812118e-05, "loss": 46.0, "step": 744 }, { "epoch": 0.12572248238619585, "grad_norm": 0.000794100807979703, "learning_rate": 9.750348569632375e-05, "loss": 46.0, "step": 745 }, { "epoch": 0.1258912373961102, "grad_norm": 0.0010177150834351778, "learning_rate": 9.728644866236132e-05, "loss": 46.0, "step": 746 }, { "epoch": 0.12605999240602456, "grad_norm": 0.0010124148102477193, "learning_rate": 9.706942441928273e-05, "loss": 46.0, "step": 747 }, { "epoch": 0.12622874741593892, "grad_norm": 0.0010855060536414385, "learning_rate": 9.68524139900765e-05, "loss": 46.0, "step": 748 }, { "epoch": 0.12639750242585326, "grad_norm": 0.0008628432988189161, "learning_rate": 9.66354183976661e-05, "loss": 46.0, "step": 749 }, { "epoch": 0.12656625743576763, "grad_norm": 0.0007373031694442034, "learning_rate": 9.641843866490492e-05, "loss": 46.0, "step": 750 }, { "epoch": 0.12673501244568197, "grad_norm": 0.0007096838089637458, "learning_rate": 9.62014758145717e-05, "loss": 46.0, "step": 751 }, { "epoch": 0.12690376745559634, "grad_norm": 0.0009518765727989376, "learning_rate": 9.598453086936559e-05, "loss": 46.0, "step": 752 }, { "epoch": 0.1270725224655107, "grad_norm": 0.0013600359670817852, "learning_rate": 9.576760485190137e-05, "loss": 46.0, "step": 753 }, { "epoch": 0.12724127747542505, "grad_norm": 0.0011481187539175153, "learning_rate": 9.555069878470449e-05, "loss": 46.0, "step": 754 }, { "epoch": 0.12741003248533941, "grad_norm": 0.000521646230481565, "learning_rate": 9.533381369020646e-05, "loss": 46.0, "step": 755 }, { "epoch": 0.12757878749525375, "grad_norm": 0.0005510274204425514, "learning_rate": 9.511695059073989e-05, "loss": 46.0, "step": 756 }, { "epoch": 0.12774754250516812, "grad_norm": 0.003858919721096754, "learning_rate": 9.490011050853375e-05, "loss": 46.0, "step": 757 }, { "epoch": 0.1279162975150825, "grad_norm": 0.0031276738736778498, "learning_rate": 9.468329446570848e-05, "loss": 46.0, "step": 758 }, { "epoch": 0.12808505252499683, "grad_norm": 0.0007159564993344247, "learning_rate": 9.446650348427117e-05, "loss": 46.0, "step": 759 }, { "epoch": 0.1282538075349112, "grad_norm": 0.0005575700779445469, "learning_rate": 9.424973858611088e-05, "loss": 46.0, "step": 760 }, { "epoch": 0.12842256254482554, "grad_norm": 0.00020999553089495748, "learning_rate": 9.403300079299364e-05, "loss": 46.0, "step": 761 }, { "epoch": 0.1285913175547399, "grad_norm": 0.0008280250476673245, "learning_rate": 9.38162911265577e-05, "loss": 46.0, "step": 762 }, { "epoch": 0.12876007256465427, "grad_norm": 0.0007614243077114224, "learning_rate": 9.35996106083088e-05, "loss": 46.0, "step": 763 }, { "epoch": 0.1289288275745686, "grad_norm": 0.0007390084792859852, "learning_rate": 9.338296025961521e-05, "loss": 46.0, "step": 764 }, { "epoch": 0.12909758258448298, "grad_norm": 0.0005527955945581198, "learning_rate": 9.31663411017031e-05, "loss": 46.0, "step": 765 }, { "epoch": 0.12926633759439735, "grad_norm": 0.0007797405123710632, "learning_rate": 9.294975415565151e-05, "loss": 46.0, "step": 766 }, { "epoch": 0.12943509260431169, "grad_norm": 0.0008938325918279588, "learning_rate": 9.273320044238763e-05, "loss": 46.0, "step": 767 }, { "epoch": 0.12960384761422605, "grad_norm": 0.0011889681918546557, "learning_rate": 9.251668098268209e-05, "loss": 46.0, "step": 768 }, { "epoch": 0.1297726026241404, "grad_norm": 0.0007662948337383568, "learning_rate": 9.230019679714405e-05, "loss": 46.0, "step": 769 }, { "epoch": 0.12994135763405476, "grad_norm": 0.0007776907877996564, "learning_rate": 9.208374890621628e-05, "loss": 46.0, "step": 770 }, { "epoch": 0.13011011264396913, "grad_norm": 0.0019411947578191757, "learning_rate": 9.186733833017061e-05, "loss": 46.0, "step": 771 }, { "epoch": 0.13027886765388347, "grad_norm": 0.0007572571048513055, "learning_rate": 9.165096608910287e-05, "loss": 46.0, "step": 772 }, { "epoch": 0.13044762266379784, "grad_norm": 0.0006218423368409276, "learning_rate": 9.143463320292832e-05, "loss": 46.0, "step": 773 }, { "epoch": 0.13061637767371218, "grad_norm": 0.0005072550848126411, "learning_rate": 9.12183406913766e-05, "loss": 46.0, "step": 774 }, { "epoch": 0.13078513268362654, "grad_norm": 0.0011237628059461713, "learning_rate": 9.100208957398703e-05, "loss": 46.0, "step": 775 }, { "epoch": 0.1309538876935409, "grad_norm": 0.0009162898059003055, "learning_rate": 9.078588087010389e-05, "loss": 46.0, "step": 776 }, { "epoch": 0.13112264270345525, "grad_norm": 0.003182788612321019, "learning_rate": 9.056971559887145e-05, "loss": 46.0, "step": 777 }, { "epoch": 0.13129139771336962, "grad_norm": 0.0018501750892028213, "learning_rate": 9.035359477922936e-05, "loss": 46.0, "step": 778 }, { "epoch": 0.13146015272328399, "grad_norm": 0.0009051130618900061, "learning_rate": 9.013751942990766e-05, "loss": 46.0, "step": 779 }, { "epoch": 0.13162890773319832, "grad_norm": 0.0003044608747586608, "learning_rate": 8.9921490569422e-05, "loss": 46.0, "step": 780 }, { "epoch": 0.1317976627431127, "grad_norm": 0.0008892896585166454, "learning_rate": 8.970550921606903e-05, "loss": 46.0, "step": 781 }, { "epoch": 0.13196641775302703, "grad_norm": 0.002685688668861985, "learning_rate": 8.948957638792144e-05, "loss": 46.0, "step": 782 }, { "epoch": 0.1321351727629414, "grad_norm": 0.0016494488809257746, "learning_rate": 8.927369310282302e-05, "loss": 46.0, "step": 783 }, { "epoch": 0.13230392777285577, "grad_norm": 0.0006050110096111894, "learning_rate": 8.905786037838426e-05, "loss": 46.0, "step": 784 }, { "epoch": 0.1324726827827701, "grad_norm": 0.0009582451893948019, "learning_rate": 8.884207923197715e-05, "loss": 46.0, "step": 785 }, { "epoch": 0.13264143779268447, "grad_norm": 0.004504833836108446, "learning_rate": 8.86263506807307e-05, "loss": 46.0, "step": 786 }, { "epoch": 0.13281019280259881, "grad_norm": 0.0005395681946538389, "learning_rate": 8.841067574152589e-05, "loss": 46.0, "step": 787 }, { "epoch": 0.13297894781251318, "grad_norm": 0.001102139474824071, "learning_rate": 8.819505543099099e-05, "loss": 46.0, "step": 788 }, { "epoch": 0.13314770282242755, "grad_norm": 0.000723580364137888, "learning_rate": 8.797949076549685e-05, "loss": 46.0, "step": 789 }, { "epoch": 0.1333164578323419, "grad_norm": 0.002134741051122546, "learning_rate": 8.776398276115198e-05, "loss": 46.0, "step": 790 }, { "epoch": 0.13348521284225626, "grad_norm": 0.0018205747473984957, "learning_rate": 8.75485324337977e-05, "loss": 46.0, "step": 791 }, { "epoch": 0.13365396785217062, "grad_norm": 0.0017829221906140447, "learning_rate": 8.733314079900372e-05, "loss": 46.0, "step": 792 }, { "epoch": 0.13382272286208496, "grad_norm": 0.000708136591129005, "learning_rate": 8.71178088720628e-05, "loss": 46.0, "step": 793 }, { "epoch": 0.13399147787199933, "grad_norm": 0.0009631026186980307, "learning_rate": 8.690253766798644e-05, "loss": 46.0, "step": 794 }, { "epoch": 0.13416023288191367, "grad_norm": 0.0010598397348076105, "learning_rate": 8.668732820149989e-05, "loss": 46.0, "step": 795 }, { "epoch": 0.13432898789182804, "grad_norm": 0.0007573203183710575, "learning_rate": 8.647218148703727e-05, "loss": 46.0, "step": 796 }, { "epoch": 0.1344977429017424, "grad_norm": 0.0017247875221073627, "learning_rate": 8.625709853873705e-05, "loss": 46.0, "step": 797 }, { "epoch": 0.13466649791165675, "grad_norm": 0.000708206498529762, "learning_rate": 8.6042080370437e-05, "loss": 46.0, "step": 798 }, { "epoch": 0.1348352529215711, "grad_norm": 0.0007804720080457628, "learning_rate": 8.582712799566969e-05, "loss": 46.0, "step": 799 }, { "epoch": 0.13500400793148545, "grad_norm": 0.002211306942626834, "learning_rate": 8.561224242765744e-05, "loss": 46.0, "step": 800 }, { "epoch": 0.13517276294139982, "grad_norm": 0.0037598570343106985, "learning_rate": 8.539742467930761e-05, "loss": 46.0, "step": 801 }, { "epoch": 0.1353415179513142, "grad_norm": 0.0007354649715125561, "learning_rate": 8.518267576320806e-05, "loss": 46.0, "step": 802 }, { "epoch": 0.13551027296122853, "grad_norm": 0.00044775562128052115, "learning_rate": 8.496799669162206e-05, "loss": 46.0, "step": 803 }, { "epoch": 0.1356790279711429, "grad_norm": 0.0005587537889368832, "learning_rate": 8.475338847648361e-05, "loss": 46.0, "step": 804 }, { "epoch": 0.13584778298105726, "grad_norm": 0.0005210166564211249, "learning_rate": 8.453885212939294e-05, "loss": 46.0, "step": 805 }, { "epoch": 0.1360165379909716, "grad_norm": 0.0007426153169944882, "learning_rate": 8.432438866161124e-05, "loss": 46.0, "step": 806 }, { "epoch": 0.13618529300088597, "grad_norm": 0.0013306104810908437, "learning_rate": 8.410999908405635e-05, "loss": 46.0, "step": 807 }, { "epoch": 0.1363540480108003, "grad_norm": 0.0013473851140588522, "learning_rate": 8.389568440729776e-05, "loss": 46.0, "step": 808 }, { "epoch": 0.13652280302071468, "grad_norm": 0.00044588756281882524, "learning_rate": 8.368144564155185e-05, "loss": 46.0, "step": 809 }, { "epoch": 0.13669155803062905, "grad_norm": 0.0006963923806324601, "learning_rate": 8.346728379667727e-05, "loss": 46.0, "step": 810 }, { "epoch": 0.13686031304054339, "grad_norm": 0.001505714375525713, "learning_rate": 8.325319988217004e-05, "loss": 46.0, "step": 811 }, { "epoch": 0.13702906805045775, "grad_norm": 0.0009234807803295553, "learning_rate": 8.303919490715881e-05, "loss": 46.0, "step": 812 }, { "epoch": 0.1371978230603721, "grad_norm": 0.001101883128285408, "learning_rate": 8.282526988040022e-05, "loss": 46.0, "step": 813 }, { "epoch": 0.13736657807028646, "grad_norm": 0.00047088676365092397, "learning_rate": 8.261142581027392e-05, "loss": 46.0, "step": 814 }, { "epoch": 0.13753533308020083, "grad_norm": 0.0010181186953559518, "learning_rate": 8.239766370477811e-05, "loss": 46.0, "step": 815 }, { "epoch": 0.13770408809011517, "grad_norm": 0.0008076895028352737, "learning_rate": 8.218398457152454e-05, "loss": 46.0, "step": 816 }, { "epoch": 0.13787284310002954, "grad_norm": 0.000841393368318677, "learning_rate": 8.197038941773381e-05, "loss": 46.0, "step": 817 }, { "epoch": 0.1380415981099439, "grad_norm": 0.0008007394499145448, "learning_rate": 8.175687925023082e-05, "loss": 46.0, "step": 818 }, { "epoch": 0.13821035311985824, "grad_norm": 0.000764928525313735, "learning_rate": 8.154345507543972e-05, "loss": 46.0, "step": 819 }, { "epoch": 0.1383791081297726, "grad_norm": 0.0015394032234326005, "learning_rate": 8.133011789937937e-05, "loss": 46.0, "step": 820 }, { "epoch": 0.13854786313968695, "grad_norm": 0.0008911428158171475, "learning_rate": 8.111686872765859e-05, "loss": 46.0, "step": 821 }, { "epoch": 0.13871661814960132, "grad_norm": 0.0006434863316826522, "learning_rate": 8.090370856547127e-05, "loss": 46.0, "step": 822 }, { "epoch": 0.13888537315951568, "grad_norm": 0.0009041949524544179, "learning_rate": 8.069063841759182e-05, "loss": 46.0, "step": 823 }, { "epoch": 0.13905412816943002, "grad_norm": 0.0003908716607838869, "learning_rate": 8.047765928837034e-05, "loss": 46.0, "step": 824 }, { "epoch": 0.1392228831793444, "grad_norm": 0.0004880438500549644, "learning_rate": 8.026477218172785e-05, "loss": 46.0, "step": 825 }, { "epoch": 0.13939163818925873, "grad_norm": 0.0008036144427023828, "learning_rate": 8.005197810115168e-05, "loss": 46.0, "step": 826 }, { "epoch": 0.1395603931991731, "grad_norm": 0.00038311423850245774, "learning_rate": 7.983927804969054e-05, "loss": 46.0, "step": 827 }, { "epoch": 0.13972914820908747, "grad_norm": 0.0007264793966896832, "learning_rate": 7.962667302995004e-05, "loss": 46.0, "step": 828 }, { "epoch": 0.1398979032190018, "grad_norm": 0.006564402021467686, "learning_rate": 7.941416404408778e-05, "loss": 46.0, "step": 829 }, { "epoch": 0.14006665822891617, "grad_norm": 0.0009483057074248791, "learning_rate": 7.920175209380865e-05, "loss": 46.0, "step": 830 }, { "epoch": 0.14023541323883051, "grad_norm": 0.001220498699694872, "learning_rate": 7.898943818036024e-05, "loss": 46.0, "step": 831 }, { "epoch": 0.14040416824874488, "grad_norm": 0.0012295391643419862, "learning_rate": 7.877722330452795e-05, "loss": 46.0, "step": 832 }, { "epoch": 0.14057292325865925, "grad_norm": 0.0008359167259186506, "learning_rate": 7.856510846663031e-05, "loss": 46.0, "step": 833 }, { "epoch": 0.1407416782685736, "grad_norm": 0.001273457077331841, "learning_rate": 7.835309466651444e-05, "loss": 46.0, "step": 834 }, { "epoch": 0.14091043327848796, "grad_norm": 0.0027280368376523256, "learning_rate": 7.814118290355102e-05, "loss": 46.0, "step": 835 }, { "epoch": 0.14107918828840232, "grad_norm": 0.0015108464285731316, "learning_rate": 7.792937417662993e-05, "loss": 46.0, "step": 836 }, { "epoch": 0.14124794329831666, "grad_norm": 0.002349273534491658, "learning_rate": 7.771766948415525e-05, "loss": 46.0, "step": 837 }, { "epoch": 0.14141669830823103, "grad_norm": 0.0012695115292444825, "learning_rate": 7.750606982404065e-05, "loss": 46.0, "step": 838 }, { "epoch": 0.14158545331814537, "grad_norm": 0.0009470002260059118, "learning_rate": 7.729457619370489e-05, "loss": 46.0, "step": 839 }, { "epoch": 0.14175420832805974, "grad_norm": 0.0006858884589746594, "learning_rate": 7.708318959006669e-05, "loss": 46.0, "step": 840 }, { "epoch": 0.1419229633379741, "grad_norm": 0.0016968476120382547, "learning_rate": 7.687191100954046e-05, "loss": 46.0, "step": 841 }, { "epoch": 0.14209171834788845, "grad_norm": 0.002445329912006855, "learning_rate": 7.666074144803139e-05, "loss": 46.0, "step": 842 }, { "epoch": 0.1422604733578028, "grad_norm": 0.0007218089303933084, "learning_rate": 7.644968190093067e-05, "loss": 46.0, "step": 843 }, { "epoch": 0.14242922836771715, "grad_norm": 0.0014190871734172106, "learning_rate": 7.623873336311108e-05, "loss": 46.0, "step": 844 }, { "epoch": 0.14259798337763152, "grad_norm": 0.0006293446640484035, "learning_rate": 7.602789682892203e-05, "loss": 46.0, "step": 845 }, { "epoch": 0.1427667383875459, "grad_norm": 0.0012009006459265947, "learning_rate": 7.581717329218499e-05, "loss": 46.0, "step": 846 }, { "epoch": 0.14293549339746023, "grad_norm": 0.0011125734308734536, "learning_rate": 7.560656374618886e-05, "loss": 46.0, "step": 847 }, { "epoch": 0.1431042484073746, "grad_norm": 0.002301817527040839, "learning_rate": 7.539606918368507e-05, "loss": 46.0, "step": 848 }, { "epoch": 0.14327300341728896, "grad_norm": 0.0006231066072359681, "learning_rate": 7.518569059688325e-05, "loss": 46.0, "step": 849 }, { "epoch": 0.1434417584272033, "grad_norm": 0.002790312049910426, "learning_rate": 7.497542897744622e-05, "loss": 46.0, "step": 850 }, { "epoch": 0.14361051343711767, "grad_norm": 0.0007836490985937417, "learning_rate": 7.476528531648544e-05, "loss": 46.0, "step": 851 }, { "epoch": 0.143779268447032, "grad_norm": 0.0008998861303552985, "learning_rate": 7.455526060455648e-05, "loss": 46.0, "step": 852 }, { "epoch": 0.14394802345694638, "grad_norm": 0.0022323448210954666, "learning_rate": 7.434535583165408e-05, "loss": 46.0, "step": 853 }, { "epoch": 0.14411677846686075, "grad_norm": 0.0007675597444176674, "learning_rate": 7.413557198720765e-05, "loss": 46.0, "step": 854 }, { "epoch": 0.14428553347677509, "grad_norm": 0.0015968787483870983, "learning_rate": 7.392591006007666e-05, "loss": 46.0, "step": 855 }, { "epoch": 0.14445428848668945, "grad_norm": 0.0005020895623601973, "learning_rate": 7.37163710385458e-05, "loss": 46.0, "step": 856 }, { "epoch": 0.1446230434966038, "grad_norm": 0.0010543781099840999, "learning_rate": 7.350695591032049e-05, "loss": 46.0, "step": 857 }, { "epoch": 0.14479179850651816, "grad_norm": 0.000769981590565294, "learning_rate": 7.329766566252212e-05, "loss": 46.0, "step": 858 }, { "epoch": 0.14496055351643253, "grad_norm": 0.0009824162116274238, "learning_rate": 7.30885012816834e-05, "loss": 46.0, "step": 859 }, { "epoch": 0.14512930852634687, "grad_norm": 0.0020662923343479633, "learning_rate": 7.287946375374385e-05, "loss": 46.0, "step": 860 }, { "epoch": 0.14529806353626123, "grad_norm": 0.002364357467740774, "learning_rate": 7.26705540640449e-05, "loss": 46.0, "step": 861 }, { "epoch": 0.1454668185461756, "grad_norm": 0.001292189583182335, "learning_rate": 7.246177319732543e-05, "loss": 46.0, "step": 862 }, { "epoch": 0.14563557355608994, "grad_norm": 0.000383058562874794, "learning_rate": 7.225312213771722e-05, "loss": 46.0, "step": 863 }, { "epoch": 0.1458043285660043, "grad_norm": 0.0009881592122837901, "learning_rate": 7.204460186873995e-05, "loss": 46.0, "step": 864 }, { "epoch": 0.14597308357591865, "grad_norm": 0.0010527949780225754, "learning_rate": 7.183621337329703e-05, "loss": 46.0, "step": 865 }, { "epoch": 0.14614183858583302, "grad_norm": 0.001268843188881874, "learning_rate": 7.162795763367049e-05, "loss": 46.0, "step": 866 }, { "epoch": 0.14631059359574738, "grad_norm": 0.003271568100899458, "learning_rate": 7.141983563151677e-05, "loss": 46.0, "step": 867 }, { "epoch": 0.14647934860566172, "grad_norm": 0.0008496344089508057, "learning_rate": 7.121184834786188e-05, "loss": 46.0, "step": 868 }, { "epoch": 0.1466481036155761, "grad_norm": 0.001987769966945052, "learning_rate": 7.10039967630967e-05, "loss": 46.0, "step": 869 }, { "epoch": 0.14681685862549043, "grad_norm": 0.0011683752527460456, "learning_rate": 7.079628185697258e-05, "loss": 46.0, "step": 870 }, { "epoch": 0.1469856136354048, "grad_norm": 0.000853047997225076, "learning_rate": 7.058870460859656e-05, "loss": 46.0, "step": 871 }, { "epoch": 0.14715436864531917, "grad_norm": 0.001560655073262751, "learning_rate": 7.03812659964268e-05, "loss": 46.0, "step": 872 }, { "epoch": 0.1473231236552335, "grad_norm": 0.0007004750659689307, "learning_rate": 7.017396699826803e-05, "loss": 46.0, "step": 873 }, { "epoch": 0.14749187866514787, "grad_norm": 0.0006875868421047926, "learning_rate": 6.996680859126677e-05, "loss": 46.0, "step": 874 }, { "epoch": 0.14766063367506224, "grad_norm": 0.0009068532381206751, "learning_rate": 6.975979175190688e-05, "loss": 46.0, "step": 875 }, { "epoch": 0.14782938868497658, "grad_norm": 0.0005159789579920471, "learning_rate": 6.955291745600498e-05, "loss": 46.0, "step": 876 }, { "epoch": 0.14799814369489095, "grad_norm": 0.0014988022157922387, "learning_rate": 6.934618667870567e-05, "loss": 46.0, "step": 877 }, { "epoch": 0.1481668987048053, "grad_norm": 0.0007760879816487432, "learning_rate": 6.913960039447718e-05, "loss": 46.0, "step": 878 }, { "epoch": 0.14833565371471966, "grad_norm": 0.0010894398437812924, "learning_rate": 6.893315957710649e-05, "loss": 46.0, "step": 879 }, { "epoch": 0.14850440872463402, "grad_norm": 0.0004840297333430499, "learning_rate": 6.8726865199695e-05, "loss": 46.0, "step": 880 }, { "epoch": 0.14867316373454836, "grad_norm": 0.0005790110444650054, "learning_rate": 6.852071823465383e-05, "loss": 46.0, "step": 881 }, { "epoch": 0.14884191874446273, "grad_norm": 0.002223587827757001, "learning_rate": 6.831471965369914e-05, "loss": 46.0, "step": 882 }, { "epoch": 0.14901067375437707, "grad_norm": 0.002203379524871707, "learning_rate": 6.810887042784777e-05, "loss": 46.0, "step": 883 }, { "epoch": 0.14917942876429144, "grad_norm": 0.002671119524165988, "learning_rate": 6.790317152741249e-05, "loss": 46.0, "step": 884 }, { "epoch": 0.1493481837742058, "grad_norm": 0.001122485613450408, "learning_rate": 6.769762392199748e-05, "loss": 46.0, "step": 885 }, { "epoch": 0.14951693878412015, "grad_norm": 0.0007207071175798774, "learning_rate": 6.749222858049382e-05, "loss": 46.0, "step": 886 }, { "epoch": 0.1496856937940345, "grad_norm": 0.0007145823910832405, "learning_rate": 6.728698647107475e-05, "loss": 46.0, "step": 887 }, { "epoch": 0.14985444880394888, "grad_norm": 0.0008989453199319541, "learning_rate": 6.708189856119128e-05, "loss": 46.0, "step": 888 }, { "epoch": 0.15002320381386322, "grad_norm": 0.0011213996913284063, "learning_rate": 6.687696581756763e-05, "loss": 46.0, "step": 889 }, { "epoch": 0.1501919588237776, "grad_norm": 0.0005325007368810475, "learning_rate": 6.667218920619649e-05, "loss": 46.0, "step": 890 }, { "epoch": 0.15036071383369193, "grad_norm": 0.0008855744963511825, "learning_rate": 6.646756969233463e-05, "loss": 46.0, "step": 891 }, { "epoch": 0.1505294688436063, "grad_norm": 0.0007719965651631355, "learning_rate": 6.626310824049838e-05, "loss": 46.0, "step": 892 }, { "epoch": 0.15069822385352066, "grad_norm": 0.001363624120131135, "learning_rate": 6.605880581445894e-05, "loss": 46.0, "step": 893 }, { "epoch": 0.150866978863435, "grad_norm": 0.0006050238152965903, "learning_rate": 6.585466337723796e-05, "loss": 46.0, "step": 894 }, { "epoch": 0.15103573387334937, "grad_norm": 0.00020488305017352104, "learning_rate": 6.565068189110286e-05, "loss": 46.0, "step": 895 }, { "epoch": 0.1512044888832637, "grad_norm": 0.000528375618159771, "learning_rate": 6.544686231756246e-05, "loss": 46.0, "step": 896 }, { "epoch": 0.15137324389317808, "grad_norm": 0.0006251371232792735, "learning_rate": 6.52432056173624e-05, "loss": 46.0, "step": 897 }, { "epoch": 0.15154199890309245, "grad_norm": 0.000713423069100827, "learning_rate": 6.503971275048042e-05, "loss": 46.0, "step": 898 }, { "epoch": 0.15171075391300679, "grad_norm": 0.0008193039102479815, "learning_rate": 6.483638467612227e-05, "loss": 46.0, "step": 899 }, { "epoch": 0.15187950892292115, "grad_norm": 0.000522931688465178, "learning_rate": 6.463322235271666e-05, "loss": 46.0, "step": 900 }, { "epoch": 0.1520482639328355, "grad_norm": 0.001072798273526132, "learning_rate": 6.44302267379111e-05, "loss": 46.0, "step": 901 }, { "epoch": 0.15221701894274986, "grad_norm": 0.0006312826299108565, "learning_rate": 6.422739878856735e-05, "loss": 46.0, "step": 902 }, { "epoch": 0.15238577395266423, "grad_norm": 0.002877163467928767, "learning_rate": 6.402473946075671e-05, "loss": 46.0, "step": 903 }, { "epoch": 0.15255452896257857, "grad_norm": 0.0008996203541755676, "learning_rate": 6.382224970975572e-05, "loss": 46.0, "step": 904 }, { "epoch": 0.15272328397249293, "grad_norm": 0.0006124867359176278, "learning_rate": 6.361993049004163e-05, "loss": 46.0, "step": 905 }, { "epoch": 0.1528920389824073, "grad_norm": 0.001687378971837461, "learning_rate": 6.341778275528773e-05, "loss": 46.0, "step": 906 }, { "epoch": 0.15306079399232164, "grad_norm": 0.001008613035082817, "learning_rate": 6.321580745835915e-05, "loss": 46.0, "step": 907 }, { "epoch": 0.153229549002236, "grad_norm": 0.0006372160278260708, "learning_rate": 6.301400555130805e-05, "loss": 46.0, "step": 908 }, { "epoch": 0.15339830401215035, "grad_norm": 0.0005495776422321796, "learning_rate": 6.281237798536932e-05, "loss": 46.0, "step": 909 }, { "epoch": 0.15356705902206472, "grad_norm": 0.0008954824879765511, "learning_rate": 6.261092571095614e-05, "loss": 46.0, "step": 910 }, { "epoch": 0.15373581403197908, "grad_norm": 0.001377457519993186, "learning_rate": 6.240964967765528e-05, "loss": 46.0, "step": 911 }, { "epoch": 0.15390456904189342, "grad_norm": 0.0011930032633244991, "learning_rate": 6.220855083422285e-05, "loss": 46.0, "step": 912 }, { "epoch": 0.1540733240518078, "grad_norm": 0.0013531373115256429, "learning_rate": 6.200763012857973e-05, "loss": 46.0, "step": 913 }, { "epoch": 0.15424207906172213, "grad_norm": 0.0009069031220860779, "learning_rate": 6.180688850780711e-05, "loss": 46.0, "step": 914 }, { "epoch": 0.1544108340716365, "grad_norm": 0.0008781193173490465, "learning_rate": 6.160632691814203e-05, "loss": 46.0, "step": 915 }, { "epoch": 0.15457958908155087, "grad_norm": 0.0026150341145694256, "learning_rate": 6.140594630497287e-05, "loss": 46.0, "step": 916 }, { "epoch": 0.1547483440914652, "grad_norm": 0.0013742209412157536, "learning_rate": 6.120574761283497e-05, "loss": 46.0, "step": 917 }, { "epoch": 0.15491709910137957, "grad_norm": 0.0027837788220494986, "learning_rate": 6.100573178540619e-05, "loss": 46.0, "step": 918 }, { "epoch": 0.15508585411129394, "grad_norm": 0.000727921084035188, "learning_rate": 6.080589976550233e-05, "loss": 46.0, "step": 919 }, { "epoch": 0.15525460912120828, "grad_norm": 0.0010688056936487556, "learning_rate": 6.06062524950729e-05, "loss": 46.0, "step": 920 }, { "epoch": 0.15542336413112265, "grad_norm": 0.0006157424068078399, "learning_rate": 6.040679091519643e-05, "loss": 46.0, "step": 921 }, { "epoch": 0.155592119141037, "grad_norm": 0.0018731793388724327, "learning_rate": 6.020751596607621e-05, "loss": 46.0, "step": 922 }, { "epoch": 0.15576087415095136, "grad_norm": 0.0011395288165658712, "learning_rate": 6.000842858703585e-05, "loss": 46.0, "step": 923 }, { "epoch": 0.15592962916086572, "grad_norm": 0.0004488412232603878, "learning_rate": 5.980952971651472e-05, "loss": 46.0, "step": 924 }, { "epoch": 0.15609838417078006, "grad_norm": 0.0012818429386243224, "learning_rate": 5.9610820292063665e-05, "loss": 46.0, "step": 925 }, { "epoch": 0.15626713918069443, "grad_norm": 0.0011927819577977061, "learning_rate": 5.9412301250340584e-05, "loss": 46.0, "step": 926 }, { "epoch": 0.15643589419060877, "grad_norm": 0.0010172206675633788, "learning_rate": 5.921397352710587e-05, "loss": 46.0, "step": 927 }, { "epoch": 0.15660464920052314, "grad_norm": 0.001916920649819076, "learning_rate": 5.9015838057218196e-05, "loss": 46.0, "step": 928 }, { "epoch": 0.1567734042104375, "grad_norm": 0.00064216268947348, "learning_rate": 5.881789577462993e-05, "loss": 46.0, "step": 929 }, { "epoch": 0.15694215922035185, "grad_norm": 0.0006899251602590084, "learning_rate": 5.862014761238281e-05, "loss": 46.0, "step": 930 }, { "epoch": 0.1571109142302662, "grad_norm": 0.0008296074229292572, "learning_rate": 5.842259450260366e-05, "loss": 46.0, "step": 931 }, { "epoch": 0.15727966924018058, "grad_norm": 0.0014352595899254084, "learning_rate": 5.822523737649974e-05, "loss": 46.0, "step": 932 }, { "epoch": 0.15744842425009492, "grad_norm": 0.0011921770637854934, "learning_rate": 5.80280771643546e-05, "loss": 46.0, "step": 933 }, { "epoch": 0.1576171792600093, "grad_norm": 0.0011576671386137605, "learning_rate": 5.7831114795523547e-05, "loss": 46.0, "step": 934 }, { "epoch": 0.15778593426992363, "grad_norm": 0.000808388867881149, "learning_rate": 5.7634351198429304e-05, "loss": 46.0, "step": 935 }, { "epoch": 0.157954689279838, "grad_norm": 0.0016751131042838097, "learning_rate": 5.74377873005577e-05, "loss": 46.0, "step": 936 }, { "epoch": 0.15812344428975236, "grad_norm": 0.0008697423618286848, "learning_rate": 5.724142402845318e-05, "loss": 46.0, "step": 937 }, { "epoch": 0.1582921992996667, "grad_norm": 0.0005162409506738186, "learning_rate": 5.7045262307714497e-05, "loss": 46.0, "step": 938 }, { "epoch": 0.15846095430958107, "grad_norm": 0.0005604674806818366, "learning_rate": 5.68493030629905e-05, "loss": 46.0, "step": 939 }, { "epoch": 0.1586297093194954, "grad_norm": 0.0007223184802569449, "learning_rate": 5.6653547217975354e-05, "loss": 46.0, "step": 940 }, { "epoch": 0.15879846432940978, "grad_norm": 0.0010802462929859757, "learning_rate": 5.645799569540463e-05, "loss": 46.0, "step": 941 }, { "epoch": 0.15896721933932415, "grad_norm": 0.0014821782242506742, "learning_rate": 5.626264941705086e-05, "loss": 46.0, "step": 942 }, { "epoch": 0.15913597434923848, "grad_norm": 0.000693553127348423, "learning_rate": 5.606750930371888e-05, "loss": 46.0, "step": 943 }, { "epoch": 0.15930472935915285, "grad_norm": 0.000700769480317831, "learning_rate": 5.587257627524195e-05, "loss": 46.0, "step": 944 }, { "epoch": 0.15947348436906722, "grad_norm": 0.0012417947873473167, "learning_rate": 5.567785125047708e-05, "loss": 46.0, "step": 945 }, { "epoch": 0.15964223937898156, "grad_norm": 0.0007827091030776501, "learning_rate": 5.548333514730082e-05, "loss": 46.0, "step": 946 }, { "epoch": 0.15981099438889593, "grad_norm": 0.0004621573316399008, "learning_rate": 5.528902888260493e-05, "loss": 46.0, "step": 947 }, { "epoch": 0.15997974939881027, "grad_norm": 0.0009308409062214196, "learning_rate": 5.509493337229208e-05, "loss": 46.0, "step": 948 }, { "epoch": 0.16014850440872463, "grad_norm": 0.000546308874618262, "learning_rate": 5.4901049531271474e-05, "loss": 46.0, "step": 949 }, { "epoch": 0.160317259418639, "grad_norm": 0.002490605926141143, "learning_rate": 5.470737827345458e-05, "loss": 46.0, "step": 950 }, { "epoch": 0.16048601442855334, "grad_norm": 0.0007810198585502803, "learning_rate": 5.451392051175079e-05, "loss": 46.0, "step": 951 }, { "epoch": 0.1606547694384677, "grad_norm": 0.0004202220879960805, "learning_rate": 5.4320677158063246e-05, "loss": 46.0, "step": 952 }, { "epoch": 0.16082352444838205, "grad_norm": 0.0005529209738597274, "learning_rate": 5.4127649123284264e-05, "loss": 46.0, "step": 953 }, { "epoch": 0.16099227945829642, "grad_norm": 0.000546621042303741, "learning_rate": 5.3934837317291276e-05, "loss": 46.0, "step": 954 }, { "epoch": 0.16116103446821078, "grad_norm": 0.0007445806404575706, "learning_rate": 5.374224264894261e-05, "loss": 46.0, "step": 955 }, { "epoch": 0.16132978947812512, "grad_norm": 0.0006346721784211695, "learning_rate": 5.354986602607279e-05, "loss": 46.0, "step": 956 }, { "epoch": 0.1614985444880395, "grad_norm": 0.0010019588517025113, "learning_rate": 5.335770835548883e-05, "loss": 46.0, "step": 957 }, { "epoch": 0.16166729949795386, "grad_norm": 0.001932504354044795, "learning_rate": 5.316577054296551e-05, "loss": 46.0, "step": 958 }, { "epoch": 0.1618360545078682, "grad_norm": 0.0013626782456412911, "learning_rate": 5.2974053493241274e-05, "loss": 46.0, "step": 959 }, { "epoch": 0.16200480951778257, "grad_norm": 0.0013853182317689061, "learning_rate": 5.278255811001398e-05, "loss": 46.0, "step": 960 }, { "epoch": 0.1621735645276969, "grad_norm": 0.00069183245068416, "learning_rate": 5.259128529593661e-05, "loss": 46.0, "step": 961 }, { "epoch": 0.16234231953761127, "grad_norm": 0.00028503654175437987, "learning_rate": 5.240023595261301e-05, "loss": 46.0, "step": 962 }, { "epoch": 0.16251107454752564, "grad_norm": 0.0003797942481469363, "learning_rate": 5.2209410980593674e-05, "loss": 46.0, "step": 963 }, { "epoch": 0.16267982955743998, "grad_norm": 0.0009620683849789202, "learning_rate": 5.2018811279371416e-05, "loss": 46.0, "step": 964 }, { "epoch": 0.16284858456735435, "grad_norm": 0.0010808344231918454, "learning_rate": 5.1828437747377354e-05, "loss": 46.0, "step": 965 }, { "epoch": 0.1630173395772687, "grad_norm": 0.0012897250708192587, "learning_rate": 5.163829128197626e-05, "loss": 46.0, "step": 966 }, { "epoch": 0.16318609458718306, "grad_norm": 0.0009198890766128898, "learning_rate": 5.144837277946273e-05, "loss": 46.0, "step": 967 }, { "epoch": 0.16335484959709742, "grad_norm": 0.0015073876129463315, "learning_rate": 5.125868313505691e-05, "loss": 46.0, "step": 968 }, { "epoch": 0.16352360460701176, "grad_norm": 0.000782114511821419, "learning_rate": 5.1069223242899876e-05, "loss": 46.0, "step": 969 }, { "epoch": 0.16369235961692613, "grad_norm": 0.0009239850332960486, "learning_rate": 5.087999399605006e-05, "loss": 46.0, "step": 970 }, { "epoch": 0.16386111462684047, "grad_norm": 0.0014594901585951447, "learning_rate": 5.0690996286478464e-05, "loss": 46.0, "step": 971 }, { "epoch": 0.16402986963675484, "grad_norm": 0.0007826307555660605, "learning_rate": 5.050223100506479e-05, "loss": 46.0, "step": 972 }, { "epoch": 0.1641986246466692, "grad_norm": 0.0008899096283130348, "learning_rate": 5.031369904159311e-05, "loss": 46.0, "step": 973 }, { "epoch": 0.16436737965658355, "grad_norm": 0.0009231562726199627, "learning_rate": 5.012540128474773e-05, "loss": 46.0, "step": 974 }, { "epoch": 0.1645361346664979, "grad_norm": 0.0013324058381840587, "learning_rate": 4.993733862210894e-05, "loss": 46.0, "step": 975 }, { "epoch": 0.16470488967641228, "grad_norm": 0.0003165770904161036, "learning_rate": 4.97495119401489e-05, "loss": 46.0, "step": 976 }, { "epoch": 0.16487364468632662, "grad_norm": 0.0030829953029751778, "learning_rate": 4.956192212422737e-05, "loss": 46.0, "step": 977 }, { "epoch": 0.165042399696241, "grad_norm": 0.0007854963187128305, "learning_rate": 4.9374570058587735e-05, "loss": 46.0, "step": 978 }, { "epoch": 0.16521115470615533, "grad_norm": 0.0004671328642871231, "learning_rate": 4.918745662635249e-05, "loss": 46.0, "step": 979 }, { "epoch": 0.1653799097160697, "grad_norm": 0.0011242057662457228, "learning_rate": 4.900058270951938e-05, "loss": 46.0, "step": 980 }, { "epoch": 0.16554866472598406, "grad_norm": 0.0012494113761931658, "learning_rate": 4.881394918895727e-05, "loss": 46.0, "step": 981 }, { "epoch": 0.1657174197358984, "grad_norm": 0.0012623087968677282, "learning_rate": 4.862755694440164e-05, "loss": 46.0, "step": 982 }, { "epoch": 0.16588617474581277, "grad_norm": 0.0015923196915537119, "learning_rate": 4.8441406854450764e-05, "loss": 46.0, "step": 983 }, { "epoch": 0.1660549297557271, "grad_norm": 0.0006417816039174795, "learning_rate": 4.8255499796561564e-05, "loss": 46.0, "step": 984 }, { "epoch": 0.16622368476564148, "grad_norm": 0.0016659084940329194, "learning_rate": 4.806983664704525e-05, "loss": 46.0, "step": 985 }, { "epoch": 0.16639243977555584, "grad_norm": 0.0007746117771603167, "learning_rate": 4.788441828106338e-05, "loss": 46.0, "step": 986 }, { "epoch": 0.16656119478547018, "grad_norm": 0.002332525560632348, "learning_rate": 4.769924557262364e-05, "loss": 46.0, "step": 987 }, { "epoch": 0.16672994979538455, "grad_norm": 0.0006849498022347689, "learning_rate": 4.751431939457579e-05, "loss": 46.0, "step": 988 }, { "epoch": 0.16689870480529892, "grad_norm": 0.0005523057770915329, "learning_rate": 4.732964061860752e-05, "loss": 46.0, "step": 989 }, { "epoch": 0.16706745981521326, "grad_norm": 0.000503736431710422, "learning_rate": 4.71452101152403e-05, "loss": 46.0, "step": 990 }, { "epoch": 0.16723621482512763, "grad_norm": 0.0019155279733240604, "learning_rate": 4.6961028753825364e-05, "loss": 46.0, "step": 991 }, { "epoch": 0.16740496983504197, "grad_norm": 0.00047645490849390626, "learning_rate": 4.677709740253953e-05, "loss": 46.0, "step": 992 }, { "epoch": 0.16757372484495633, "grad_norm": 0.00034740299452096224, "learning_rate": 4.659341692838113e-05, "loss": 46.0, "step": 993 }, { "epoch": 0.1677424798548707, "grad_norm": 0.0007441159687004983, "learning_rate": 4.640998819716605e-05, "loss": 46.0, "step": 994 }, { "epoch": 0.16791123486478504, "grad_norm": 0.0011230899253860116, "learning_rate": 4.622681207352335e-05, "loss": 46.0, "step": 995 }, { "epoch": 0.1680799898746994, "grad_norm": 0.0009723291150294244, "learning_rate": 4.604388942089146e-05, "loss": 46.0, "step": 996 }, { "epoch": 0.16824874488461375, "grad_norm": 0.0005291839479468763, "learning_rate": 4.586122110151414e-05, "loss": 46.0, "step": 997 }, { "epoch": 0.16841749989452812, "grad_norm": 0.0013463158393278718, "learning_rate": 4.5678807976436034e-05, "loss": 46.0, "step": 998 }, { "epoch": 0.16858625490444248, "grad_norm": 0.0005321825738064945, "learning_rate": 4.549665090549913e-05, "loss": 46.0, "step": 999 }, { "epoch": 0.16875500991435682, "grad_norm": 0.0006381895509548485, "learning_rate": 4.531475074733832e-05, "loss": 46.0, "step": 1000 }, { "epoch": 0.1689237649242712, "grad_norm": 0.0013920076889917254, "learning_rate": 4.5133108359377504e-05, "loss": 46.0, "step": 1001 }, { "epoch": 0.16909251993418556, "grad_norm": 0.0016773123061284423, "learning_rate": 4.495172459782556e-05, "loss": 46.0, "step": 1002 }, { "epoch": 0.1692612749440999, "grad_norm": 0.0011099010007455945, "learning_rate": 4.477060031767223e-05, "loss": 46.0, "step": 1003 }, { "epoch": 0.16943002995401427, "grad_norm": 0.0020444022957235575, "learning_rate": 4.4589736372684166e-05, "loss": 46.0, "step": 1004 }, { "epoch": 0.1695987849639286, "grad_norm": 0.0008166442858055234, "learning_rate": 4.440913361540087e-05, "loss": 46.0, "step": 1005 }, { "epoch": 0.16976753997384297, "grad_norm": 0.0013784863986074924, "learning_rate": 4.4228792897130654e-05, "loss": 46.0, "step": 1006 }, { "epoch": 0.16993629498375734, "grad_norm": 0.0005884088459424675, "learning_rate": 4.40487150679468e-05, "loss": 46.0, "step": 1007 }, { "epoch": 0.17010504999367168, "grad_norm": 0.000776112952735275, "learning_rate": 4.386890097668317e-05, "loss": 46.0, "step": 1008 }, { "epoch": 0.17027380500358605, "grad_norm": 0.0008750237757340074, "learning_rate": 4.3689351470930574e-05, "loss": 46.0, "step": 1009 }, { "epoch": 0.1704425600135004, "grad_norm": 0.000761337811127305, "learning_rate": 4.351006739703275e-05, "loss": 46.0, "step": 1010 }, { "epoch": 0.17061131502341476, "grad_norm": 0.0031786442268639803, "learning_rate": 4.333104960008203e-05, "loss": 46.0, "step": 1011 }, { "epoch": 0.17078007003332912, "grad_norm": 0.004400862380862236, "learning_rate": 4.3152298923915734e-05, "loss": 46.0, "step": 1012 }, { "epoch": 0.17094882504324346, "grad_norm": 0.0022119544446468353, "learning_rate": 4.2973816211112095e-05, "loss": 46.0, "step": 1013 }, { "epoch": 0.17111758005315783, "grad_norm": 0.000868487695697695, "learning_rate": 4.279560230298616e-05, "loss": 46.0, "step": 1014 }, { "epoch": 0.1712863350630722, "grad_norm": 0.0006466138293035328, "learning_rate": 4.261765803958594e-05, "loss": 46.0, "step": 1015 }, { "epoch": 0.17145509007298654, "grad_norm": 0.0007957179914228618, "learning_rate": 4.243998425968841e-05, "loss": 46.0, "step": 1016 }, { "epoch": 0.1716238450829009, "grad_norm": 0.00033654290018603206, "learning_rate": 4.2262581800795586e-05, "loss": 46.0, "step": 1017 }, { "epoch": 0.17179260009281525, "grad_norm": 0.003317353082820773, "learning_rate": 4.2085451499130524e-05, "loss": 46.0, "step": 1018 }, { "epoch": 0.1719613551027296, "grad_norm": 0.00047639801050536335, "learning_rate": 4.190859418963341e-05, "loss": 46.0, "step": 1019 }, { "epoch": 0.17213011011264398, "grad_norm": 0.002305058529600501, "learning_rate": 4.173201070595775e-05, "loss": 46.0, "step": 1020 }, { "epoch": 0.17229886512255832, "grad_norm": 0.0012476850533857942, "learning_rate": 4.1555701880466105e-05, "loss": 46.0, "step": 1021 }, { "epoch": 0.1724676201324727, "grad_norm": 0.004061322659254074, "learning_rate": 4.137966854422647e-05, "loss": 46.0, "step": 1022 }, { "epoch": 0.17263637514238703, "grad_norm": 0.0006147538078948855, "learning_rate": 4.120391152700841e-05, "loss": 46.0, "step": 1023 }, { "epoch": 0.1728051301523014, "grad_norm": 0.0011847203131765127, "learning_rate": 4.102843165727873e-05, "loss": 46.0, "step": 1024 }, { "epoch": 0.17297388516221576, "grad_norm": 0.0005954731605015695, "learning_rate": 4.085322976219802e-05, "loss": 46.0, "step": 1025 }, { "epoch": 0.1731426401721301, "grad_norm": 0.0009544877684675157, "learning_rate": 4.0678306667616606e-05, "loss": 46.0, "step": 1026 }, { "epoch": 0.17331139518204447, "grad_norm": 0.0006944774650037289, "learning_rate": 4.050366319807052e-05, "loss": 46.0, "step": 1027 }, { "epoch": 0.17348015019195884, "grad_norm": 0.002084943698719144, "learning_rate": 4.0329300176777775e-05, "loss": 46.0, "step": 1028 }, { "epoch": 0.17364890520187318, "grad_norm": 0.002356353448703885, "learning_rate": 4.015521842563444e-05, "loss": 46.0, "step": 1029 }, { "epoch": 0.17381766021178754, "grad_norm": 0.0005482649430632591, "learning_rate": 3.998141876521074e-05, "loss": 46.0, "step": 1030 }, { "epoch": 0.17398641522170188, "grad_norm": 0.0008993589435704052, "learning_rate": 3.980790201474723e-05, "loss": 46.0, "step": 1031 }, { "epoch": 0.17415517023161625, "grad_norm": 0.0011524191359058022, "learning_rate": 3.9634668992150895e-05, "loss": 46.0, "step": 1032 }, { "epoch": 0.17432392524153062, "grad_norm": 0.0006604307563975453, "learning_rate": 3.9461720513991285e-05, "loss": 46.0, "step": 1033 }, { "epoch": 0.17449268025144496, "grad_norm": 0.0013253578217700124, "learning_rate": 3.928905739549675e-05, "loss": 46.0, "step": 1034 }, { "epoch": 0.17466143526135933, "grad_norm": 0.0005015510250814259, "learning_rate": 3.911668045055047e-05, "loss": 46.0, "step": 1035 }, { "epoch": 0.17483019027127367, "grad_norm": 0.0005467745941132307, "learning_rate": 3.8944590491686816e-05, "loss": 46.0, "step": 1036 }, { "epoch": 0.17499894528118803, "grad_norm": 0.0007600914686918259, "learning_rate": 3.87727883300872e-05, "loss": 46.0, "step": 1037 }, { "epoch": 0.1751677002911024, "grad_norm": 0.0011487057199701667, "learning_rate": 3.860127477557654e-05, "loss": 46.0, "step": 1038 }, { "epoch": 0.17533645530101674, "grad_norm": 0.003172243246808648, "learning_rate": 3.8430050636619406e-05, "loss": 46.0, "step": 1039 }, { "epoch": 0.1755052103109311, "grad_norm": 0.0005271739210002124, "learning_rate": 3.825911672031611e-05, "loss": 46.0, "step": 1040 }, { "epoch": 0.17567396532084545, "grad_norm": 0.001015481655485928, "learning_rate": 3.8088473832398806e-05, "loss": 46.0, "step": 1041 }, { "epoch": 0.17584272033075982, "grad_norm": 0.0004703805025201291, "learning_rate": 3.7918122777228026e-05, "loss": 46.0, "step": 1042 }, { "epoch": 0.17601147534067418, "grad_norm": 0.001103199552744627, "learning_rate": 3.774806435778858e-05, "loss": 46.0, "step": 1043 }, { "epoch": 0.17618023035058852, "grad_norm": 0.0005269388784654438, "learning_rate": 3.757829937568591e-05, "loss": 46.0, "step": 1044 }, { "epoch": 0.1763489853605029, "grad_norm": 0.0017238155705854297, "learning_rate": 3.7408828631142237e-05, "loss": 46.0, "step": 1045 }, { "epoch": 0.17651774037041726, "grad_norm": 0.0006927169160917401, "learning_rate": 3.7239652922992894e-05, "loss": 46.0, "step": 1046 }, { "epoch": 0.1766864953803316, "grad_norm": 0.002759503899142146, "learning_rate": 3.707077304868245e-05, "loss": 46.0, "step": 1047 }, { "epoch": 0.17685525039024597, "grad_norm": 0.0005645275814458728, "learning_rate": 3.690218980426101e-05, "loss": 46.0, "step": 1048 }, { "epoch": 0.1770240054001603, "grad_norm": 0.00040731692570261657, "learning_rate": 3.6733903984380545e-05, "loss": 46.0, "step": 1049 }, { "epoch": 0.17719276041007467, "grad_norm": 0.0008958344697020948, "learning_rate": 3.656591638229088e-05, "loss": 46.0, "step": 1050 }, { "epoch": 0.17736151541998904, "grad_norm": 0.0009003482409752905, "learning_rate": 3.63982277898362e-05, "loss": 46.0, "step": 1051 }, { "epoch": 0.17753027042990338, "grad_norm": 0.00033022521529346704, "learning_rate": 3.6230838997451365e-05, "loss": 46.0, "step": 1052 }, { "epoch": 0.17769902543981775, "grad_norm": 0.0003871631342917681, "learning_rate": 3.606375079415797e-05, "loss": 46.0, "step": 1053 }, { "epoch": 0.1778677804497321, "grad_norm": 0.0018638977780938148, "learning_rate": 3.589696396756058e-05, "loss": 46.0, "step": 1054 }, { "epoch": 0.17803653545964646, "grad_norm": 0.0007231778581626713, "learning_rate": 3.5730479303843446e-05, "loss": 46.0, "step": 1055 }, { "epoch": 0.17820529046956082, "grad_norm": 0.001085842028260231, "learning_rate": 3.556429758776629e-05, "loss": 46.0, "step": 1056 }, { "epoch": 0.17837404547947516, "grad_norm": 0.0005750608397647738, "learning_rate": 3.5398419602660935e-05, "loss": 46.0, "step": 1057 }, { "epoch": 0.17854280048938953, "grad_norm": 0.0007418044842779636, "learning_rate": 3.523284613042745e-05, "loss": 46.0, "step": 1058 }, { "epoch": 0.1787115554993039, "grad_norm": 0.004355406854301691, "learning_rate": 3.506757795153056e-05, "loss": 46.0, "step": 1059 }, { "epoch": 0.17888031050921824, "grad_norm": 0.0003858884156215936, "learning_rate": 3.49026158449959e-05, "loss": 46.0, "step": 1060 }, { "epoch": 0.1790490655191326, "grad_norm": 0.0006274758488871157, "learning_rate": 3.4737960588406374e-05, "loss": 46.0, "step": 1061 }, { "epoch": 0.17921782052904695, "grad_norm": 0.001342911273241043, "learning_rate": 3.457361295789849e-05, "loss": 46.0, "step": 1062 }, { "epoch": 0.1793865755389613, "grad_norm": 0.0006192997680045664, "learning_rate": 3.440957372815872e-05, "loss": 46.0, "step": 1063 }, { "epoch": 0.17955533054887568, "grad_norm": 0.0011153030209243298, "learning_rate": 3.424584367241973e-05, "loss": 46.0, "step": 1064 }, { "epoch": 0.17972408555879002, "grad_norm": 0.0015017263358458877, "learning_rate": 3.408242356245701e-05, "loss": 46.0, "step": 1065 }, { "epoch": 0.1798928405687044, "grad_norm": 0.0004973539616912603, "learning_rate": 3.3919314168584924e-05, "loss": 46.0, "step": 1066 }, { "epoch": 0.18006159557861873, "grad_norm": 0.00341922789812088, "learning_rate": 3.375651625965315e-05, "loss": 46.0, "step": 1067 }, { "epoch": 0.1802303505885331, "grad_norm": 0.0006390162161551416, "learning_rate": 3.3594030603043304e-05, "loss": 46.0, "step": 1068 }, { "epoch": 0.18039910559844746, "grad_norm": 0.00042940647108480334, "learning_rate": 3.3431857964665003e-05, "loss": 46.0, "step": 1069 }, { "epoch": 0.1805678606083618, "grad_norm": 0.0022605019621551037, "learning_rate": 3.326999910895242e-05, "loss": 46.0, "step": 1070 }, { "epoch": 0.18073661561827617, "grad_norm": 0.0012887527700513601, "learning_rate": 3.3108454798860656e-05, "loss": 46.0, "step": 1071 }, { "epoch": 0.18090537062819054, "grad_norm": 0.0010164948180317879, "learning_rate": 3.2947225795862125e-05, "loss": 46.0, "step": 1072 }, { "epoch": 0.18107412563810488, "grad_norm": 0.002262418856844306, "learning_rate": 3.278631285994298e-05, "loss": 46.0, "step": 1073 }, { "epoch": 0.18124288064801924, "grad_norm": 0.00039437160012312233, "learning_rate": 3.2625716749599546e-05, "loss": 46.0, "step": 1074 }, { "epoch": 0.18141163565793358, "grad_norm": 0.0006692737806588411, "learning_rate": 3.246543822183469e-05, "loss": 46.0, "step": 1075 }, { "epoch": 0.18158039066784795, "grad_norm": 0.0014356159372255206, "learning_rate": 3.2305478032154324e-05, "loss": 46.0, "step": 1076 }, { "epoch": 0.18174914567776232, "grad_norm": 0.001208776026032865, "learning_rate": 3.2145836934563745e-05, "loss": 46.0, "step": 1077 }, { "epoch": 0.18191790068767666, "grad_norm": 0.0006577944732271135, "learning_rate": 3.198651568156426e-05, "loss": 46.0, "step": 1078 }, { "epoch": 0.18208665569759103, "grad_norm": 0.0010622587287798524, "learning_rate": 3.1827515024149445e-05, "loss": 46.0, "step": 1079 }, { "epoch": 0.18225541070750537, "grad_norm": 0.0010634633945301175, "learning_rate": 3.166883571180159e-05, "loss": 46.0, "step": 1080 }, { "epoch": 0.18242416571741973, "grad_norm": 0.0010152349714189768, "learning_rate": 3.151047849248847e-05, "loss": 46.0, "step": 1081 }, { "epoch": 0.1825929207273341, "grad_norm": 0.0008210540981963277, "learning_rate": 3.1352444112659484e-05, "loss": 46.0, "step": 1082 }, { "epoch": 0.18276167573724844, "grad_norm": 0.0006314092315733433, "learning_rate": 3.119473331724219e-05, "loss": 46.0, "step": 1083 }, { "epoch": 0.1829304307471628, "grad_norm": 0.0015025155153125525, "learning_rate": 3.103734684963902e-05, "loss": 46.0, "step": 1084 }, { "epoch": 0.18309918575707718, "grad_norm": 0.0008753265137784183, "learning_rate": 3.088028545172352e-05, "loss": 46.0, "step": 1085 }, { "epoch": 0.18326794076699152, "grad_norm": 0.0014098742976784706, "learning_rate": 3.0723549863836996e-05, "loss": 46.0, "step": 1086 }, { "epoch": 0.18343669577690588, "grad_norm": 0.0014926984440535307, "learning_rate": 3.056714082478496e-05, "loss": 46.0, "step": 1087 }, { "epoch": 0.18360545078682022, "grad_norm": 0.0008304324583150446, "learning_rate": 3.0411059071833668e-05, "loss": 46.0, "step": 1088 }, { "epoch": 0.1837742057967346, "grad_norm": 0.0004860296321567148, "learning_rate": 3.025530534070664e-05, "loss": 46.0, "step": 1089 }, { "epoch": 0.18394296080664896, "grad_norm": 0.0006332461489364505, "learning_rate": 3.00998803655812e-05, "loss": 46.0, "step": 1090 }, { "epoch": 0.1841117158165633, "grad_norm": 0.0013554253382608294, "learning_rate": 2.9944784879085065e-05, "loss": 46.0, "step": 1091 }, { "epoch": 0.18428047082647767, "grad_norm": 0.0008776708273217082, "learning_rate": 2.979001961229281e-05, "loss": 46.0, "step": 1092 }, { "epoch": 0.184449225836392, "grad_norm": 0.001277380739338696, "learning_rate": 2.9635585294722336e-05, "loss": 46.0, "step": 1093 }, { "epoch": 0.18461798084630637, "grad_norm": 0.000868733914103359, "learning_rate": 2.948148265433174e-05, "loss": 46.0, "step": 1094 }, { "epoch": 0.18478673585622074, "grad_norm": 0.0004029095871374011, "learning_rate": 2.9327712417515597e-05, "loss": 46.0, "step": 1095 }, { "epoch": 0.18478673585622074, "eval_loss": 11.5, "eval_runtime": 14.1998, "eval_samples_per_second": 175.707, "eval_steps_per_second": 87.888, "step": 1095 }, { "epoch": 0.18495549086613508, "grad_norm": 0.0018527479842305183, "learning_rate": 2.917427530910154e-05, "loss": 46.0, "step": 1096 }, { "epoch": 0.18512424587604945, "grad_norm": 0.000325352099025622, "learning_rate": 2.9021172052347113e-05, "loss": 46.0, "step": 1097 }, { "epoch": 0.18529300088596382, "grad_norm": 0.0008033345802687109, "learning_rate": 2.886840336893606e-05, "loss": 46.0, "step": 1098 }, { "epoch": 0.18546175589587816, "grad_norm": 0.003062557429075241, "learning_rate": 2.871596997897509e-05, "loss": 46.0, "step": 1099 }, { "epoch": 0.18563051090579252, "grad_norm": 0.0005990050849504769, "learning_rate": 2.8563872600990394e-05, "loss": 46.0, "step": 1100 }, { "epoch": 0.18579926591570686, "grad_norm": 0.00030623353086411953, "learning_rate": 2.841211195192436e-05, "loss": 46.0, "step": 1101 }, { "epoch": 0.18596802092562123, "grad_norm": 0.0005539836129173636, "learning_rate": 2.826068874713208e-05, "loss": 46.0, "step": 1102 }, { "epoch": 0.1861367759355356, "grad_norm": 0.0013511950382962823, "learning_rate": 2.8109603700378074e-05, "loss": 46.0, "step": 1103 }, { "epoch": 0.18630553094544994, "grad_norm": 0.0009734542109072208, "learning_rate": 2.795885752383284e-05, "loss": 46.0, "step": 1104 }, { "epoch": 0.1864742859553643, "grad_norm": 0.000757011875975877, "learning_rate": 2.780845092806964e-05, "loss": 46.0, "step": 1105 }, { "epoch": 0.18664304096527864, "grad_norm": 0.0009687045239843428, "learning_rate": 2.7658384622060873e-05, "loss": 46.0, "step": 1106 }, { "epoch": 0.186811795975193, "grad_norm": 0.0008338790503330529, "learning_rate": 2.7508659313175112e-05, "loss": 46.0, "step": 1107 }, { "epoch": 0.18698055098510738, "grad_norm": 0.000879534287378192, "learning_rate": 2.735927570717348e-05, "loss": 46.0, "step": 1108 }, { "epoch": 0.18714930599502172, "grad_norm": 0.000895138131454587, "learning_rate": 2.7210234508206313e-05, "loss": 46.0, "step": 1109 }, { "epoch": 0.1873180610049361, "grad_norm": 0.0016813467955216765, "learning_rate": 2.706153641881013e-05, "loss": 46.0, "step": 1110 }, { "epoch": 0.18748681601485043, "grad_norm": 0.00046861334703862667, "learning_rate": 2.6913182139904014e-05, "loss": 46.0, "step": 1111 }, { "epoch": 0.1876555710247648, "grad_norm": 0.0008181455777958035, "learning_rate": 2.6765172370786463e-05, "loss": 46.0, "step": 1112 }, { "epoch": 0.18782432603467916, "grad_norm": 0.000511069200001657, "learning_rate": 2.6617507809132038e-05, "loss": 46.0, "step": 1113 }, { "epoch": 0.1879930810445935, "grad_norm": 0.0009120728354901075, "learning_rate": 2.647018915098809e-05, "loss": 46.0, "step": 1114 }, { "epoch": 0.18816183605450787, "grad_norm": 0.0011518978280946612, "learning_rate": 2.6323217090771512e-05, "loss": 46.0, "step": 1115 }, { "epoch": 0.18833059106442224, "grad_norm": 0.001370842568576336, "learning_rate": 2.6176592321265425e-05, "loss": 46.0, "step": 1116 }, { "epoch": 0.18849934607433658, "grad_norm": 0.002501879585906863, "learning_rate": 2.6030315533615878e-05, "loss": 46.0, "step": 1117 }, { "epoch": 0.18866810108425094, "grad_norm": 0.0005397344939410686, "learning_rate": 2.588438741732876e-05, "loss": 46.0, "step": 1118 }, { "epoch": 0.18883685609416528, "grad_norm": 0.0013044921215623617, "learning_rate": 2.5738808660266235e-05, "loss": 46.0, "step": 1119 }, { "epoch": 0.18900561110407965, "grad_norm": 0.0007338232826441526, "learning_rate": 2.5593579948643874e-05, "loss": 46.0, "step": 1120 }, { "epoch": 0.18917436611399402, "grad_norm": 0.0011662261094897985, "learning_rate": 2.5448701967027167e-05, "loss": 46.0, "step": 1121 }, { "epoch": 0.18934312112390836, "grad_norm": 0.00048239415627904236, "learning_rate": 2.5304175398328278e-05, "loss": 46.0, "step": 1122 }, { "epoch": 0.18951187613382273, "grad_norm": 0.002095632255077362, "learning_rate": 2.5160000923803063e-05, "loss": 46.0, "step": 1123 }, { "epoch": 0.18968063114373707, "grad_norm": 0.00038515665801241994, "learning_rate": 2.501617922304763e-05, "loss": 46.0, "step": 1124 }, { "epoch": 0.18984938615365143, "grad_norm": 0.0008104884182102978, "learning_rate": 2.4872710973995204e-05, "loss": 46.0, "step": 1125 }, { "epoch": 0.1900181411635658, "grad_norm": 0.0006576053565368056, "learning_rate": 2.4729596852912996e-05, "loss": 46.0, "step": 1126 }, { "epoch": 0.19018689617348014, "grad_norm": 0.0006934543489478528, "learning_rate": 2.4586837534398898e-05, "loss": 46.0, "step": 1127 }, { "epoch": 0.1903556511833945, "grad_norm": 0.0016504209488630295, "learning_rate": 2.444443369137841e-05, "loss": 46.0, "step": 1128 }, { "epoch": 0.19052440619330888, "grad_norm": 0.0016103170346468687, "learning_rate": 2.4302385995101417e-05, "loss": 46.0, "step": 1129 }, { "epoch": 0.19069316120322322, "grad_norm": 0.0009479466243647039, "learning_rate": 2.4160695115138998e-05, "loss": 46.0, "step": 1130 }, { "epoch": 0.19086191621313758, "grad_norm": 0.0006660729413852096, "learning_rate": 2.401936171938043e-05, "loss": 46.0, "step": 1131 }, { "epoch": 0.19103067122305192, "grad_norm": 0.002802348928526044, "learning_rate": 2.3878386474029724e-05, "loss": 46.0, "step": 1132 }, { "epoch": 0.1911994262329663, "grad_norm": 0.0021046160254627466, "learning_rate": 2.373777004360278e-05, "loss": 46.0, "step": 1133 }, { "epoch": 0.19136818124288066, "grad_norm": 0.0005733026191592216, "learning_rate": 2.3597513090924228e-05, "loss": 46.0, "step": 1134 }, { "epoch": 0.191536936252795, "grad_norm": 0.001300279633142054, "learning_rate": 2.3457616277124018e-05, "loss": 46.0, "step": 1135 }, { "epoch": 0.19170569126270937, "grad_norm": 0.0020821443758904934, "learning_rate": 2.3318080261634723e-05, "loss": 46.0, "step": 1136 }, { "epoch": 0.1918744462726237, "grad_norm": 0.0010038736509159207, "learning_rate": 2.3178905702188124e-05, "loss": 46.0, "step": 1137 }, { "epoch": 0.19204320128253807, "grad_norm": 0.0026241031009703875, "learning_rate": 2.3040093254812123e-05, "loss": 46.0, "step": 1138 }, { "epoch": 0.19221195629245244, "grad_norm": 0.0008017763611860573, "learning_rate": 2.2901643573827914e-05, "loss": 46.0, "step": 1139 }, { "epoch": 0.19238071130236678, "grad_norm": 0.0006083068437874317, "learning_rate": 2.2763557311846583e-05, "loss": 46.0, "step": 1140 }, { "epoch": 0.19254946631228115, "grad_norm": 0.0014171472284942865, "learning_rate": 2.2625835119766202e-05, "loss": 46.0, "step": 1141 }, { "epoch": 0.19271822132219552, "grad_norm": 0.00046038886648602784, "learning_rate": 2.2488477646768724e-05, "loss": 46.0, "step": 1142 }, { "epoch": 0.19288697633210986, "grad_norm": 0.0006552223348990083, "learning_rate": 2.235148554031694e-05, "loss": 46.0, "step": 1143 }, { "epoch": 0.19305573134202422, "grad_norm": 0.0009186275419779122, "learning_rate": 2.221485944615137e-05, "loss": 46.0, "step": 1144 }, { "epoch": 0.19322448635193856, "grad_norm": 0.0005543860606849194, "learning_rate": 2.207860000828731e-05, "loss": 46.0, "step": 1145 }, { "epoch": 0.19339324136185293, "grad_norm": 0.0007307975320145488, "learning_rate": 2.1942707869011682e-05, "loss": 46.0, "step": 1146 }, { "epoch": 0.1935619963717673, "grad_norm": 0.0009457955020479858, "learning_rate": 2.1807183668880194e-05, "loss": 46.0, "step": 1147 }, { "epoch": 0.19373075138168164, "grad_norm": 0.0009510859963484108, "learning_rate": 2.1672028046713998e-05, "loss": 46.0, "step": 1148 }, { "epoch": 0.193899506391596, "grad_norm": 0.0012068504001945257, "learning_rate": 2.1537241639597082e-05, "loss": 46.0, "step": 1149 }, { "epoch": 0.19406826140151034, "grad_norm": 0.0008088087779469788, "learning_rate": 2.1402825082872968e-05, "loss": 46.0, "step": 1150 }, { "epoch": 0.1942370164114247, "grad_norm": 0.00042066810419782996, "learning_rate": 2.1268779010141737e-05, "loss": 46.0, "step": 1151 }, { "epoch": 0.19440577142133908, "grad_norm": 0.0008168341009877622, "learning_rate": 2.1135104053257272e-05, "loss": 46.0, "step": 1152 }, { "epoch": 0.19457452643125342, "grad_norm": 0.0011002906830981374, "learning_rate": 2.1001800842324026e-05, "loss": 46.0, "step": 1153 }, { "epoch": 0.1947432814411678, "grad_norm": 0.0006414004019461572, "learning_rate": 2.0868870005694173e-05, "loss": 46.0, "step": 1154 }, { "epoch": 0.19491203645108215, "grad_norm": 0.0005531748756766319, "learning_rate": 2.0736312169964635e-05, "loss": 46.0, "step": 1155 }, { "epoch": 0.1950807914609965, "grad_norm": 0.000641561346128583, "learning_rate": 2.0604127959974107e-05, "loss": 46.0, "step": 1156 }, { "epoch": 0.19524954647091086, "grad_norm": 0.0006983129424043, "learning_rate": 2.0472317998800106e-05, "loss": 46.0, "step": 1157 }, { "epoch": 0.1954183014808252, "grad_norm": 0.0006936094141565263, "learning_rate": 2.0340882907756076e-05, "loss": 46.0, "step": 1158 }, { "epoch": 0.19558705649073957, "grad_norm": 0.0006084066699258983, "learning_rate": 2.020982330638841e-05, "loss": 46.0, "step": 1159 }, { "epoch": 0.19575581150065394, "grad_norm": 0.004117981996387243, "learning_rate": 2.0079139812473636e-05, "loss": 46.0, "step": 1160 }, { "epoch": 0.19592456651056828, "grad_norm": 0.0016725576715543866, "learning_rate": 1.994883304201527e-05, "loss": 46.0, "step": 1161 }, { "epoch": 0.19609332152048264, "grad_norm": 0.000729929655790329, "learning_rate": 1.981890360924116e-05, "loss": 46.0, "step": 1162 }, { "epoch": 0.19626207653039698, "grad_norm": 0.0005720785120502114, "learning_rate": 1.9689352126600536e-05, "loss": 46.0, "step": 1163 }, { "epoch": 0.19643083154031135, "grad_norm": 0.0008306180825456977, "learning_rate": 1.9560179204760954e-05, "loss": 46.0, "step": 1164 }, { "epoch": 0.19659958655022572, "grad_norm": 0.0006977242301218212, "learning_rate": 1.943138545260569e-05, "loss": 46.0, "step": 1165 }, { "epoch": 0.19676834156014006, "grad_norm": 0.0005357645568437874, "learning_rate": 1.9302971477230613e-05, "loss": 46.0, "step": 1166 }, { "epoch": 0.19693709657005443, "grad_norm": 0.0009179019252769649, "learning_rate": 1.9174937883941503e-05, "loss": 46.0, "step": 1167 }, { "epoch": 0.1971058515799688, "grad_norm": 0.000713715679012239, "learning_rate": 1.904728527625108e-05, "loss": 46.0, "step": 1168 }, { "epoch": 0.19727460658988313, "grad_norm": 0.0006716207135468721, "learning_rate": 1.8920014255876263e-05, "loss": 46.0, "step": 1169 }, { "epoch": 0.1974433615997975, "grad_norm": 0.0018655718304216862, "learning_rate": 1.8793125422735235e-05, "loss": 46.0, "step": 1170 }, { "epoch": 0.19761211660971184, "grad_norm": 0.0017995084635913372, "learning_rate": 1.8666619374944684e-05, "loss": 46.0, "step": 1171 }, { "epoch": 0.1977808716196262, "grad_norm": 0.001270498731173575, "learning_rate": 1.8540496708816935e-05, "loss": 46.0, "step": 1172 }, { "epoch": 0.19794962662954058, "grad_norm": 0.0005029829917475581, "learning_rate": 1.8414758018857248e-05, "loss": 46.0, "step": 1173 }, { "epoch": 0.19811838163945492, "grad_norm": 0.0007416327716782689, "learning_rate": 1.8289403897760815e-05, "loss": 46.0, "step": 1174 }, { "epoch": 0.19828713664936928, "grad_norm": 0.0004362264589872211, "learning_rate": 1.8164434936410114e-05, "loss": 46.0, "step": 1175 }, { "epoch": 0.19845589165928362, "grad_norm": 0.0005498372483998537, "learning_rate": 1.8039851723872204e-05, "loss": 46.0, "step": 1176 }, { "epoch": 0.198624646669198, "grad_norm": 0.0005490055773407221, "learning_rate": 1.7915654847395646e-05, "loss": 46.0, "step": 1177 }, { "epoch": 0.19879340167911236, "grad_norm": 0.0007399431779049337, "learning_rate": 1.7791844892408104e-05, "loss": 46.0, "step": 1178 }, { "epoch": 0.1989621566890267, "grad_norm": 0.00065398751758039, "learning_rate": 1.766842244251332e-05, "loss": 46.0, "step": 1179 }, { "epoch": 0.19913091169894107, "grad_norm": 0.0003902087628375739, "learning_rate": 1.7545388079488455e-05, "loss": 46.0, "step": 1180 }, { "epoch": 0.19929966670885543, "grad_norm": 0.0006421052967198193, "learning_rate": 1.7422742383281355e-05, "loss": 46.0, "step": 1181 }, { "epoch": 0.19946842171876977, "grad_norm": 0.000821156136225909, "learning_rate": 1.7300485932007794e-05, "loss": 46.0, "step": 1182 }, { "epoch": 0.19963717672868414, "grad_norm": 0.000910441973246634, "learning_rate": 1.7178619301948774e-05, "loss": 46.0, "step": 1183 }, { "epoch": 0.19980593173859848, "grad_norm": 0.0008174055255949497, "learning_rate": 1.7057143067547788e-05, "loss": 46.0, "step": 1184 }, { "epoch": 0.19997468674851285, "grad_norm": 0.0007186917937360704, "learning_rate": 1.69360578014081e-05, "loss": 46.0, "step": 1185 }, { "epoch": 0.20014344175842722, "grad_norm": 0.000985615304671228, "learning_rate": 1.6815364074290153e-05, "loss": 46.0, "step": 1186 }, { "epoch": 0.20031219676834155, "grad_norm": 0.0004897791077382863, "learning_rate": 1.6695062455108646e-05, "loss": 46.0, "step": 1187 }, { "epoch": 0.20048095177825592, "grad_norm": 0.0006414767703972757, "learning_rate": 1.6575153510930065e-05, "loss": 46.0, "step": 1188 }, { "epoch": 0.20064970678817026, "grad_norm": 0.00031621320522390306, "learning_rate": 1.6455637806970027e-05, "loss": 46.0, "step": 1189 }, { "epoch": 0.20081846179808463, "grad_norm": 0.004970474634319544, "learning_rate": 1.6336515906590354e-05, "loss": 46.0, "step": 1190 }, { "epoch": 0.200987216807999, "grad_norm": 0.0018685179529711604, "learning_rate": 1.621778837129676e-05, "loss": 46.0, "step": 1191 }, { "epoch": 0.20115597181791334, "grad_norm": 0.0012024708557873964, "learning_rate": 1.6099455760735937e-05, "loss": 46.0, "step": 1192 }, { "epoch": 0.2013247268278277, "grad_norm": 0.0004908541450276971, "learning_rate": 1.598151863269306e-05, "loss": 46.0, "step": 1193 }, { "epoch": 0.20149348183774204, "grad_norm": 0.0007760179578326643, "learning_rate": 1.5863977543089092e-05, "loss": 46.0, "step": 1194 }, { "epoch": 0.2016622368476564, "grad_norm": 0.00047840975457802415, "learning_rate": 1.5746833045978216e-05, "loss": 46.0, "step": 1195 }, { "epoch": 0.20183099185757078, "grad_norm": 0.0011836383491754532, "learning_rate": 1.563008569354516e-05, "loss": 46.0, "step": 1196 }, { "epoch": 0.20199974686748512, "grad_norm": 0.000663977290969342, "learning_rate": 1.5513736036102644e-05, "loss": 46.0, "step": 1197 }, { "epoch": 0.2021685018773995, "grad_norm": 0.0010960629442706704, "learning_rate": 1.5397784622088772e-05, "loss": 46.0, "step": 1198 }, { "epoch": 0.20233725688731385, "grad_norm": 0.000916484568733722, "learning_rate": 1.52822319980645e-05, "loss": 46.0, "step": 1199 }, { "epoch": 0.2025060118972282, "grad_norm": 0.0016858198214322329, "learning_rate": 1.516707870871089e-05, "loss": 46.0, "step": 1200 }, { "epoch": 0.20267476690714256, "grad_norm": 0.0004934574826620519, "learning_rate": 1.5052325296826708e-05, "loss": 46.0, "step": 1201 }, { "epoch": 0.2028435219170569, "grad_norm": 0.0013131732121109962, "learning_rate": 1.4937972303325909e-05, "loss": 46.0, "step": 1202 }, { "epoch": 0.20301227692697127, "grad_norm": 0.0024639791809022427, "learning_rate": 1.4824020267234828e-05, "loss": 46.0, "step": 1203 }, { "epoch": 0.20318103193688564, "grad_norm": 0.001286798040382564, "learning_rate": 1.471046972568989e-05, "loss": 46.0, "step": 1204 }, { "epoch": 0.20334978694679998, "grad_norm": 0.0017689433880150318, "learning_rate": 1.4597321213935045e-05, "loss": 46.0, "step": 1205 }, { "epoch": 0.20351854195671434, "grad_norm": 0.0009689538856036961, "learning_rate": 1.4484575265319112e-05, "loss": 46.0, "step": 1206 }, { "epoch": 0.20368729696662868, "grad_norm": 0.0009614164009690285, "learning_rate": 1.4372232411293373e-05, "loss": 46.0, "step": 1207 }, { "epoch": 0.20385605197654305, "grad_norm": 0.0008921355474740267, "learning_rate": 1.4260293181409023e-05, "loss": 46.0, "step": 1208 }, { "epoch": 0.20402480698645742, "grad_norm": 0.0005731172277592123, "learning_rate": 1.414875810331473e-05, "loss": 46.0, "step": 1209 }, { "epoch": 0.20419356199637176, "grad_norm": 0.0005115303792990744, "learning_rate": 1.4037627702754064e-05, "loss": 46.0, "step": 1210 }, { "epoch": 0.20436231700628613, "grad_norm": 0.0010284315794706345, "learning_rate": 1.3926902503563099e-05, "loss": 46.0, "step": 1211 }, { "epoch": 0.2045310720162005, "grad_norm": 0.0005095731467008591, "learning_rate": 1.3816583027667895e-05, "loss": 46.0, "step": 1212 }, { "epoch": 0.20469982702611483, "grad_norm": 0.0006683744722977281, "learning_rate": 1.370666979508206e-05, "loss": 46.0, "step": 1213 }, { "epoch": 0.2048685820360292, "grad_norm": 0.0013482188805937767, "learning_rate": 1.3597163323904238e-05, "loss": 46.0, "step": 1214 }, { "epoch": 0.20503733704594354, "grad_norm": 0.0006872873054817319, "learning_rate": 1.3488064130315858e-05, "loss": 46.0, "step": 1215 }, { "epoch": 0.2052060920558579, "grad_norm": 0.000505985866766423, "learning_rate": 1.3379372728578387e-05, "loss": 46.0, "step": 1216 }, { "epoch": 0.20537484706577228, "grad_norm": 0.0005020827520638704, "learning_rate": 1.3271089631031152e-05, "loss": 46.0, "step": 1217 }, { "epoch": 0.20554360207568662, "grad_norm": 0.0010777136776596308, "learning_rate": 1.316321534808893e-05, "loss": 46.0, "step": 1218 }, { "epoch": 0.20571235708560098, "grad_norm": 0.0018740965751931071, "learning_rate": 1.3055750388239374e-05, "loss": 46.0, "step": 1219 }, { "epoch": 0.20588111209551532, "grad_norm": 0.0017112314235419035, "learning_rate": 1.2948695258040734e-05, "loss": 46.0, "step": 1220 }, { "epoch": 0.2060498671054297, "grad_norm": 0.0021086453925818205, "learning_rate": 1.2842050462119426e-05, "loss": 46.0, "step": 1221 }, { "epoch": 0.20621862211534406, "grad_norm": 0.0006901904125697911, "learning_rate": 1.2735816503167708e-05, "loss": 46.0, "step": 1222 }, { "epoch": 0.2063873771252584, "grad_norm": 0.0009343160782009363, "learning_rate": 1.2629993881941249e-05, "loss": 46.0, "step": 1223 }, { "epoch": 0.20655613213517277, "grad_norm": 0.0008830896113067865, "learning_rate": 1.2524583097256793e-05, "loss": 46.0, "step": 1224 }, { "epoch": 0.20672488714508713, "grad_norm": 0.0018291313899680972, "learning_rate": 1.2419584645989823e-05, "loss": 46.0, "step": 1225 }, { "epoch": 0.20689364215500147, "grad_norm": 0.00036748184356838465, "learning_rate": 1.2314999023072182e-05, "loss": 46.0, "step": 1226 }, { "epoch": 0.20706239716491584, "grad_norm": 0.0010965716792270541, "learning_rate": 1.2210826721489765e-05, "loss": 46.0, "step": 1227 }, { "epoch": 0.20723115217483018, "grad_norm": 0.0008960228879004717, "learning_rate": 1.2107068232280238e-05, "loss": 46.0, "step": 1228 }, { "epoch": 0.20739990718474455, "grad_norm": 0.004184830002486706, "learning_rate": 1.2003724044530596e-05, "loss": 46.0, "step": 1229 }, { "epoch": 0.20756866219465891, "grad_norm": 0.0006089484668336809, "learning_rate": 1.190079464537498e-05, "loss": 46.0, "step": 1230 }, { "epoch": 0.20773741720457325, "grad_norm": 0.0005351770669221878, "learning_rate": 1.1798280519992366e-05, "loss": 46.0, "step": 1231 }, { "epoch": 0.20790617221448762, "grad_norm": 0.000453771062893793, "learning_rate": 1.169618215160424e-05, "loss": 46.0, "step": 1232 }, { "epoch": 0.20807492722440196, "grad_norm": 0.0010923264781013131, "learning_rate": 1.1594500021472243e-05, "loss": 46.0, "step": 1233 }, { "epoch": 0.20824368223431633, "grad_norm": 0.0011471729958429933, "learning_rate": 1.1493234608896141e-05, "loss": 46.0, "step": 1234 }, { "epoch": 0.2084124372442307, "grad_norm": 0.0009632937726564705, "learning_rate": 1.1392386391211307e-05, "loss": 46.0, "step": 1235 }, { "epoch": 0.20858119225414504, "grad_norm": 0.0008420674130320549, "learning_rate": 1.1291955843786617e-05, "loss": 46.0, "step": 1236 }, { "epoch": 0.2087499472640594, "grad_norm": 0.0013222956331446767, "learning_rate": 1.1191943440022179e-05, "loss": 46.0, "step": 1237 }, { "epoch": 0.20891870227397377, "grad_norm": 0.001955078449100256, "learning_rate": 1.1092349651347055e-05, "loss": 46.0, "step": 1238 }, { "epoch": 0.2090874572838881, "grad_norm": 0.0019749640487134457, "learning_rate": 1.0993174947217144e-05, "loss": 46.0, "step": 1239 }, { "epoch": 0.20925621229380248, "grad_norm": 0.0010516421170905232, "learning_rate": 1.089441979511282e-05, "loss": 46.0, "step": 1240 }, { "epoch": 0.20942496730371682, "grad_norm": 0.0008922962588258088, "learning_rate": 1.079608466053692e-05, "loss": 46.0, "step": 1241 }, { "epoch": 0.2095937223136312, "grad_norm": 0.0005941609852015972, "learning_rate": 1.0698170007012332e-05, "loss": 46.0, "step": 1242 }, { "epoch": 0.20976247732354555, "grad_norm": 0.0004414799332153052, "learning_rate": 1.060067629607996e-05, "loss": 46.0, "step": 1243 }, { "epoch": 0.2099312323334599, "grad_norm": 0.002375362440943718, "learning_rate": 1.0503603987296562e-05, "loss": 46.0, "step": 1244 }, { "epoch": 0.21009998734337426, "grad_norm": 0.0010825851932168007, "learning_rate": 1.0406953538232479e-05, "loss": 46.0, "step": 1245 }, { "epoch": 0.2102687423532886, "grad_norm": 0.0012761763064190745, "learning_rate": 1.0310725404469479e-05, "loss": 46.0, "step": 1246 }, { "epoch": 0.21043749736320297, "grad_norm": 0.0008807352278381586, "learning_rate": 1.0214920039598774e-05, "loss": 46.0, "step": 1247 }, { "epoch": 0.21060625237311734, "grad_norm": 0.0006370813935063779, "learning_rate": 1.011953789521869e-05, "loss": 46.0, "step": 1248 }, { "epoch": 0.21077500738303168, "grad_norm": 0.0006687640561722219, "learning_rate": 1.0024579420932633e-05, "loss": 46.0, "step": 1249 }, { "epoch": 0.21094376239294604, "grad_norm": 0.0018508404027670622, "learning_rate": 9.93004506434696e-06, "loss": 46.0, "step": 1250 }, { "epoch": 0.2111125174028604, "grad_norm": 0.00048345469986088574, "learning_rate": 9.835935271068842e-06, "loss": 46.0, "step": 1251 }, { "epoch": 0.21128127241277475, "grad_norm": 0.0006890186341479421, "learning_rate": 9.74225048470422e-06, "loss": 46.0, "step": 1252 }, { "epoch": 0.21145002742268912, "grad_norm": 0.0008637924329377711, "learning_rate": 9.648991146855636e-06, "loss": 46.0, "step": 1253 }, { "epoch": 0.21161878243260346, "grad_norm": 0.0007931495201773942, "learning_rate": 9.556157697120215e-06, "loss": 46.0, "step": 1254 }, { "epoch": 0.21178753744251783, "grad_norm": 0.0005852460162714124, "learning_rate": 9.463750573087571e-06, "loss": 46.0, "step": 1255 }, { "epoch": 0.2119562924524322, "grad_norm": 0.0008610963704995811, "learning_rate": 9.371770210337727e-06, "loss": 46.0, "step": 1256 }, { "epoch": 0.21212504746234653, "grad_norm": 0.000635376840364188, "learning_rate": 9.280217042439122e-06, "loss": 46.0, "step": 1257 }, { "epoch": 0.2122938024722609, "grad_norm": 0.0008849898586049676, "learning_rate": 9.18909150094649e-06, "loss": 46.0, "step": 1258 }, { "epoch": 0.21246255748217524, "grad_norm": 0.0008719837060198188, "learning_rate": 9.098394015398814e-06, "loss": 46.0, "step": 1259 }, { "epoch": 0.2126313124920896, "grad_norm": 0.0021515190601348877, "learning_rate": 9.008125013317448e-06, "loss": 46.0, "step": 1260 }, { "epoch": 0.21280006750200398, "grad_norm": 0.00268219574354589, "learning_rate": 8.918284920203934e-06, "loss": 46.0, "step": 1261 }, { "epoch": 0.21296882251191832, "grad_norm": 0.0009798301616683602, "learning_rate": 8.828874159538091e-06, "loss": 46.0, "step": 1262 }, { "epoch": 0.21313757752183268, "grad_norm": 0.000541876011993736, "learning_rate": 8.739893152775958e-06, "loss": 46.0, "step": 1263 }, { "epoch": 0.21330633253174702, "grad_norm": 0.00041087419958785176, "learning_rate": 8.651342319347867e-06, "loss": 46.0, "step": 1264 }, { "epoch": 0.2134750875416614, "grad_norm": 0.0012815961381420493, "learning_rate": 8.563222076656429e-06, "loss": 46.0, "step": 1265 }, { "epoch": 0.21364384255157576, "grad_norm": 0.000593251024838537, "learning_rate": 8.475532840074585e-06, "loss": 46.0, "step": 1266 }, { "epoch": 0.2138125975614901, "grad_norm": 0.00048146312474273145, "learning_rate": 8.388275022943648e-06, "loss": 46.0, "step": 1267 }, { "epoch": 0.21398135257140447, "grad_norm": 0.0008700843318365514, "learning_rate": 8.301449036571319e-06, "loss": 46.0, "step": 1268 }, { "epoch": 0.21415010758131883, "grad_norm": 0.0005789704155176878, "learning_rate": 8.215055290229779e-06, "loss": 46.0, "step": 1269 }, { "epoch": 0.21431886259123317, "grad_norm": 0.0015106883365660906, "learning_rate": 8.129094191153808e-06, "loss": 46.0, "step": 1270 }, { "epoch": 0.21448761760114754, "grad_norm": 0.0004934304743073881, "learning_rate": 8.04356614453874e-06, "loss": 46.0, "step": 1271 }, { "epoch": 0.21465637261106188, "grad_norm": 0.0019693290814757347, "learning_rate": 7.95847155353865e-06, "loss": 46.0, "step": 1272 }, { "epoch": 0.21482512762097625, "grad_norm": 0.002005916088819504, "learning_rate": 7.873810819264483e-06, "loss": 46.0, "step": 1273 }, { "epoch": 0.21499388263089061, "grad_norm": 0.0012562709162011743, "learning_rate": 7.789584340782075e-06, "loss": 46.0, "step": 1274 }, { "epoch": 0.21516263764080495, "grad_norm": 0.0009011728689074516, "learning_rate": 7.705792515110232e-06, "loss": 46.0, "step": 1275 }, { "epoch": 0.21533139265071932, "grad_norm": 0.0014264247147366405, "learning_rate": 7.622435737219069e-06, "loss": 46.0, "step": 1276 }, { "epoch": 0.21550014766063366, "grad_norm": 0.0006953159463591874, "learning_rate": 7.539514400027925e-06, "loss": 46.0, "step": 1277 }, { "epoch": 0.21566890267054803, "grad_norm": 0.001039333757944405, "learning_rate": 7.457028894403628e-06, "loss": 46.0, "step": 1278 }, { "epoch": 0.2158376576804624, "grad_norm": 0.000679376011248678, "learning_rate": 7.374979609158617e-06, "loss": 46.0, "step": 1279 }, { "epoch": 0.21600641269037674, "grad_norm": 0.000941453967243433, "learning_rate": 7.293366931049128e-06, "loss": 46.0, "step": 1280 }, { "epoch": 0.2161751677002911, "grad_norm": 0.0007799931336194277, "learning_rate": 7.212191244773336e-06, "loss": 46.0, "step": 1281 }, { "epoch": 0.21634392271020547, "grad_norm": 0.0007033965666778386, "learning_rate": 7.131452932969595e-06, "loss": 46.0, "step": 1282 }, { "epoch": 0.2165126777201198, "grad_norm": 0.001040134928189218, "learning_rate": 7.051152376214565e-06, "loss": 46.0, "step": 1283 }, { "epoch": 0.21668143273003418, "grad_norm": 0.0005914743524044752, "learning_rate": 6.9712899530215095e-06, "loss": 46.0, "step": 1284 }, { "epoch": 0.21685018773994852, "grad_norm": 0.0005753316800110042, "learning_rate": 6.891866039838391e-06, "loss": 46.0, "step": 1285 }, { "epoch": 0.2170189427498629, "grad_norm": 0.00033438560785725713, "learning_rate": 6.812881011046246e-06, "loss": 46.0, "step": 1286 }, { "epoch": 0.21718769775977725, "grad_norm": 0.000527498428709805, "learning_rate": 6.734335238957301e-06, "loss": 46.0, "step": 1287 }, { "epoch": 0.2173564527696916, "grad_norm": 0.0015326968859881163, "learning_rate": 6.656229093813182e-06, "loss": 46.0, "step": 1288 }, { "epoch": 0.21752520777960596, "grad_norm": 0.0006215130561031401, "learning_rate": 6.578562943783384e-06, "loss": 46.0, "step": 1289 }, { "epoch": 0.2176939627895203, "grad_norm": 0.0009670006693340838, "learning_rate": 6.501337154963305e-06, "loss": 46.0, "step": 1290 }, { "epoch": 0.21786271779943467, "grad_norm": 0.0005261996411718428, "learning_rate": 6.424552091372604e-06, "loss": 46.0, "step": 1291 }, { "epoch": 0.21803147280934904, "grad_norm": 0.000595112273003906, "learning_rate": 6.3482081149535355e-06, "loss": 46.0, "step": 1292 }, { "epoch": 0.21820022781926338, "grad_norm": 0.000555274193175137, "learning_rate": 6.272305585569172e-06, "loss": 46.0, "step": 1293 }, { "epoch": 0.21836898282917774, "grad_norm": 0.0012713369214907289, "learning_rate": 6.196844861001727e-06, "loss": 46.0, "step": 1294 }, { "epoch": 0.2185377378390921, "grad_norm": 0.0006671750452369452, "learning_rate": 6.1218262969509055e-06, "loss": 46.0, "step": 1295 }, { "epoch": 0.21870649284900645, "grad_norm": 0.001461333711631596, "learning_rate": 6.047250247032165e-06, "loss": 46.0, "step": 1296 }, { "epoch": 0.21887524785892082, "grad_norm": 0.000863353256136179, "learning_rate": 5.973117062775113e-06, "loss": 46.0, "step": 1297 }, { "epoch": 0.21904400286883516, "grad_norm": 0.0012815648224204779, "learning_rate": 5.899427093621768e-06, "loss": 46.0, "step": 1298 }, { "epoch": 0.21921275787874953, "grad_norm": 0.0009904132457450032, "learning_rate": 5.826180686925031e-06, "loss": 46.0, "step": 1299 }, { "epoch": 0.2193815128886639, "grad_norm": 0.0006559500470757484, "learning_rate": 5.753378187946967e-06, "loss": 46.0, "step": 1300 }, { "epoch": 0.21955026789857823, "grad_norm": 0.001320411916822195, "learning_rate": 5.6810199398570905e-06, "loss": 46.0, "step": 1301 }, { "epoch": 0.2197190229084926, "grad_norm": 0.00033066104515455663, "learning_rate": 5.609106283730991e-06, "loss": 46.0, "step": 1302 }, { "epoch": 0.21988777791840694, "grad_norm": 0.0012177408207207918, "learning_rate": 5.537637558548525e-06, "loss": 46.0, "step": 1303 }, { "epoch": 0.2200565329283213, "grad_norm": 0.00042670281254686415, "learning_rate": 5.4666141011922025e-06, "loss": 46.0, "step": 1304 }, { "epoch": 0.22022528793823568, "grad_norm": 0.0011261850595474243, "learning_rate": 5.39603624644579e-06, "loss": 46.0, "step": 1305 }, { "epoch": 0.22039404294815002, "grad_norm": 0.0009061134187504649, "learning_rate": 5.325904326992548e-06, "loss": 46.0, "step": 1306 }, { "epoch": 0.22056279795806438, "grad_norm": 0.0008035211358219385, "learning_rate": 5.2562186734137485e-06, "loss": 46.0, "step": 1307 }, { "epoch": 0.22073155296797875, "grad_norm": 0.0004889973206445575, "learning_rate": 5.186979614187071e-06, "loss": 46.0, "step": 1308 }, { "epoch": 0.2209003079778931, "grad_norm": 0.0006611088174395263, "learning_rate": 5.118187475685121e-06, "loss": 46.0, "step": 1309 }, { "epoch": 0.22106906298780746, "grad_norm": 0.0021591607946902514, "learning_rate": 5.049842582173814e-06, "loss": 46.0, "step": 1310 }, { "epoch": 0.2212378179977218, "grad_norm": 0.0015975041314959526, "learning_rate": 4.9819452558109045e-06, "loss": 46.0, "step": 1311 }, { "epoch": 0.22140657300763616, "grad_norm": 0.00249864743091166, "learning_rate": 4.914495816644449e-06, "loss": 46.0, "step": 1312 }, { "epoch": 0.22157532801755053, "grad_norm": 0.0006162183126434684, "learning_rate": 4.847494582611301e-06, "loss": 46.0, "step": 1313 }, { "epoch": 0.22174408302746487, "grad_norm": 0.0007919514318928123, "learning_rate": 4.78094186953556e-06, "loss": 46.0, "step": 1314 }, { "epoch": 0.22191283803737924, "grad_norm": 0.0022155398037284613, "learning_rate": 4.714837991127186e-06, "loss": 46.0, "step": 1315 }, { "epoch": 0.22208159304729358, "grad_norm": 0.0005009054439142346, "learning_rate": 4.6491832589804605e-06, "loss": 46.0, "step": 1316 }, { "epoch": 0.22225034805720795, "grad_norm": 0.0015528866788372397, "learning_rate": 4.583977982572463e-06, "loss": 46.0, "step": 1317 }, { "epoch": 0.22241910306712231, "grad_norm": 0.0009101424948312342, "learning_rate": 4.519222469261731e-06, "loss": 46.0, "step": 1318 }, { "epoch": 0.22258785807703665, "grad_norm": 0.000738188624382019, "learning_rate": 4.454917024286742e-06, "loss": 46.0, "step": 1319 }, { "epoch": 0.22275661308695102, "grad_norm": 0.0008253224659711123, "learning_rate": 4.391061950764453e-06, "loss": 46.0, "step": 1320 }, { "epoch": 0.2229253680968654, "grad_norm": 0.001060123322531581, "learning_rate": 4.327657549688935e-06, "loss": 46.0, "step": 1321 }, { "epoch": 0.22309412310677973, "grad_norm": 0.0007293847156688571, "learning_rate": 4.264704119929897e-06, "loss": 46.0, "step": 1322 }, { "epoch": 0.2232628781166941, "grad_norm": 0.00065107416594401, "learning_rate": 4.202201958231322e-06, "loss": 46.0, "step": 1323 }, { "epoch": 0.22343163312660844, "grad_norm": 0.0006281050737015903, "learning_rate": 4.140151359210043e-06, "loss": 46.0, "step": 1324 }, { "epoch": 0.2236003881365228, "grad_norm": 0.000606034358497709, "learning_rate": 4.078552615354325e-06, "loss": 46.0, "step": 1325 }, { "epoch": 0.22376914314643717, "grad_norm": 0.0006844737217761576, "learning_rate": 4.017406017022607e-06, "loss": 46.0, "step": 1326 }, { "epoch": 0.2239378981563515, "grad_norm": 0.000884491775650531, "learning_rate": 3.95671185244193e-06, "loss": 46.0, "step": 1327 }, { "epoch": 0.22410665316626588, "grad_norm": 0.0005510865012183785, "learning_rate": 3.896470407706798e-06, "loss": 46.0, "step": 1328 }, { "epoch": 0.22427540817618022, "grad_norm": 0.0006715485942550004, "learning_rate": 3.836681966777678e-06, "loss": 46.0, "step": 1329 }, { "epoch": 0.2244441631860946, "grad_norm": 0.001269484986551106, "learning_rate": 3.7773468114796764e-06, "loss": 46.0, "step": 1330 }, { "epoch": 0.22461291819600895, "grad_norm": 0.0022162743844091892, "learning_rate": 3.718465221501344e-06, "loss": 46.0, "step": 1331 }, { "epoch": 0.2247816732059233, "grad_norm": 0.00027326264535076916, "learning_rate": 3.660037474393152e-06, "loss": 46.0, "step": 1332 }, { "epoch": 0.22495042821583766, "grad_norm": 0.0005874920170754194, "learning_rate": 3.602063845566361e-06, "loss": 46.0, "step": 1333 }, { "epoch": 0.225119183225752, "grad_norm": 0.0009573047864250839, "learning_rate": 3.544544608291622e-06, "loss": 46.0, "step": 1334 }, { "epoch": 0.22528793823566637, "grad_norm": 0.0007656016387045383, "learning_rate": 3.487480033697721e-06, "loss": 46.0, "step": 1335 }, { "epoch": 0.22545669324558074, "grad_norm": 0.00044490606524050236, "learning_rate": 3.430870390770291e-06, "loss": 46.0, "step": 1336 }, { "epoch": 0.22562544825549508, "grad_norm": 0.0003960455651395023, "learning_rate": 3.3747159463505595e-06, "loss": 46.0, "step": 1337 }, { "epoch": 0.22579420326540944, "grad_norm": 0.000521904497873038, "learning_rate": 3.3190169651340806e-06, "loss": 46.0, "step": 1338 }, { "epoch": 0.2259629582753238, "grad_norm": 0.0004184047575108707, "learning_rate": 3.2637737096695154e-06, "loss": 46.0, "step": 1339 }, { "epoch": 0.22613171328523815, "grad_norm": 0.001061237882822752, "learning_rate": 3.2089864403572976e-06, "loss": 46.0, "step": 1340 }, { "epoch": 0.22630046829515252, "grad_norm": 0.000829793163575232, "learning_rate": 3.1546554154485486e-06, "loss": 46.0, "step": 1341 }, { "epoch": 0.22646922330506686, "grad_norm": 0.0010989286238327622, "learning_rate": 3.100780891043764e-06, "loss": 46.0, "step": 1342 }, { "epoch": 0.22663797831498123, "grad_norm": 0.000658164091873914, "learning_rate": 3.0473631210915954e-06, "loss": 46.0, "step": 1343 }, { "epoch": 0.2268067333248956, "grad_norm": 0.0018878680421039462, "learning_rate": 2.994402357387738e-06, "loss": 46.0, "step": 1344 }, { "epoch": 0.22697548833480993, "grad_norm": 0.0011438899673521519, "learning_rate": 2.9418988495736765e-06, "loss": 46.0, "step": 1345 }, { "epoch": 0.2271442433447243, "grad_norm": 0.0005721378256566823, "learning_rate": 2.889852845135521e-06, "loss": 46.0, "step": 1346 }, { "epoch": 0.22731299835463864, "grad_norm": 0.0004976931377314031, "learning_rate": 2.8382645894028393e-06, "loss": 46.0, "step": 1347 }, { "epoch": 0.227481753364553, "grad_norm": 0.00046737625962123275, "learning_rate": 2.7871343255475135e-06, "loss": 46.0, "step": 1348 }, { "epoch": 0.22765050837446738, "grad_norm": 0.0006955791031941772, "learning_rate": 2.736462294582598e-06, "loss": 46.0, "step": 1349 }, { "epoch": 0.22781926338438171, "grad_norm": 0.0017836546758189797, "learning_rate": 2.686248735361141e-06, "loss": 46.0, "step": 1350 }, { "epoch": 0.22798801839429608, "grad_norm": 0.000547305797226727, "learning_rate": 2.6364938845751196e-06, "loss": 46.0, "step": 1351 }, { "epoch": 0.22815677340421045, "grad_norm": 0.0018898543203249574, "learning_rate": 2.5871979767543077e-06, "loss": 46.0, "step": 1352 }, { "epoch": 0.2283255284141248, "grad_norm": 0.0005670526879839599, "learning_rate": 2.5383612442650973e-06, "loss": 46.0, "step": 1353 }, { "epoch": 0.22849428342403916, "grad_norm": 0.0006443196325562894, "learning_rate": 2.489983917309502e-06, "loss": 46.0, "step": 1354 }, { "epoch": 0.2286630384339535, "grad_norm": 0.000439947412814945, "learning_rate": 2.4420662239240666e-06, "loss": 46.0, "step": 1355 }, { "epoch": 0.22883179344386786, "grad_norm": 0.0012257567141205072, "learning_rate": 2.3946083899786697e-06, "loss": 46.0, "step": 1356 }, { "epoch": 0.22900054845378223, "grad_norm": 0.0011901329271495342, "learning_rate": 2.3476106391756347e-06, "loss": 46.0, "step": 1357 }, { "epoch": 0.22916930346369657, "grad_norm": 0.0009685796103440225, "learning_rate": 2.3010731930485417e-06, "loss": 46.0, "step": 1358 }, { "epoch": 0.22933805847361094, "grad_norm": 0.0011787050170823932, "learning_rate": 2.2549962709612293e-06, "loss": 46.0, "step": 1359 }, { "epoch": 0.22950681348352528, "grad_norm": 0.0006927854265086353, "learning_rate": 2.209380090106794e-06, "loss": 46.0, "step": 1360 }, { "epoch": 0.22967556849343965, "grad_norm": 0.0021828326862305403, "learning_rate": 2.164224865506492e-06, "loss": 46.0, "step": 1361 }, { "epoch": 0.22984432350335401, "grad_norm": 0.0011952252825722098, "learning_rate": 2.1195308100087964e-06, "loss": 46.0, "step": 1362 }, { "epoch": 0.23001307851326835, "grad_norm": 0.0008620006265118718, "learning_rate": 2.0752981342883504e-06, "loss": 46.0, "step": 1363 }, { "epoch": 0.23018183352318272, "grad_norm": 0.0007816816796548665, "learning_rate": 2.031527046844994e-06, "loss": 46.0, "step": 1364 }, { "epoch": 0.2303505885330971, "grad_norm": 0.00032747103250585496, "learning_rate": 1.9882177540027856e-06, "loss": 46.0, "step": 1365 }, { "epoch": 0.23051934354301143, "grad_norm": 0.0009276359342038631, "learning_rate": 1.9453704599089906e-06, "loss": 46.0, "step": 1366 }, { "epoch": 0.2306880985529258, "grad_norm": 0.0007578957010991871, "learning_rate": 1.902985366533172e-06, "loss": 46.0, "step": 1367 }, { "epoch": 0.23085685356284014, "grad_norm": 0.0009802145650610328, "learning_rate": 1.8610626736662252e-06, "loss": 46.0, "step": 1368 }, { "epoch": 0.2310256085727545, "grad_norm": 0.0017398048657923937, "learning_rate": 1.8196025789193771e-06, "loss": 46.0, "step": 1369 }, { "epoch": 0.23119436358266887, "grad_norm": 0.001226657535880804, "learning_rate": 1.7786052777233663e-06, "loss": 46.0, "step": 1370 }, { "epoch": 0.2313631185925832, "grad_norm": 0.0007180358516052365, "learning_rate": 1.7380709633274095e-06, "loss": 46.0, "step": 1371 }, { "epoch": 0.23153187360249758, "grad_norm": 0.002623442793264985, "learning_rate": 1.6979998267983577e-06, "loss": 46.0, "step": 1372 }, { "epoch": 0.23170062861241192, "grad_norm": 0.0011678035371005535, "learning_rate": 1.6583920570197752e-06, "loss": 46.0, "step": 1373 }, { "epoch": 0.23186938362232629, "grad_norm": 0.0005888472078368068, "learning_rate": 1.6192478406910626e-06, "loss": 46.0, "step": 1374 }, { "epoch": 0.23203813863224065, "grad_norm": 0.001251881243661046, "learning_rate": 1.5805673623265572e-06, "loss": 46.0, "step": 1375 }, { "epoch": 0.232206893642155, "grad_norm": 0.0005478327511809766, "learning_rate": 1.5423508042546553e-06, "loss": 46.0, "step": 1376 }, { "epoch": 0.23237564865206936, "grad_norm": 0.0005645382916554809, "learning_rate": 1.504598346616981e-06, "loss": 46.0, "step": 1377 }, { "epoch": 0.23254440366198373, "grad_norm": 0.0011800862848758698, "learning_rate": 1.4673101673675639e-06, "loss": 46.0, "step": 1378 }, { "epoch": 0.23271315867189807, "grad_norm": 0.0021919957362115383, "learning_rate": 1.4304864422718945e-06, "loss": 46.0, "step": 1379 }, { "epoch": 0.23288191368181244, "grad_norm": 0.000592592463362962, "learning_rate": 1.3941273449062041e-06, "loss": 46.0, "step": 1380 }, { "epoch": 0.23305066869172678, "grad_norm": 0.002734198933467269, "learning_rate": 1.3582330466565985e-06, "loss": 46.0, "step": 1381 }, { "epoch": 0.23321942370164114, "grad_norm": 0.0006839183042757213, "learning_rate": 1.3228037167182573e-06, "loss": 46.0, "step": 1382 }, { "epoch": 0.2333881787115555, "grad_norm": 0.0003775713557843119, "learning_rate": 1.2878395220946248e-06, "loss": 46.0, "step": 1383 }, { "epoch": 0.23355693372146985, "grad_norm": 0.00045846737339161336, "learning_rate": 1.253340627596644e-06, "loss": 46.0, "step": 1384 }, { "epoch": 0.23372568873138422, "grad_norm": 0.0007842537015676498, "learning_rate": 1.2193071958419788e-06, "loss": 46.0, "step": 1385 }, { "epoch": 0.23389444374129856, "grad_norm": 0.0012929689837619662, "learning_rate": 1.1857393872542255e-06, "loss": 46.0, "step": 1386 }, { "epoch": 0.23406319875121293, "grad_norm": 0.0015821013366803527, "learning_rate": 1.1526373600621699e-06, "loss": 46.0, "step": 1387 }, { "epoch": 0.2342319537611273, "grad_norm": 0.0006062084576115012, "learning_rate": 1.1200012702990425e-06, "loss": 46.0, "step": 1388 }, { "epoch": 0.23440070877104163, "grad_norm": 0.0006992828566581011, "learning_rate": 1.0878312718017868e-06, "loss": 46.0, "step": 1389 }, { "epoch": 0.234569463780956, "grad_norm": 0.001030724961310625, "learning_rate": 1.0561275162103035e-06, "loss": 46.0, "step": 1390 }, { "epoch": 0.23473821879087037, "grad_norm": 0.001839980366639793, "learning_rate": 1.0248901529668286e-06, "loss": 46.0, "step": 1391 }, { "epoch": 0.2349069738007847, "grad_norm": 0.0017480964306741953, "learning_rate": 9.941193293150796e-07, "loss": 46.0, "step": 1392 }, { "epoch": 0.23507572881069907, "grad_norm": 0.0008748589316383004, "learning_rate": 9.638151902996773e-07, "loss": 46.0, "step": 1393 }, { "epoch": 0.23524448382061341, "grad_norm": 0.0008244823548011482, "learning_rate": 9.339778787654574e-07, "loss": 46.0, "step": 1394 }, { "epoch": 0.23541323883052778, "grad_norm": 0.001938622328452766, "learning_rate": 9.046075353567163e-07, "loss": 46.0, "step": 1395 }, { "epoch": 0.23558199384044215, "grad_norm": 0.0008106924360617995, "learning_rate": 8.757042985166331e-07, "loss": 46.0, "step": 1396 }, { "epoch": 0.2357507488503565, "grad_norm": 0.0010104329558089375, "learning_rate": 8.472683044865815e-07, "loss": 46.0, "step": 1397 }, { "epoch": 0.23591950386027086, "grad_norm": 0.0008030526805669069, "learning_rate": 8.192996873054748e-07, "loss": 46.0, "step": 1398 }, { "epoch": 0.2360882588701852, "grad_norm": 0.003335982095450163, "learning_rate": 7.917985788091553e-07, "loss": 46.0, "step": 1399 }, { "epoch": 0.23625701388009956, "grad_norm": 0.0006814859807491302, "learning_rate": 7.647651086297835e-07, "loss": 46.0, "step": 1400 }, { "epoch": 0.23642576889001393, "grad_norm": 0.001208942150697112, "learning_rate": 7.381994041951945e-07, "loss": 46.0, "step": 1401 }, { "epoch": 0.23659452389992827, "grad_norm": 0.0026390880811959505, "learning_rate": 7.12101590728298e-07, "loss": 46.0, "step": 1402 }, { "epoch": 0.23676327890984264, "grad_norm": 0.0010781860910356045, "learning_rate": 6.864717912465346e-07, "loss": 46.0, "step": 1403 }, { "epoch": 0.23693203391975698, "grad_norm": 0.001180395483970642, "learning_rate": 6.613101265612431e-07, "loss": 46.0, "step": 1404 }, { "epoch": 0.23710078892967135, "grad_norm": 0.0006595318554900587, "learning_rate": 6.366167152771274e-07, "loss": 46.0, "step": 1405 }, { "epoch": 0.23726954393958571, "grad_norm": 0.0005041745025664568, "learning_rate": 6.123916737916568e-07, "loss": 46.0, "step": 1406 }, { "epoch": 0.23743829894950005, "grad_norm": 0.0013777145650237799, "learning_rate": 5.886351162945891e-07, "loss": 46.0, "step": 1407 }, { "epoch": 0.23760705395941442, "grad_norm": 0.0009335106587968767, "learning_rate": 5.653471547673262e-07, "loss": 46.0, "step": 1408 }, { "epoch": 0.2377758089693288, "grad_norm": 0.0007560536614619195, "learning_rate": 5.425278989824922e-07, "loss": 46.0, "step": 1409 }, { "epoch": 0.23794456397924313, "grad_norm": 0.0007272138609550893, "learning_rate": 5.201774565033679e-07, "loss": 46.0, "step": 1410 }, { "epoch": 0.2381133189891575, "grad_norm": 0.0035329603124409914, "learning_rate": 4.982959326833347e-07, "loss": 46.0, "step": 1411 }, { "epoch": 0.23828207399907184, "grad_norm": 0.0006473190151154995, "learning_rate": 4.76883430665509e-07, "loss": 46.0, "step": 1412 }, { "epoch": 0.2384508290089862, "grad_norm": 0.0016907091485336423, "learning_rate": 4.5594005138211993e-07, "loss": 46.0, "step": 1413 }, { "epoch": 0.23861958401890057, "grad_norm": 0.0016014818102121353, "learning_rate": 4.3546589355409894e-07, "loss": 46.0, "step": 1414 }, { "epoch": 0.2387883390288149, "grad_norm": 0.0006536655710078776, "learning_rate": 4.154610536906134e-07, "loss": 46.0, "step": 1415 }, { "epoch": 0.23895709403872928, "grad_norm": 0.0007482526707462966, "learning_rate": 3.959256260886113e-07, "loss": 46.0, "step": 1416 }, { "epoch": 0.23912584904864362, "grad_norm": 0.0012404083972796798, "learning_rate": 3.7685970283234395e-07, "loss": 46.0, "step": 1417 }, { "epoch": 0.23929460405855799, "grad_norm": 0.0006575691513717175, "learning_rate": 3.5826337379297746e-07, "loss": 46.0, "step": 1418 }, { "epoch": 0.23946335906847235, "grad_norm": 0.0005846316926181316, "learning_rate": 3.4013672662815964e-07, "loss": 46.0, "step": 1419 }, { "epoch": 0.2396321140783867, "grad_norm": 0.0015353142516687512, "learning_rate": 3.2247984678155374e-07, "loss": 46.0, "step": 1420 }, { "epoch": 0.23980086908830106, "grad_norm": 0.0007021796191111207, "learning_rate": 3.0529281748252757e-07, "loss": 46.0, "step": 1421 }, { "epoch": 0.23996962409821543, "grad_norm": 0.00043599194032140076, "learning_rate": 2.8857571974567623e-07, "loss": 46.0, "step": 1422 }, { "epoch": 0.24013837910812977, "grad_norm": 0.0008538305992260575, "learning_rate": 2.723286323704888e-07, "loss": 46.0, "step": 1423 }, { "epoch": 0.24030713411804414, "grad_norm": 0.0007377176661975682, "learning_rate": 2.565516319409711e-07, "loss": 46.0, "step": 1424 }, { "epoch": 0.24047588912795848, "grad_norm": 0.001053746440447867, "learning_rate": 2.412447928252459e-07, "loss": 46.0, "step": 1425 }, { "epoch": 0.24064464413787284, "grad_norm": 0.00034214206971228123, "learning_rate": 2.2640818717527546e-07, "loss": 46.0, "step": 1426 }, { "epoch": 0.2408133991477872, "grad_norm": 0.0006055811536498368, "learning_rate": 2.120418849264616e-07, "loss": 46.0, "step": 1427 }, { "epoch": 0.24098215415770155, "grad_norm": 0.0032110221218317747, "learning_rate": 1.9814595379732405e-07, "loss": 46.0, "step": 1428 }, { "epoch": 0.24115090916761592, "grad_norm": 0.0006545573123730719, "learning_rate": 1.8472045928920045e-07, "loss": 46.0, "step": 1429 }, { "epoch": 0.24131966417753026, "grad_norm": 0.0010023179929703474, "learning_rate": 1.7176546468592457e-07, "loss": 46.0, "step": 1430 }, { "epoch": 0.24148841918744463, "grad_norm": 0.0009001618018373847, "learning_rate": 1.5928103105354864e-07, "loss": 46.0, "step": 1431 }, { "epoch": 0.241657174197359, "grad_norm": 0.000825360300950706, "learning_rate": 1.4726721724001024e-07, "loss": 46.0, "step": 1432 }, { "epoch": 0.24182592920727333, "grad_norm": 0.0018932815873995423, "learning_rate": 1.3572407987491042e-07, "loss": 46.0, "step": 1433 }, { "epoch": 0.2419946842171877, "grad_norm": 0.0010684579610824585, "learning_rate": 1.2465167336920268e-07, "loss": 46.0, "step": 1434 }, { "epoch": 0.24216343922710207, "grad_norm": 0.0006767999730072916, "learning_rate": 1.1405004991495993e-07, "loss": 46.0, "step": 1435 }, { "epoch": 0.2423321942370164, "grad_norm": 0.0002995872055180371, "learning_rate": 1.0391925948511904e-07, "loss": 46.0, "step": 1436 }, { "epoch": 0.24250094924693077, "grad_norm": 0.0010398075683042407, "learning_rate": 9.425934983323669e-08, "loss": 46.0, "step": 1437 }, { "epoch": 0.24266970425684511, "grad_norm": 0.0006404778105206788, "learning_rate": 8.507036649331168e-08, "loss": 46.0, "step": 1438 }, { "epoch": 0.24283845926675948, "grad_norm": 0.0009962361073121428, "learning_rate": 7.635235277950736e-08, "loss": 46.0, "step": 1439 }, { "epoch": 0.24300721427667385, "grad_norm": 0.0007267189212143421, "learning_rate": 6.810534978597404e-08, "loss": 46.0, "step": 1440 }, { "epoch": 0.2431759692865882, "grad_norm": 0.000724569137673825, "learning_rate": 6.032939638664914e-08, "loss": 46.0, "step": 1441 }, { "epoch": 0.24334472429650256, "grad_norm": 0.0003782059939112514, "learning_rate": 5.3024529235112807e-08, "loss": 46.0, "step": 1442 }, { "epoch": 0.2435134793064169, "grad_norm": 0.001214990857988596, "learning_rate": 4.619078276436595e-08, "loss": 46.0, "step": 1443 }, { "epoch": 0.24368223431633126, "grad_norm": 0.0013174681225791574, "learning_rate": 3.982818918665254e-08, "loss": 46.0, "step": 1444 }, { "epoch": 0.24385098932624563, "grad_norm": 0.0019085375824943185, "learning_rate": 3.393677849340415e-08, "loss": 46.0, "step": 1445 }, { "epoch": 0.24401974433615997, "grad_norm": 0.0005340630887076259, "learning_rate": 2.8516578454973465e-08, "loss": 46.0, "step": 1446 }, { "epoch": 0.24418849934607434, "grad_norm": 0.0010641046101227403, "learning_rate": 2.3567614620600975e-08, "loss": 46.0, "step": 1447 }, { "epoch": 0.2443572543559887, "grad_norm": 0.0006958871381357312, "learning_rate": 1.9089910318259572e-08, "loss": 46.0, "step": 1448 }, { "epoch": 0.24452600936590305, "grad_norm": 0.0013515378814190626, "learning_rate": 1.5083486654510203e-08, "loss": 46.0, "step": 1449 }, { "epoch": 0.24469476437581741, "grad_norm": 0.0014520830009132624, "learning_rate": 1.1548362514479661e-08, "loss": 46.0, "step": 1450 }, { "epoch": 0.24486351938573175, "grad_norm": 0.0015810457989573479, "learning_rate": 8.484554561682956e-09, "loss": 46.0, "step": 1451 }, { "epoch": 0.24503227439564612, "grad_norm": 0.001024417346343398, "learning_rate": 5.8920772380233195e-09, "loss": 46.0, "step": 1452 }, { "epoch": 0.2452010294055605, "grad_norm": 0.000991427805274725, "learning_rate": 3.7709427636700714e-09, "loss": 46.0, "step": 1453 }, { "epoch": 0.24536978441547483, "grad_norm": 0.001307089813053608, "learning_rate": 2.1211611370475226e-09, "loss": 46.0, "step": 1454 }, { "epoch": 0.2455385394253892, "grad_norm": 0.0008045461145229638, "learning_rate": 9.427401347128495e-10, "loss": 46.0, "step": 1455 }, { "epoch": 0.24570729443530354, "grad_norm": 0.0017816101899370551, "learning_rate": 2.356853114116042e-10, "loss": 46.0, "step": 1456 }, { "epoch": 0.2458760494452179, "grad_norm": 0.000787016935646534, "learning_rate": 0.0, "loss": 46.0, "step": 1457 } ], "logging_steps": 1, "max_steps": 1457, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 365, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 29027633922048.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }