{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9599808003839924, "eval_steps": 5000.0, "global_step": 16000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.999880002399952e-05, "grad_norm": 4.141736030578613, "learning_rate": 1.3972055888223551e-08, "loss": 0.8692, "step": 1 }, { "epoch": 0.00011999760004799904, "grad_norm": 5.302210330963135, "learning_rate": 2.7944111776447102e-08, "loss": 0.8621, "step": 2 }, { "epoch": 0.00017999640007199856, "grad_norm": 4.701464653015137, "learning_rate": 4.191616766467066e-08, "loss": 1.0161, "step": 3 }, { "epoch": 0.00023999520009599807, "grad_norm": 4.097288608551025, "learning_rate": 5.5888223552894204e-08, "loss": 0.9355, "step": 4 }, { "epoch": 0.0002999940001199976, "grad_norm": 4.103788375854492, "learning_rate": 6.986027944111776e-08, "loss": 0.9105, "step": 5 }, { "epoch": 0.0003599928001439971, "grad_norm": 5.255908489227295, "learning_rate": 8.383233532934132e-08, "loss": 0.9617, "step": 6 }, { "epoch": 0.00041999160016799666, "grad_norm": 4.78436279296875, "learning_rate": 9.780439121756487e-08, "loss": 0.9286, "step": 7 }, { "epoch": 0.00047999040019199615, "grad_norm": 4.4373674392700195, "learning_rate": 1.1177644710578841e-07, "loss": 0.8207, "step": 8 }, { "epoch": 0.0005399892002159956, "grad_norm": 4.867762088775635, "learning_rate": 1.2574850299401197e-07, "loss": 0.8974, "step": 9 }, { "epoch": 0.0005999880002399952, "grad_norm": 4.384710311889648, "learning_rate": 1.3972055888223553e-07, "loss": 0.8894, "step": 10 }, { "epoch": 0.0006599868002639947, "grad_norm": 4.9110236167907715, "learning_rate": 1.5369261477045908e-07, "loss": 0.9228, "step": 11 }, { "epoch": 0.0007199856002879943, "grad_norm": 5.594161510467529, "learning_rate": 1.6766467065868263e-07, "loss": 0.9674, "step": 12 }, { "epoch": 0.0007799844003119938, "grad_norm": 4.808990955352783, "learning_rate": 1.8163672654690618e-07, "loss": 0.8794, "step": 13 }, { "epoch": 0.0008399832003359933, "grad_norm": 4.454570770263672, "learning_rate": 1.9560878243512974e-07, "loss": 0.8883, "step": 14 }, { "epoch": 0.0008999820003599928, "grad_norm": 4.503767490386963, "learning_rate": 2.095808383233533e-07, "loss": 0.905, "step": 15 }, { "epoch": 0.0009599808003839923, "grad_norm": 4.279079437255859, "learning_rate": 2.2355289421157682e-07, "loss": 0.9153, "step": 16 }, { "epoch": 0.0010199796004079918, "grad_norm": 4.7273969650268555, "learning_rate": 2.375249500998004e-07, "loss": 0.9237, "step": 17 }, { "epoch": 0.0010799784004319913, "grad_norm": 4.1117401123046875, "learning_rate": 2.5149700598802395e-07, "loss": 0.9427, "step": 18 }, { "epoch": 0.001139977200455991, "grad_norm": 4.929619789123535, "learning_rate": 2.6546906187624753e-07, "loss": 0.908, "step": 19 }, { "epoch": 0.0011999760004799903, "grad_norm": 4.232673645019531, "learning_rate": 2.7944111776447105e-07, "loss": 0.9234, "step": 20 }, { "epoch": 0.00125997480050399, "grad_norm": 4.668841361999512, "learning_rate": 2.934131736526946e-07, "loss": 0.917, "step": 21 }, { "epoch": 0.0013199736005279894, "grad_norm": 4.232138633728027, "learning_rate": 3.0738522954091816e-07, "loss": 0.8868, "step": 22 }, { "epoch": 0.0013799724005519889, "grad_norm": 3.64571213722229, "learning_rate": 3.213572854291417e-07, "loss": 0.9337, "step": 23 }, { "epoch": 0.0014399712005759885, "grad_norm": 3.918726921081543, "learning_rate": 3.3532934131736526e-07, "loss": 0.8663, "step": 24 }, { "epoch": 0.001499970000599988, "grad_norm": 23.378986358642578, "learning_rate": 3.4930139720558884e-07, "loss": 0.8998, "step": 25 }, { "epoch": 0.0015599688006239876, "grad_norm": 4.882752418518066, "learning_rate": 3.6327345309381237e-07, "loss": 0.8658, "step": 26 }, { "epoch": 0.001619967600647987, "grad_norm": 3.9547297954559326, "learning_rate": 3.7724550898203595e-07, "loss": 0.9543, "step": 27 }, { "epoch": 0.0016799664006719867, "grad_norm": 3.5016565322875977, "learning_rate": 3.912175648702595e-07, "loss": 0.8126, "step": 28 }, { "epoch": 0.001739965200695986, "grad_norm": 4.040828227996826, "learning_rate": 4.0518962075848305e-07, "loss": 0.799, "step": 29 }, { "epoch": 0.0017999640007199855, "grad_norm": 4.187844276428223, "learning_rate": 4.191616766467066e-07, "loss": 0.8054, "step": 30 }, { "epoch": 0.0018599628007439852, "grad_norm": 3.623718500137329, "learning_rate": 4.331337325349301e-07, "loss": 0.8697, "step": 31 }, { "epoch": 0.0019199616007679846, "grad_norm": 3.5884199142456055, "learning_rate": 4.4710578842315363e-07, "loss": 0.7982, "step": 32 }, { "epoch": 0.001979960400791984, "grad_norm": 3.976799249649048, "learning_rate": 4.6107784431137726e-07, "loss": 0.8239, "step": 33 }, { "epoch": 0.0020399592008159837, "grad_norm": 4.1542510986328125, "learning_rate": 4.750499001996008e-07, "loss": 0.8663, "step": 34 }, { "epoch": 0.0020999580008399833, "grad_norm": 3.092752695083618, "learning_rate": 4.890219560878243e-07, "loss": 0.8563, "step": 35 }, { "epoch": 0.0021599568008639825, "grad_norm": 3.5959906578063965, "learning_rate": 5.029940119760479e-07, "loss": 0.8284, "step": 36 }, { "epoch": 0.002219955600887982, "grad_norm": 3.643440008163452, "learning_rate": 5.169660678642714e-07, "loss": 0.7214, "step": 37 }, { "epoch": 0.002279954400911982, "grad_norm": 2.9109535217285156, "learning_rate": 5.309381237524951e-07, "loss": 0.797, "step": 38 }, { "epoch": 0.0023399532009359815, "grad_norm": 2.9446604251861572, "learning_rate": 5.449101796407185e-07, "loss": 0.8562, "step": 39 }, { "epoch": 0.0023999520009599807, "grad_norm": 2.7083091735839844, "learning_rate": 5.588822355289421e-07, "loss": 0.7405, "step": 40 }, { "epoch": 0.0024599508009839803, "grad_norm": 2.6711504459381104, "learning_rate": 5.728542914171657e-07, "loss": 0.8258, "step": 41 }, { "epoch": 0.00251994960100798, "grad_norm": 2.967825412750244, "learning_rate": 5.868263473053892e-07, "loss": 0.768, "step": 42 }, { "epoch": 0.002579948401031979, "grad_norm": 2.8544764518737793, "learning_rate": 6.007984031936128e-07, "loss": 0.8345, "step": 43 }, { "epoch": 0.002639947201055979, "grad_norm": 2.7953662872314453, "learning_rate": 6.147704590818363e-07, "loss": 0.7565, "step": 44 }, { "epoch": 0.0026999460010799785, "grad_norm": 2.5240628719329834, "learning_rate": 6.287425149700599e-07, "loss": 0.7678, "step": 45 }, { "epoch": 0.0027599448011039777, "grad_norm": 2.7698166370391846, "learning_rate": 6.427145708582834e-07, "loss": 0.826, "step": 46 }, { "epoch": 0.0028199436011279774, "grad_norm": 2.569336175918579, "learning_rate": 6.56686626746507e-07, "loss": 0.7964, "step": 47 }, { "epoch": 0.002879942401151977, "grad_norm": 2.3304383754730225, "learning_rate": 6.706586826347305e-07, "loss": 0.7218, "step": 48 }, { "epoch": 0.0029399412011759767, "grad_norm": 2.2634005546569824, "learning_rate": 6.846307385229541e-07, "loss": 0.7923, "step": 49 }, { "epoch": 0.002999940001199976, "grad_norm": 2.049023151397705, "learning_rate": 6.986027944111777e-07, "loss": 0.7297, "step": 50 }, { "epoch": 0.0030599388012239755, "grad_norm": 2.065080165863037, "learning_rate": 7.125748502994012e-07, "loss": 0.7013, "step": 51 }, { "epoch": 0.003119937601247975, "grad_norm": 1.9504543542861938, "learning_rate": 7.265469061876247e-07, "loss": 0.7294, "step": 52 }, { "epoch": 0.0031799364012719744, "grad_norm": 1.9622265100479126, "learning_rate": 7.405189620758483e-07, "loss": 0.7199, "step": 53 }, { "epoch": 0.003239935201295974, "grad_norm": 1.9049369096755981, "learning_rate": 7.544910179640719e-07, "loss": 0.6864, "step": 54 }, { "epoch": 0.0032999340013199737, "grad_norm": 1.6903384923934937, "learning_rate": 7.684630738522954e-07, "loss": 0.7591, "step": 55 }, { "epoch": 0.0033599328013439733, "grad_norm": 1.7994821071624756, "learning_rate": 7.82435129740519e-07, "loss": 0.6998, "step": 56 }, { "epoch": 0.0034199316013679725, "grad_norm": 1.9396770000457764, "learning_rate": 7.964071856287424e-07, "loss": 0.7729, "step": 57 }, { "epoch": 0.003479930401391972, "grad_norm": 1.758116602897644, "learning_rate": 8.103792415169661e-07, "loss": 0.6832, "step": 58 }, { "epoch": 0.003539929201415972, "grad_norm": 1.530038595199585, "learning_rate": 8.243512974051896e-07, "loss": 0.6324, "step": 59 }, { "epoch": 0.003599928001439971, "grad_norm": 1.600562572479248, "learning_rate": 8.383233532934132e-07, "loss": 0.6569, "step": 60 }, { "epoch": 0.0036599268014639707, "grad_norm": 1.640321969985962, "learning_rate": 8.522954091816367e-07, "loss": 0.7014, "step": 61 }, { "epoch": 0.0037199256014879703, "grad_norm": 1.6266229152679443, "learning_rate": 8.662674650698602e-07, "loss": 0.7492, "step": 62 }, { "epoch": 0.0037799244015119695, "grad_norm": 1.5920917987823486, "learning_rate": 8.802395209580838e-07, "loss": 0.6637, "step": 63 }, { "epoch": 0.003839923201535969, "grad_norm": 1.671547770500183, "learning_rate": 8.942115768463073e-07, "loss": 0.7675, "step": 64 }, { "epoch": 0.003899922001559969, "grad_norm": 1.6720383167266846, "learning_rate": 9.081836327345308e-07, "loss": 0.7334, "step": 65 }, { "epoch": 0.003959920801583968, "grad_norm": 1.6420422792434692, "learning_rate": 9.221556886227545e-07, "loss": 0.6672, "step": 66 }, { "epoch": 0.004019919601607968, "grad_norm": 1.6394959688186646, "learning_rate": 9.361277445109781e-07, "loss": 0.7058, "step": 67 }, { "epoch": 0.004079918401631967, "grad_norm": 1.5535897016525269, "learning_rate": 9.500998003992016e-07, "loss": 0.7041, "step": 68 }, { "epoch": 0.004139917201655967, "grad_norm": 1.6691968441009521, "learning_rate": 9.640718562874252e-07, "loss": 0.6361, "step": 69 }, { "epoch": 0.004199916001679967, "grad_norm": 1.7305561304092407, "learning_rate": 9.780439121756486e-07, "loss": 0.6569, "step": 70 }, { "epoch": 0.004259914801703966, "grad_norm": 1.4672731161117554, "learning_rate": 9.920159680638723e-07, "loss": 0.6517, "step": 71 }, { "epoch": 0.004319913601727965, "grad_norm": 1.5354033708572388, "learning_rate": 1.0059880239520958e-06, "loss": 0.6663, "step": 72 }, { "epoch": 0.004379912401751965, "grad_norm": 1.6137850284576416, "learning_rate": 1.0199600798403193e-06, "loss": 0.7014, "step": 73 }, { "epoch": 0.004439911201775964, "grad_norm": 1.6154518127441406, "learning_rate": 1.0339321357285427e-06, "loss": 0.7258, "step": 74 }, { "epoch": 0.004499910001799964, "grad_norm": 1.5782337188720703, "learning_rate": 1.0479041916167664e-06, "loss": 0.6014, "step": 75 }, { "epoch": 0.004559908801823964, "grad_norm": 1.426303744316101, "learning_rate": 1.0618762475049901e-06, "loss": 0.6968, "step": 76 }, { "epoch": 0.004619907601847963, "grad_norm": 1.5739352703094482, "learning_rate": 1.0758483033932136e-06, "loss": 0.6994, "step": 77 }, { "epoch": 0.004679906401871963, "grad_norm": 1.4065396785736084, "learning_rate": 1.089820359281437e-06, "loss": 0.6284, "step": 78 }, { "epoch": 0.004739905201895962, "grad_norm": 1.6314438581466675, "learning_rate": 1.1037924151696607e-06, "loss": 0.7232, "step": 79 }, { "epoch": 0.004799904001919961, "grad_norm": 1.4562597274780273, "learning_rate": 1.1177644710578842e-06, "loss": 0.6883, "step": 80 }, { "epoch": 0.004859902801943961, "grad_norm": 1.6040973663330078, "learning_rate": 1.1317365269461077e-06, "loss": 0.6885, "step": 81 }, { "epoch": 0.004919901601967961, "grad_norm": 1.5304336547851562, "learning_rate": 1.1457085828343314e-06, "loss": 0.5818, "step": 82 }, { "epoch": 0.00497990040199196, "grad_norm": 1.4329891204833984, "learning_rate": 1.1596806387225548e-06, "loss": 0.5999, "step": 83 }, { "epoch": 0.00503989920201596, "grad_norm": 1.600127935409546, "learning_rate": 1.1736526946107783e-06, "loss": 0.6988, "step": 84 }, { "epoch": 0.00509989800203996, "grad_norm": 1.4135138988494873, "learning_rate": 1.1876247504990018e-06, "loss": 0.6458, "step": 85 }, { "epoch": 0.005159896802063958, "grad_norm": 2.0585179328918457, "learning_rate": 1.2015968063872257e-06, "loss": 0.6666, "step": 86 }, { "epoch": 0.005219895602087958, "grad_norm": 1.526089072227478, "learning_rate": 1.2155688622754492e-06, "loss": 0.6642, "step": 87 }, { "epoch": 0.005279894402111958, "grad_norm": 1.387961983680725, "learning_rate": 1.2295409181636726e-06, "loss": 0.5938, "step": 88 }, { "epoch": 0.005339893202135957, "grad_norm": 1.3829454183578491, "learning_rate": 1.2435129740518963e-06, "loss": 0.6434, "step": 89 }, { "epoch": 0.005399892002159957, "grad_norm": 1.410423755645752, "learning_rate": 1.2574850299401198e-06, "loss": 0.6449, "step": 90 }, { "epoch": 0.005459890802183957, "grad_norm": 1.3820126056671143, "learning_rate": 1.2714570858283433e-06, "loss": 0.6711, "step": 91 }, { "epoch": 0.005519889602207955, "grad_norm": 1.4861762523651123, "learning_rate": 1.2854291417165667e-06, "loss": 0.6799, "step": 92 }, { "epoch": 0.005579888402231955, "grad_norm": 1.6225950717926025, "learning_rate": 1.2994011976047904e-06, "loss": 0.673, "step": 93 }, { "epoch": 0.005639887202255955, "grad_norm": 1.4182862043380737, "learning_rate": 1.313373253493014e-06, "loss": 0.6911, "step": 94 }, { "epoch": 0.005699886002279954, "grad_norm": 1.5134121179580688, "learning_rate": 1.3273453093812374e-06, "loss": 0.5831, "step": 95 }, { "epoch": 0.005759884802303954, "grad_norm": 1.4077290296554565, "learning_rate": 1.341317365269461e-06, "loss": 0.6727, "step": 96 }, { "epoch": 0.005819883602327954, "grad_norm": 1.3341296911239624, "learning_rate": 1.3552894211576847e-06, "loss": 0.5744, "step": 97 }, { "epoch": 0.005879882402351953, "grad_norm": 1.3938210010528564, "learning_rate": 1.3692614770459082e-06, "loss": 0.6085, "step": 98 }, { "epoch": 0.005939881202375952, "grad_norm": 1.283300518989563, "learning_rate": 1.3832335329341317e-06, "loss": 0.6444, "step": 99 }, { "epoch": 0.005999880002399952, "grad_norm": 1.4032351970672607, "learning_rate": 1.3972055888223554e-06, "loss": 0.5983, "step": 100 }, { "epoch": 0.006059878802423951, "grad_norm": 1.4117417335510254, "learning_rate": 1.4111776447105788e-06, "loss": 0.5301, "step": 101 }, { "epoch": 0.006119877602447951, "grad_norm": 1.4865469932556152, "learning_rate": 1.4251497005988023e-06, "loss": 0.598, "step": 102 }, { "epoch": 0.006179876402471951, "grad_norm": 1.3297923803329468, "learning_rate": 1.4391217564870258e-06, "loss": 0.6612, "step": 103 }, { "epoch": 0.00623987520249595, "grad_norm": 1.4294739961624146, "learning_rate": 1.4530938123752495e-06, "loss": 0.6367, "step": 104 }, { "epoch": 0.00629987400251995, "grad_norm": 1.367441177368164, "learning_rate": 1.467065868263473e-06, "loss": 0.6428, "step": 105 }, { "epoch": 0.006359872802543949, "grad_norm": 1.29706609249115, "learning_rate": 1.4810379241516966e-06, "loss": 0.5738, "step": 106 }, { "epoch": 0.006419871602567948, "grad_norm": 1.3483470678329468, "learning_rate": 1.49500998003992e-06, "loss": 0.6475, "step": 107 }, { "epoch": 0.006479870402591948, "grad_norm": 1.3442851305007935, "learning_rate": 1.5089820359281438e-06, "loss": 0.6187, "step": 108 }, { "epoch": 0.006539869202615948, "grad_norm": 1.4500023126602173, "learning_rate": 1.5229540918163673e-06, "loss": 0.6759, "step": 109 }, { "epoch": 0.006599868002639947, "grad_norm": 1.2544387578964233, "learning_rate": 1.5369261477045907e-06, "loss": 0.5978, "step": 110 }, { "epoch": 0.006659866802663947, "grad_norm": 1.437868356704712, "learning_rate": 1.5508982035928144e-06, "loss": 0.653, "step": 111 }, { "epoch": 0.006719865602687947, "grad_norm": 1.5178477764129639, "learning_rate": 1.564870259481038e-06, "loss": 0.57, "step": 112 }, { "epoch": 0.006779864402711945, "grad_norm": 1.3911404609680176, "learning_rate": 1.5788423153692614e-06, "loss": 0.5732, "step": 113 }, { "epoch": 0.006839863202735945, "grad_norm": 1.339751124382019, "learning_rate": 1.5928143712574848e-06, "loss": 0.6728, "step": 114 }, { "epoch": 0.006899862002759945, "grad_norm": 1.3412997722625732, "learning_rate": 1.6067864271457085e-06, "loss": 0.604, "step": 115 }, { "epoch": 0.006959860802783944, "grad_norm": 1.4401333332061768, "learning_rate": 1.6207584830339322e-06, "loss": 0.5539, "step": 116 }, { "epoch": 0.007019859602807944, "grad_norm": 1.4395601749420166, "learning_rate": 1.6347305389221557e-06, "loss": 0.6444, "step": 117 }, { "epoch": 0.007079858402831944, "grad_norm": 1.4078809022903442, "learning_rate": 1.6487025948103792e-06, "loss": 0.6352, "step": 118 }, { "epoch": 0.007139857202855943, "grad_norm": 1.2953397035598755, "learning_rate": 1.6626746506986028e-06, "loss": 0.6026, "step": 119 }, { "epoch": 0.007199856002879942, "grad_norm": 1.3452595472335815, "learning_rate": 1.6766467065868263e-06, "loss": 0.6081, "step": 120 }, { "epoch": 0.007259854802903942, "grad_norm": 1.4475674629211426, "learning_rate": 1.6906187624750498e-06, "loss": 0.621, "step": 121 }, { "epoch": 0.007319853602927941, "grad_norm": 1.5487574338912964, "learning_rate": 1.7045908183632735e-06, "loss": 0.5831, "step": 122 }, { "epoch": 0.007379852402951941, "grad_norm": 1.4298254251480103, "learning_rate": 1.718562874251497e-06, "loss": 0.5492, "step": 123 }, { "epoch": 0.007439851202975941, "grad_norm": 1.3555940389633179, "learning_rate": 1.7325349301397204e-06, "loss": 0.5985, "step": 124 }, { "epoch": 0.00749985000299994, "grad_norm": 1.3311958312988281, "learning_rate": 1.7465069860279439e-06, "loss": 0.5828, "step": 125 }, { "epoch": 0.007559848803023939, "grad_norm": 1.2729368209838867, "learning_rate": 1.7604790419161676e-06, "loss": 0.6277, "step": 126 }, { "epoch": 0.007619847603047939, "grad_norm": 1.3045032024383545, "learning_rate": 1.774451097804391e-06, "loss": 0.624, "step": 127 }, { "epoch": 0.007679846403071938, "grad_norm": 1.3699638843536377, "learning_rate": 1.7884231536926145e-06, "loss": 0.5576, "step": 128 }, { "epoch": 0.007739845203095938, "grad_norm": 1.502261757850647, "learning_rate": 1.8023952095808382e-06, "loss": 0.6705, "step": 129 }, { "epoch": 0.007799844003119938, "grad_norm": 1.2833248376846313, "learning_rate": 1.8163672654690617e-06, "loss": 0.5608, "step": 130 }, { "epoch": 0.007859842803143937, "grad_norm": 1.4747649431228638, "learning_rate": 1.8303393213572856e-06, "loss": 0.5762, "step": 131 }, { "epoch": 0.007919841603167936, "grad_norm": 1.3838367462158203, "learning_rate": 1.844311377245509e-06, "loss": 0.5696, "step": 132 }, { "epoch": 0.007979840403191937, "grad_norm": 1.3263795375823975, "learning_rate": 1.8582834331337327e-06, "loss": 0.631, "step": 133 }, { "epoch": 0.008039839203215935, "grad_norm": 1.2811371088027954, "learning_rate": 1.8722554890219562e-06, "loss": 0.543, "step": 134 }, { "epoch": 0.008099838003239936, "grad_norm": 1.3780962228775024, "learning_rate": 1.8862275449101797e-06, "loss": 0.5987, "step": 135 }, { "epoch": 0.008159836803263935, "grad_norm": 1.3150513172149658, "learning_rate": 1.9001996007984032e-06, "loss": 0.6436, "step": 136 }, { "epoch": 0.008219835603287933, "grad_norm": 1.3936657905578613, "learning_rate": 1.914171656686627e-06, "loss": 0.6101, "step": 137 }, { "epoch": 0.008279834403311934, "grad_norm": 1.2749024629592896, "learning_rate": 1.9281437125748503e-06, "loss": 0.5467, "step": 138 }, { "epoch": 0.008339833203335933, "grad_norm": 1.4130276441574097, "learning_rate": 1.942115768463074e-06, "loss": 0.5881, "step": 139 }, { "epoch": 0.008399832003359933, "grad_norm": 1.3408379554748535, "learning_rate": 1.9560878243512973e-06, "loss": 0.6503, "step": 140 }, { "epoch": 0.008459830803383932, "grad_norm": 1.3560960292816162, "learning_rate": 1.9700598802395207e-06, "loss": 0.6394, "step": 141 }, { "epoch": 0.008519829603407933, "grad_norm": 1.4605231285095215, "learning_rate": 1.9840319361277446e-06, "loss": 0.6265, "step": 142 }, { "epoch": 0.008579828403431931, "grad_norm": 1.38413667678833, "learning_rate": 1.998003992015968e-06, "loss": 0.6409, "step": 143 }, { "epoch": 0.00863982720345593, "grad_norm": 1.3429043292999268, "learning_rate": 2.0119760479041916e-06, "loss": 0.6132, "step": 144 }, { "epoch": 0.00869982600347993, "grad_norm": 1.481097936630249, "learning_rate": 2.025948103792415e-06, "loss": 0.6468, "step": 145 }, { "epoch": 0.00875982480350393, "grad_norm": 1.3439576625823975, "learning_rate": 2.0399201596806385e-06, "loss": 0.5848, "step": 146 }, { "epoch": 0.00881982360352793, "grad_norm": 1.3599380254745483, "learning_rate": 2.053892215568862e-06, "loss": 0.5768, "step": 147 }, { "epoch": 0.008879822403551929, "grad_norm": 1.3778798580169678, "learning_rate": 2.0678642714570855e-06, "loss": 0.5932, "step": 148 }, { "epoch": 0.00893982120357593, "grad_norm": 1.3338335752487183, "learning_rate": 2.0818363273453094e-06, "loss": 0.569, "step": 149 }, { "epoch": 0.008999820003599928, "grad_norm": 1.3749724626541138, "learning_rate": 2.095808383233533e-06, "loss": 0.6596, "step": 150 }, { "epoch": 0.009059818803623927, "grad_norm": 1.3186976909637451, "learning_rate": 2.1097804391217567e-06, "loss": 0.5739, "step": 151 }, { "epoch": 0.009119817603647927, "grad_norm": 1.2696161270141602, "learning_rate": 2.1237524950099802e-06, "loss": 0.5571, "step": 152 }, { "epoch": 0.009179816403671926, "grad_norm": 1.2619379758834839, "learning_rate": 2.1377245508982037e-06, "loss": 0.5855, "step": 153 }, { "epoch": 0.009239815203695927, "grad_norm": 1.2904512882232666, "learning_rate": 2.151696606786427e-06, "loss": 0.5688, "step": 154 }, { "epoch": 0.009299814003719925, "grad_norm": 1.4418364763259888, "learning_rate": 2.1656686626746506e-06, "loss": 0.6345, "step": 155 }, { "epoch": 0.009359812803743926, "grad_norm": 1.3448662757873535, "learning_rate": 2.179640718562874e-06, "loss": 0.6061, "step": 156 }, { "epoch": 0.009419811603767925, "grad_norm": 1.4092036485671997, "learning_rate": 2.193612774451098e-06, "loss": 0.6146, "step": 157 }, { "epoch": 0.009479810403791923, "grad_norm": 1.4817478656768799, "learning_rate": 2.2075848303393215e-06, "loss": 0.5725, "step": 158 }, { "epoch": 0.009539809203815924, "grad_norm": 1.2791401147842407, "learning_rate": 2.221556886227545e-06, "loss": 0.6295, "step": 159 }, { "epoch": 0.009599808003839923, "grad_norm": 1.4284772872924805, "learning_rate": 2.2355289421157684e-06, "loss": 0.5909, "step": 160 }, { "epoch": 0.009659806803863923, "grad_norm": 1.5354111194610596, "learning_rate": 2.249500998003992e-06, "loss": 0.6395, "step": 161 }, { "epoch": 0.009719805603887922, "grad_norm": 1.3104029893875122, "learning_rate": 2.2634730538922154e-06, "loss": 0.5598, "step": 162 }, { "epoch": 0.009779804403911923, "grad_norm": 1.3600229024887085, "learning_rate": 2.277445109780439e-06, "loss": 0.6344, "step": 163 }, { "epoch": 0.009839803203935921, "grad_norm": 1.3578481674194336, "learning_rate": 2.2914171656686627e-06, "loss": 0.6019, "step": 164 }, { "epoch": 0.00989980200395992, "grad_norm": 1.2979942560195923, "learning_rate": 2.305389221556886e-06, "loss": 0.5513, "step": 165 }, { "epoch": 0.00995980080398392, "grad_norm": 1.2909666299819946, "learning_rate": 2.3193612774451097e-06, "loss": 0.6315, "step": 166 }, { "epoch": 0.01001979960400792, "grad_norm": 1.2450460195541382, "learning_rate": 2.333333333333333e-06, "loss": 0.5855, "step": 167 }, { "epoch": 0.01007979840403192, "grad_norm": 1.2852661609649658, "learning_rate": 2.3473053892215566e-06, "loss": 0.5722, "step": 168 }, { "epoch": 0.010139797204055919, "grad_norm": 1.408495545387268, "learning_rate": 2.36127744510978e-06, "loss": 0.5537, "step": 169 }, { "epoch": 0.01019979600407992, "grad_norm": 1.4354115724563599, "learning_rate": 2.3752495009980036e-06, "loss": 0.6616, "step": 170 }, { "epoch": 0.010259794804103918, "grad_norm": 1.4798223972320557, "learning_rate": 2.389221556886228e-06, "loss": 0.5893, "step": 171 }, { "epoch": 0.010319793604127917, "grad_norm": 1.3524624109268188, "learning_rate": 2.4031936127744514e-06, "loss": 0.5432, "step": 172 }, { "epoch": 0.010379792404151917, "grad_norm": 1.3069047927856445, "learning_rate": 2.417165668662675e-06, "loss": 0.584, "step": 173 }, { "epoch": 0.010439791204175916, "grad_norm": 1.3281811475753784, "learning_rate": 2.4311377245508983e-06, "loss": 0.5298, "step": 174 }, { "epoch": 0.010499790004199917, "grad_norm": 1.347853660583496, "learning_rate": 2.445109780439122e-06, "loss": 0.5856, "step": 175 }, { "epoch": 0.010559788804223915, "grad_norm": 1.4613703489303589, "learning_rate": 2.4590818363273453e-06, "loss": 0.6466, "step": 176 }, { "epoch": 0.010619787604247914, "grad_norm": 1.2593765258789062, "learning_rate": 2.4730538922155687e-06, "loss": 0.5284, "step": 177 }, { "epoch": 0.010679786404271915, "grad_norm": 1.2989939451217651, "learning_rate": 2.4870259481037926e-06, "loss": 0.5802, "step": 178 }, { "epoch": 0.010739785204295913, "grad_norm": 1.3688384294509888, "learning_rate": 2.500998003992016e-06, "loss": 0.5188, "step": 179 }, { "epoch": 0.010799784004319914, "grad_norm": 1.3316010236740112, "learning_rate": 2.5149700598802396e-06, "loss": 0.5773, "step": 180 }, { "epoch": 0.010859782804343913, "grad_norm": 1.3118091821670532, "learning_rate": 2.528942115768463e-06, "loss": 0.4981, "step": 181 }, { "epoch": 0.010919781604367913, "grad_norm": 1.2714734077453613, "learning_rate": 2.5429141716566865e-06, "loss": 0.6349, "step": 182 }, { "epoch": 0.010979780404391912, "grad_norm": 1.292654275894165, "learning_rate": 2.55688622754491e-06, "loss": 0.5638, "step": 183 }, { "epoch": 0.01103977920441591, "grad_norm": 1.3653242588043213, "learning_rate": 2.5708582834331335e-06, "loss": 0.5851, "step": 184 }, { "epoch": 0.011099778004439911, "grad_norm": 1.3275145292282104, "learning_rate": 2.584830339321357e-06, "loss": 0.5899, "step": 185 }, { "epoch": 0.01115977680446391, "grad_norm": 1.276073932647705, "learning_rate": 2.598802395209581e-06, "loss": 0.5659, "step": 186 }, { "epoch": 0.01121977560448791, "grad_norm": 1.239929437637329, "learning_rate": 2.6127744510978043e-06, "loss": 0.6274, "step": 187 }, { "epoch": 0.01127977440451191, "grad_norm": 1.453537106513977, "learning_rate": 2.626746506986028e-06, "loss": 0.5617, "step": 188 }, { "epoch": 0.01133977320453591, "grad_norm": 1.3796473741531372, "learning_rate": 2.6407185628742513e-06, "loss": 0.5556, "step": 189 }, { "epoch": 0.011399772004559909, "grad_norm": 1.3034827709197998, "learning_rate": 2.6546906187624747e-06, "loss": 0.6123, "step": 190 }, { "epoch": 0.011459770804583908, "grad_norm": 1.3395013809204102, "learning_rate": 2.6686626746506986e-06, "loss": 0.6228, "step": 191 }, { "epoch": 0.011519769604607908, "grad_norm": 1.2869280576705933, "learning_rate": 2.682634730538922e-06, "loss": 0.5294, "step": 192 }, { "epoch": 0.011579768404631907, "grad_norm": 1.3061389923095703, "learning_rate": 2.696606786427146e-06, "loss": 0.5691, "step": 193 }, { "epoch": 0.011639767204655907, "grad_norm": 1.277500867843628, "learning_rate": 2.7105788423153695e-06, "loss": 0.5874, "step": 194 }, { "epoch": 0.011699766004679906, "grad_norm": 1.3092859983444214, "learning_rate": 2.724550898203593e-06, "loss": 0.5059, "step": 195 }, { "epoch": 0.011759764804703907, "grad_norm": 1.4054105281829834, "learning_rate": 2.7385229540918164e-06, "loss": 0.5626, "step": 196 }, { "epoch": 0.011819763604727905, "grad_norm": 1.1981827020645142, "learning_rate": 2.75249500998004e-06, "loss": 0.5239, "step": 197 }, { "epoch": 0.011879762404751904, "grad_norm": 1.4791991710662842, "learning_rate": 2.7664670658682634e-06, "loss": 0.5846, "step": 198 }, { "epoch": 0.011939761204775905, "grad_norm": 1.3982822895050049, "learning_rate": 2.780439121756487e-06, "loss": 0.6242, "step": 199 }, { "epoch": 0.011999760004799903, "grad_norm": 1.3070803880691528, "learning_rate": 2.7944111776447107e-06, "loss": 0.5784, "step": 200 }, { "epoch": 0.012059758804823904, "grad_norm": 1.2907416820526123, "learning_rate": 2.808383233532934e-06, "loss": 0.6249, "step": 201 }, { "epoch": 0.012119757604847903, "grad_norm": 1.2998404502868652, "learning_rate": 2.8223552894211577e-06, "loss": 0.5319, "step": 202 }, { "epoch": 0.012179756404871903, "grad_norm": 1.319496989250183, "learning_rate": 2.836327345309381e-06, "loss": 0.5958, "step": 203 }, { "epoch": 0.012239755204895902, "grad_norm": 1.3422058820724487, "learning_rate": 2.8502994011976046e-06, "loss": 0.5624, "step": 204 }, { "epoch": 0.0122997540049199, "grad_norm": 1.2679402828216553, "learning_rate": 2.864271457085828e-06, "loss": 0.5572, "step": 205 }, { "epoch": 0.012359752804943901, "grad_norm": 1.4380614757537842, "learning_rate": 2.8782435129740516e-06, "loss": 0.6388, "step": 206 }, { "epoch": 0.0124197516049679, "grad_norm": 1.304176688194275, "learning_rate": 2.8922155688622755e-06, "loss": 0.5926, "step": 207 }, { "epoch": 0.0124797504049919, "grad_norm": 1.3440067768096924, "learning_rate": 2.906187624750499e-06, "loss": 0.5537, "step": 208 }, { "epoch": 0.0125397492050159, "grad_norm": 1.3192352056503296, "learning_rate": 2.9201596806387224e-06, "loss": 0.5501, "step": 209 }, { "epoch": 0.0125997480050399, "grad_norm": 1.394855260848999, "learning_rate": 2.934131736526946e-06, "loss": 0.5788, "step": 210 }, { "epoch": 0.012659746805063899, "grad_norm": 1.3843568563461304, "learning_rate": 2.94810379241517e-06, "loss": 0.5612, "step": 211 }, { "epoch": 0.012719745605087898, "grad_norm": 1.3294225931167603, "learning_rate": 2.9620758483033933e-06, "loss": 0.5941, "step": 212 }, { "epoch": 0.012779744405111898, "grad_norm": 1.406984567642212, "learning_rate": 2.9760479041916167e-06, "loss": 0.6024, "step": 213 }, { "epoch": 0.012839743205135897, "grad_norm": 1.4545899629592896, "learning_rate": 2.99001996007984e-06, "loss": 0.558, "step": 214 }, { "epoch": 0.012899742005159897, "grad_norm": 1.4726871252059937, "learning_rate": 3.003992015968064e-06, "loss": 0.6164, "step": 215 }, { "epoch": 0.012959740805183896, "grad_norm": 1.2997180223464966, "learning_rate": 3.0179640718562876e-06, "loss": 0.5682, "step": 216 }, { "epoch": 0.013019739605207897, "grad_norm": 1.253576397895813, "learning_rate": 3.031936127744511e-06, "loss": 0.4988, "step": 217 }, { "epoch": 0.013079738405231895, "grad_norm": 1.3575063943862915, "learning_rate": 3.0459081836327345e-06, "loss": 0.5961, "step": 218 }, { "epoch": 0.013139737205255894, "grad_norm": 1.5034719705581665, "learning_rate": 3.059880239520958e-06, "loss": 0.6331, "step": 219 }, { "epoch": 0.013199736005279895, "grad_norm": 1.320919156074524, "learning_rate": 3.0738522954091815e-06, "loss": 0.5506, "step": 220 }, { "epoch": 0.013259734805303893, "grad_norm": 1.5697249174118042, "learning_rate": 3.087824351297405e-06, "loss": 0.6438, "step": 221 }, { "epoch": 0.013319733605327894, "grad_norm": 1.3140949010849, "learning_rate": 3.101796407185629e-06, "loss": 0.5958, "step": 222 }, { "epoch": 0.013379732405351893, "grad_norm": 1.2841050624847412, "learning_rate": 3.1157684630738523e-06, "loss": 0.5908, "step": 223 }, { "epoch": 0.013439731205375893, "grad_norm": 1.279008150100708, "learning_rate": 3.129740518962076e-06, "loss": 0.4766, "step": 224 }, { "epoch": 0.013499730005399892, "grad_norm": 1.3918132781982422, "learning_rate": 3.1437125748502993e-06, "loss": 0.5106, "step": 225 }, { "epoch": 0.01355972880542389, "grad_norm": 1.2673313617706299, "learning_rate": 3.1576846307385227e-06, "loss": 0.6071, "step": 226 }, { "epoch": 0.013619727605447891, "grad_norm": 1.2754586935043335, "learning_rate": 3.171656686626746e-06, "loss": 0.5496, "step": 227 }, { "epoch": 0.01367972640547189, "grad_norm": 1.291854977607727, "learning_rate": 3.1856287425149697e-06, "loss": 0.5774, "step": 228 }, { "epoch": 0.01373972520549589, "grad_norm": 1.3589569330215454, "learning_rate": 3.1996007984031936e-06, "loss": 0.6015, "step": 229 }, { "epoch": 0.01379972400551989, "grad_norm": 1.3497122526168823, "learning_rate": 3.213572854291417e-06, "loss": 0.5645, "step": 230 }, { "epoch": 0.01385972280554389, "grad_norm": 1.3909778594970703, "learning_rate": 3.227544910179641e-06, "loss": 0.5858, "step": 231 }, { "epoch": 0.013919721605567889, "grad_norm": 1.3099526166915894, "learning_rate": 3.2415169660678644e-06, "loss": 0.5655, "step": 232 }, { "epoch": 0.013979720405591887, "grad_norm": 1.3645398616790771, "learning_rate": 3.255489021956088e-06, "loss": 0.6215, "step": 233 }, { "epoch": 0.014039719205615888, "grad_norm": 1.3764452934265137, "learning_rate": 3.2694610778443114e-06, "loss": 0.569, "step": 234 }, { "epoch": 0.014099718005639887, "grad_norm": 1.418866753578186, "learning_rate": 3.283433133732535e-06, "loss": 0.5438, "step": 235 }, { "epoch": 0.014159716805663887, "grad_norm": 1.2483700513839722, "learning_rate": 3.2974051896207583e-06, "loss": 0.456, "step": 236 }, { "epoch": 0.014219715605687886, "grad_norm": 1.38263738155365, "learning_rate": 3.3113772455089822e-06, "loss": 0.6127, "step": 237 }, { "epoch": 0.014279714405711887, "grad_norm": 1.3333990573883057, "learning_rate": 3.3253493013972057e-06, "loss": 0.6268, "step": 238 }, { "epoch": 0.014339713205735885, "grad_norm": 1.3191206455230713, "learning_rate": 3.339321357285429e-06, "loss": 0.5677, "step": 239 }, { "epoch": 0.014399712005759884, "grad_norm": 1.3427784442901611, "learning_rate": 3.3532934131736526e-06, "loss": 0.6116, "step": 240 }, { "epoch": 0.014459710805783885, "grad_norm": 1.5488548278808594, "learning_rate": 3.367265469061876e-06, "loss": 0.6285, "step": 241 }, { "epoch": 0.014519709605807883, "grad_norm": 1.5794357061386108, "learning_rate": 3.3812375249500996e-06, "loss": 0.5597, "step": 242 }, { "epoch": 0.014579708405831884, "grad_norm": 1.4802268743515015, "learning_rate": 3.395209580838323e-06, "loss": 0.6215, "step": 243 }, { "epoch": 0.014639707205855883, "grad_norm": 1.3142436742782593, "learning_rate": 3.409181636726547e-06, "loss": 0.5345, "step": 244 }, { "epoch": 0.014699706005879883, "grad_norm": 1.3265522718429565, "learning_rate": 3.4231536926147704e-06, "loss": 0.5682, "step": 245 }, { "epoch": 0.014759704805903882, "grad_norm": 1.4991726875305176, "learning_rate": 3.437125748502994e-06, "loss": 0.5577, "step": 246 }, { "epoch": 0.01481970360592788, "grad_norm": 1.34013032913208, "learning_rate": 3.4510978043912174e-06, "loss": 0.5753, "step": 247 }, { "epoch": 0.014879702405951881, "grad_norm": 1.450170874595642, "learning_rate": 3.465069860279441e-06, "loss": 0.6628, "step": 248 }, { "epoch": 0.01493970120597588, "grad_norm": 1.3612101078033447, "learning_rate": 3.4790419161676643e-06, "loss": 0.4317, "step": 249 }, { "epoch": 0.01499970000599988, "grad_norm": 1.4409794807434082, "learning_rate": 3.4930139720558878e-06, "loss": 0.5459, "step": 250 }, { "epoch": 0.01505969880602388, "grad_norm": 1.3675251007080078, "learning_rate": 3.506986027944112e-06, "loss": 0.5463, "step": 251 }, { "epoch": 0.015119697606047878, "grad_norm": 1.3263667821884155, "learning_rate": 3.520958083832335e-06, "loss": 0.6239, "step": 252 }, { "epoch": 0.015179696406071879, "grad_norm": 1.4775654077529907, "learning_rate": 3.534930139720559e-06, "loss": 0.6297, "step": 253 }, { "epoch": 0.015239695206095877, "grad_norm": 1.3308488130569458, "learning_rate": 3.548902195608782e-06, "loss": 0.599, "step": 254 }, { "epoch": 0.015299694006119878, "grad_norm": 1.3887192010879517, "learning_rate": 3.562874251497006e-06, "loss": 0.5638, "step": 255 }, { "epoch": 0.015359692806143877, "grad_norm": 1.3459516763687134, "learning_rate": 3.576846307385229e-06, "loss": 0.5403, "step": 256 }, { "epoch": 0.015419691606167877, "grad_norm": 1.3764580488204956, "learning_rate": 3.590818363273453e-06, "loss": 0.617, "step": 257 }, { "epoch": 0.015479690406191876, "grad_norm": 1.359186053276062, "learning_rate": 3.6047904191616764e-06, "loss": 0.6436, "step": 258 }, { "epoch": 0.015539689206215875, "grad_norm": 1.3940975666046143, "learning_rate": 3.6187624750499003e-06, "loss": 0.5274, "step": 259 }, { "epoch": 0.015599688006239875, "grad_norm": 1.4622869491577148, "learning_rate": 3.6327345309381234e-06, "loss": 0.5448, "step": 260 }, { "epoch": 0.015659686806263874, "grad_norm": 1.458171010017395, "learning_rate": 3.6467065868263473e-06, "loss": 0.5499, "step": 261 }, { "epoch": 0.015719685606287875, "grad_norm": 1.2669503688812256, "learning_rate": 3.660678642714571e-06, "loss": 0.542, "step": 262 }, { "epoch": 0.015779684406311875, "grad_norm": 1.2830555438995361, "learning_rate": 3.674650698602794e-06, "loss": 0.5818, "step": 263 }, { "epoch": 0.015839683206335872, "grad_norm": 1.292110800743103, "learning_rate": 3.688622754491018e-06, "loss": 0.5381, "step": 264 }, { "epoch": 0.015899682006359873, "grad_norm": 1.4080166816711426, "learning_rate": 3.7025948103792416e-06, "loss": 0.5542, "step": 265 }, { "epoch": 0.015959680806383873, "grad_norm": 1.3758563995361328, "learning_rate": 3.7165668662674655e-06, "loss": 0.5327, "step": 266 }, { "epoch": 0.01601967960640787, "grad_norm": 1.3257420063018799, "learning_rate": 3.7305389221556885e-06, "loss": 0.5007, "step": 267 }, { "epoch": 0.01607967840643187, "grad_norm": 1.3669954538345337, "learning_rate": 3.7445109780439124e-06, "loss": 0.6325, "step": 268 }, { "epoch": 0.01613967720645587, "grad_norm": 1.2485085725784302, "learning_rate": 3.7584830339321355e-06, "loss": 0.5373, "step": 269 }, { "epoch": 0.016199676006479872, "grad_norm": 1.4131968021392822, "learning_rate": 3.7724550898203594e-06, "loss": 0.5776, "step": 270 }, { "epoch": 0.01625967480650387, "grad_norm": 1.4505449533462524, "learning_rate": 3.7864271457085824e-06, "loss": 0.642, "step": 271 }, { "epoch": 0.01631967360652787, "grad_norm": 1.4932901859283447, "learning_rate": 3.8003992015968063e-06, "loss": 0.5791, "step": 272 }, { "epoch": 0.01637967240655187, "grad_norm": 1.3542786836624146, "learning_rate": 3.81437125748503e-06, "loss": 0.5055, "step": 273 }, { "epoch": 0.016439671206575867, "grad_norm": 1.5399646759033203, "learning_rate": 3.828343313373254e-06, "loss": 0.6455, "step": 274 }, { "epoch": 0.016499670006599867, "grad_norm": 1.127876877784729, "learning_rate": 3.842315369261476e-06, "loss": 0.4864, "step": 275 }, { "epoch": 0.016559668806623868, "grad_norm": 1.2935233116149902, "learning_rate": 3.856287425149701e-06, "loss": 0.5348, "step": 276 }, { "epoch": 0.01661966760664787, "grad_norm": 1.2960498332977295, "learning_rate": 3.870259481037924e-06, "loss": 0.5522, "step": 277 }, { "epoch": 0.016679666406671866, "grad_norm": 1.3820149898529053, "learning_rate": 3.884231536926148e-06, "loss": 0.5551, "step": 278 }, { "epoch": 0.016739665206695866, "grad_norm": 1.448799729347229, "learning_rate": 3.898203592814371e-06, "loss": 0.5189, "step": 279 }, { "epoch": 0.016799664006719867, "grad_norm": 1.3155148029327393, "learning_rate": 3.9121756487025945e-06, "loss": 0.5221, "step": 280 }, { "epoch": 0.016859662806743864, "grad_norm": 1.4772075414657593, "learning_rate": 3.926147704590819e-06, "loss": 0.6399, "step": 281 }, { "epoch": 0.016919661606767864, "grad_norm": 1.4804939031600952, "learning_rate": 3.9401197604790415e-06, "loss": 0.5414, "step": 282 }, { "epoch": 0.016979660406791865, "grad_norm": 1.4143316745758057, "learning_rate": 3.954091816367266e-06, "loss": 0.5057, "step": 283 }, { "epoch": 0.017039659206815865, "grad_norm": 1.4051432609558105, "learning_rate": 3.968063872255489e-06, "loss": 0.5518, "step": 284 }, { "epoch": 0.017099658006839862, "grad_norm": 1.2203288078308105, "learning_rate": 3.982035928143713e-06, "loss": 0.4971, "step": 285 }, { "epoch": 0.017159656806863863, "grad_norm": 1.3327751159667969, "learning_rate": 3.996007984031936e-06, "loss": 0.4914, "step": 286 }, { "epoch": 0.017219655606887863, "grad_norm": 1.3533471822738647, "learning_rate": 4.00998003992016e-06, "loss": 0.5812, "step": 287 }, { "epoch": 0.01727965440691186, "grad_norm": 1.2893446683883667, "learning_rate": 4.023952095808383e-06, "loss": 0.5991, "step": 288 }, { "epoch": 0.01733965320693586, "grad_norm": 1.426726222038269, "learning_rate": 4.037924151696607e-06, "loss": 0.5615, "step": 289 }, { "epoch": 0.01739965200695986, "grad_norm": 1.304439902305603, "learning_rate": 4.05189620758483e-06, "loss": 0.5705, "step": 290 }, { "epoch": 0.017459650806983862, "grad_norm": 1.4753317832946777, "learning_rate": 4.0658682634730544e-06, "loss": 0.606, "step": 291 }, { "epoch": 0.01751964960700786, "grad_norm": 1.3859730958938599, "learning_rate": 4.079840319361277e-06, "loss": 0.6396, "step": 292 }, { "epoch": 0.01757964840703186, "grad_norm": 1.3887025117874146, "learning_rate": 4.093812375249501e-06, "loss": 0.5376, "step": 293 }, { "epoch": 0.01763964720705586, "grad_norm": 1.3478258848190308, "learning_rate": 4.107784431137724e-06, "loss": 0.5819, "step": 294 }, { "epoch": 0.017699646007079857, "grad_norm": 1.3099949359893799, "learning_rate": 4.121756487025948e-06, "loss": 0.5717, "step": 295 }, { "epoch": 0.017759644807103857, "grad_norm": 1.4533863067626953, "learning_rate": 4.135728542914171e-06, "loss": 0.5646, "step": 296 }, { "epoch": 0.017819643607127858, "grad_norm": 1.334836483001709, "learning_rate": 4.149700598802395e-06, "loss": 0.6454, "step": 297 }, { "epoch": 0.01787964240715186, "grad_norm": 1.334847331047058, "learning_rate": 4.163672654690619e-06, "loss": 0.5684, "step": 298 }, { "epoch": 0.017939641207175856, "grad_norm": 1.3234820365905762, "learning_rate": 4.177644710578842e-06, "loss": 0.5301, "step": 299 }, { "epoch": 0.017999640007199856, "grad_norm": 1.1991833448410034, "learning_rate": 4.191616766467066e-06, "loss": 0.5694, "step": 300 }, { "epoch": 0.018059638807223857, "grad_norm": 1.3059628009796143, "learning_rate": 4.205588822355289e-06, "loss": 0.5537, "step": 301 }, { "epoch": 0.018119637607247854, "grad_norm": 1.3212558031082153, "learning_rate": 4.2195608782435135e-06, "loss": 0.5756, "step": 302 }, { "epoch": 0.018179636407271854, "grad_norm": 1.432188868522644, "learning_rate": 4.233532934131736e-06, "loss": 0.6262, "step": 303 }, { "epoch": 0.018239635207295855, "grad_norm": 1.3508119583129883, "learning_rate": 4.2475049900199604e-06, "loss": 0.5082, "step": 304 }, { "epoch": 0.018299634007319855, "grad_norm": 1.3678334951400757, "learning_rate": 4.261477045908184e-06, "loss": 0.517, "step": 305 }, { "epoch": 0.018359632807343852, "grad_norm": 1.4196102619171143, "learning_rate": 4.275449101796407e-06, "loss": 0.5248, "step": 306 }, { "epoch": 0.018419631607367853, "grad_norm": 1.2284907102584839, "learning_rate": 4.289421157684631e-06, "loss": 0.5235, "step": 307 }, { "epoch": 0.018479630407391853, "grad_norm": 1.2187949419021606, "learning_rate": 4.303393213572854e-06, "loss": 0.6005, "step": 308 }, { "epoch": 0.01853962920741585, "grad_norm": 1.3685590028762817, "learning_rate": 4.317365269461078e-06, "loss": 0.5612, "step": 309 }, { "epoch": 0.01859962800743985, "grad_norm": 1.3778631687164307, "learning_rate": 4.331337325349301e-06, "loss": 0.584, "step": 310 }, { "epoch": 0.01865962680746385, "grad_norm": 1.214868187904358, "learning_rate": 4.345309381237525e-06, "loss": 0.4098, "step": 311 }, { "epoch": 0.018719625607487852, "grad_norm": 1.2929904460906982, "learning_rate": 4.359281437125748e-06, "loss": 0.5425, "step": 312 }, { "epoch": 0.01877962440751185, "grad_norm": 1.2849739789962769, "learning_rate": 4.373253493013972e-06, "loss": 0.6116, "step": 313 }, { "epoch": 0.01883962320753585, "grad_norm": 1.3445991277694702, "learning_rate": 4.387225548902196e-06, "loss": 0.5561, "step": 314 }, { "epoch": 0.01889962200755985, "grad_norm": 1.4129794836044312, "learning_rate": 4.401197604790419e-06, "loss": 0.5319, "step": 315 }, { "epoch": 0.018959620807583847, "grad_norm": 1.48262357711792, "learning_rate": 4.415169660678643e-06, "loss": 0.5501, "step": 316 }, { "epoch": 0.019019619607607847, "grad_norm": 1.2201542854309082, "learning_rate": 4.4291417165668656e-06, "loss": 0.471, "step": 317 }, { "epoch": 0.019079618407631848, "grad_norm": 1.458894968032837, "learning_rate": 4.44311377245509e-06, "loss": 0.5757, "step": 318 }, { "epoch": 0.01913961720765585, "grad_norm": 1.35089111328125, "learning_rate": 4.457085828343313e-06, "loss": 0.5554, "step": 319 }, { "epoch": 0.019199616007679846, "grad_norm": 1.702471137046814, "learning_rate": 4.471057884231537e-06, "loss": 0.5486, "step": 320 }, { "epoch": 0.019259614807703846, "grad_norm": 1.5012791156768799, "learning_rate": 4.485029940119761e-06, "loss": 0.6316, "step": 321 }, { "epoch": 0.019319613607727847, "grad_norm": 1.2651991844177246, "learning_rate": 4.499001996007984e-06, "loss": 0.5637, "step": 322 }, { "epoch": 0.019379612407751844, "grad_norm": 1.2719963788986206, "learning_rate": 4.512974051896208e-06, "loss": 0.5496, "step": 323 }, { "epoch": 0.019439611207775844, "grad_norm": 1.4261711835861206, "learning_rate": 4.526946107784431e-06, "loss": 0.5044, "step": 324 }, { "epoch": 0.019499610007799845, "grad_norm": 1.519976258277893, "learning_rate": 4.540918163672655e-06, "loss": 0.6105, "step": 325 }, { "epoch": 0.019559608807823845, "grad_norm": 1.5053060054779053, "learning_rate": 4.554890219560878e-06, "loss": 0.658, "step": 326 }, { "epoch": 0.019619607607847842, "grad_norm": 1.4199657440185547, "learning_rate": 4.568862275449102e-06, "loss": 0.6325, "step": 327 }, { "epoch": 0.019679606407871843, "grad_norm": 1.3532506227493286, "learning_rate": 4.5828343313373255e-06, "loss": 0.5075, "step": 328 }, { "epoch": 0.019739605207895843, "grad_norm": 1.507430911064148, "learning_rate": 4.596806387225549e-06, "loss": 0.5275, "step": 329 }, { "epoch": 0.01979960400791984, "grad_norm": 1.460507869720459, "learning_rate": 4.610778443113772e-06, "loss": 0.4789, "step": 330 }, { "epoch": 0.01985960280794384, "grad_norm": 1.4215683937072754, "learning_rate": 4.624750499001996e-06, "loss": 0.5691, "step": 331 }, { "epoch": 0.01991960160796784, "grad_norm": 1.3611774444580078, "learning_rate": 4.638722554890219e-06, "loss": 0.6051, "step": 332 }, { "epoch": 0.019979600407991842, "grad_norm": 1.436708688735962, "learning_rate": 4.652694610778443e-06, "loss": 0.561, "step": 333 }, { "epoch": 0.02003959920801584, "grad_norm": 1.4192546606063843, "learning_rate": 4.666666666666666e-06, "loss": 0.5641, "step": 334 }, { "epoch": 0.02009959800803984, "grad_norm": 1.4291409254074097, "learning_rate": 4.680638722554891e-06, "loss": 0.5963, "step": 335 }, { "epoch": 0.02015959680806384, "grad_norm": 1.5291918516159058, "learning_rate": 4.694610778443113e-06, "loss": 0.5924, "step": 336 }, { "epoch": 0.020219595608087837, "grad_norm": 1.4490389823913574, "learning_rate": 4.708582834331338e-06, "loss": 0.538, "step": 337 }, { "epoch": 0.020279594408111837, "grad_norm": 1.3556782007217407, "learning_rate": 4.72255489021956e-06, "loss": 0.548, "step": 338 }, { "epoch": 0.020339593208135838, "grad_norm": 1.3301640748977661, "learning_rate": 4.7365269461077845e-06, "loss": 0.5335, "step": 339 }, { "epoch": 0.02039959200815984, "grad_norm": 1.3396371603012085, "learning_rate": 4.750499001996007e-06, "loss": 0.634, "step": 340 }, { "epoch": 0.020459590808183836, "grad_norm": 1.3835654258728027, "learning_rate": 4.7644710578842315e-06, "loss": 0.5737, "step": 341 }, { "epoch": 0.020519589608207836, "grad_norm": 1.4680688381195068, "learning_rate": 4.778443113772456e-06, "loss": 0.51, "step": 342 }, { "epoch": 0.020579588408231837, "grad_norm": 1.1909685134887695, "learning_rate": 4.792415169660678e-06, "loss": 0.5393, "step": 343 }, { "epoch": 0.020639587208255834, "grad_norm": 1.2841076850891113, "learning_rate": 4.806387225548903e-06, "loss": 0.6631, "step": 344 }, { "epoch": 0.020699586008279834, "grad_norm": 1.3367516994476318, "learning_rate": 4.820359281437125e-06, "loss": 0.5212, "step": 345 }, { "epoch": 0.020759584808303835, "grad_norm": 1.3215190172195435, "learning_rate": 4.83433133732535e-06, "loss": 0.5479, "step": 346 }, { "epoch": 0.020819583608327835, "grad_norm": 1.2938443422317505, "learning_rate": 4.848303393213572e-06, "loss": 0.5119, "step": 347 }, { "epoch": 0.020879582408351832, "grad_norm": 1.3678687810897827, "learning_rate": 4.862275449101797e-06, "loss": 0.5868, "step": 348 }, { "epoch": 0.020939581208375833, "grad_norm": 1.3793126344680786, "learning_rate": 4.87624750499002e-06, "loss": 0.5067, "step": 349 }, { "epoch": 0.020999580008399833, "grad_norm": 1.30298912525177, "learning_rate": 4.890219560878244e-06, "loss": 0.5799, "step": 350 }, { "epoch": 0.02105957880842383, "grad_norm": 1.1759623289108276, "learning_rate": 4.904191616766467e-06, "loss": 0.5204, "step": 351 }, { "epoch": 0.02111957760844783, "grad_norm": 1.3238939046859741, "learning_rate": 4.9181636726546905e-06, "loss": 0.5277, "step": 352 }, { "epoch": 0.02117957640847183, "grad_norm": 1.35255765914917, "learning_rate": 4.932135728542914e-06, "loss": 0.5597, "step": 353 }, { "epoch": 0.02123957520849583, "grad_norm": 1.3364213705062866, "learning_rate": 4.9461077844311375e-06, "loss": 0.5422, "step": 354 }, { "epoch": 0.02129957400851983, "grad_norm": 1.3523955345153809, "learning_rate": 4.960079840319361e-06, "loss": 0.5529, "step": 355 }, { "epoch": 0.02135957280854383, "grad_norm": 1.2957843542099, "learning_rate": 4.974051896207585e-06, "loss": 0.5008, "step": 356 }, { "epoch": 0.02141957160856783, "grad_norm": 1.3628146648406982, "learning_rate": 4.988023952095808e-06, "loss": 0.5826, "step": 357 }, { "epoch": 0.021479570408591827, "grad_norm": 1.201373815536499, "learning_rate": 5.001996007984032e-06, "loss": 0.5147, "step": 358 }, { "epoch": 0.021539569208615827, "grad_norm": 1.409692645072937, "learning_rate": 5.015968063872255e-06, "loss": 0.5537, "step": 359 }, { "epoch": 0.021599568008639828, "grad_norm": 1.3121532201766968, "learning_rate": 5.029940119760479e-06, "loss": 0.5376, "step": 360 }, { "epoch": 0.021659566808663825, "grad_norm": 1.4344936609268188, "learning_rate": 5.043912175648703e-06, "loss": 0.573, "step": 361 }, { "epoch": 0.021719565608687826, "grad_norm": 1.335785150527954, "learning_rate": 5.057884231536926e-06, "loss": 0.5369, "step": 362 }, { "epoch": 0.021779564408711826, "grad_norm": 1.2454403638839722, "learning_rate": 5.07185628742515e-06, "loss": 0.5033, "step": 363 }, { "epoch": 0.021839563208735827, "grad_norm": 1.2691898345947266, "learning_rate": 5.085828343313373e-06, "loss": 0.5955, "step": 364 }, { "epoch": 0.021899562008759824, "grad_norm": 1.303348183631897, "learning_rate": 5.099800399201597e-06, "loss": 0.5543, "step": 365 }, { "epoch": 0.021959560808783824, "grad_norm": 1.347428798675537, "learning_rate": 5.11377245508982e-06, "loss": 0.6159, "step": 366 }, { "epoch": 0.022019559608807825, "grad_norm": 1.5329909324645996, "learning_rate": 5.127744510978044e-06, "loss": 0.5967, "step": 367 }, { "epoch": 0.02207955840883182, "grad_norm": 1.2962491512298584, "learning_rate": 5.141716566866267e-06, "loss": 0.5464, "step": 368 }, { "epoch": 0.022139557208855822, "grad_norm": 1.380326271057129, "learning_rate": 5.155688622754491e-06, "loss": 0.576, "step": 369 }, { "epoch": 0.022199556008879823, "grad_norm": 1.3474278450012207, "learning_rate": 5.169660678642714e-06, "loss": 0.5919, "step": 370 }, { "epoch": 0.022259554808903823, "grad_norm": 1.503061056137085, "learning_rate": 5.183632734530938e-06, "loss": 0.5428, "step": 371 }, { "epoch": 0.02231955360892782, "grad_norm": 1.2903887033462524, "learning_rate": 5.197604790419162e-06, "loss": 0.5706, "step": 372 }, { "epoch": 0.02237955240895182, "grad_norm": 1.2997634410858154, "learning_rate": 5.211576846307385e-06, "loss": 0.5385, "step": 373 }, { "epoch": 0.02243955120897582, "grad_norm": 1.2579540014266968, "learning_rate": 5.225548902195609e-06, "loss": 0.5727, "step": 374 }, { "epoch": 0.02249955000899982, "grad_norm": 1.4388694763183594, "learning_rate": 5.239520958083832e-06, "loss": 0.5675, "step": 375 }, { "epoch": 0.02255954880902382, "grad_norm": 1.3284945487976074, "learning_rate": 5.253493013972056e-06, "loss": 0.5228, "step": 376 }, { "epoch": 0.02261954760904782, "grad_norm": 1.4117447137832642, "learning_rate": 5.267465069860279e-06, "loss": 0.4945, "step": 377 }, { "epoch": 0.02267954640907182, "grad_norm": 1.3722922801971436, "learning_rate": 5.2814371257485025e-06, "loss": 0.5357, "step": 378 }, { "epoch": 0.022739545209095817, "grad_norm": 1.1581660509109497, "learning_rate": 5.295409181636727e-06, "loss": 0.4905, "step": 379 }, { "epoch": 0.022799544009119817, "grad_norm": 1.417017936706543, "learning_rate": 5.3093812375249495e-06, "loss": 0.5863, "step": 380 }, { "epoch": 0.022859542809143818, "grad_norm": 2.679180860519409, "learning_rate": 5.323353293413174e-06, "loss": 0.5456, "step": 381 }, { "epoch": 0.022919541609167815, "grad_norm": 1.4995249509811401, "learning_rate": 5.337325349301397e-06, "loss": 0.5789, "step": 382 }, { "epoch": 0.022979540409191816, "grad_norm": 1.2755473852157593, "learning_rate": 5.351297405189621e-06, "loss": 0.5366, "step": 383 }, { "epoch": 0.023039539209215816, "grad_norm": 1.395322561264038, "learning_rate": 5.365269461077844e-06, "loss": 0.5329, "step": 384 }, { "epoch": 0.023099538009239817, "grad_norm": 1.332680106163025, "learning_rate": 5.379241516966068e-06, "loss": 0.572, "step": 385 }, { "epoch": 0.023159536809263814, "grad_norm": 1.464486002922058, "learning_rate": 5.393213572854292e-06, "loss": 0.5261, "step": 386 }, { "epoch": 0.023219535609287814, "grad_norm": 1.3546417951583862, "learning_rate": 5.407185628742515e-06, "loss": 0.512, "step": 387 }, { "epoch": 0.023279534409311815, "grad_norm": 1.431408405303955, "learning_rate": 5.421157684630739e-06, "loss": 0.6078, "step": 388 }, { "epoch": 0.02333953320933581, "grad_norm": 1.3991687297821045, "learning_rate": 5.435129740518962e-06, "loss": 0.5656, "step": 389 }, { "epoch": 0.023399532009359812, "grad_norm": 1.395599365234375, "learning_rate": 5.449101796407186e-06, "loss": 0.5358, "step": 390 }, { "epoch": 0.023459530809383813, "grad_norm": 1.3732776641845703, "learning_rate": 5.4630738522954085e-06, "loss": 0.4905, "step": 391 }, { "epoch": 0.023519529609407813, "grad_norm": 1.3630635738372803, "learning_rate": 5.477045908183633e-06, "loss": 0.5272, "step": 392 }, { "epoch": 0.02357952840943181, "grad_norm": 1.204935073852539, "learning_rate": 5.491017964071856e-06, "loss": 0.5459, "step": 393 }, { "epoch": 0.02363952720945581, "grad_norm": 1.3321785926818848, "learning_rate": 5.50499001996008e-06, "loss": 0.5832, "step": 394 }, { "epoch": 0.02369952600947981, "grad_norm": 1.28567373752594, "learning_rate": 5.518962075848303e-06, "loss": 0.5176, "step": 395 }, { "epoch": 0.02375952480950381, "grad_norm": 1.3592463731765747, "learning_rate": 5.532934131736527e-06, "loss": 0.5146, "step": 396 }, { "epoch": 0.02381952360952781, "grad_norm": 1.2929233312606812, "learning_rate": 5.54690618762475e-06, "loss": 0.5036, "step": 397 }, { "epoch": 0.02387952240955181, "grad_norm": 1.4294980764389038, "learning_rate": 5.560878243512974e-06, "loss": 0.5717, "step": 398 }, { "epoch": 0.02393952120957581, "grad_norm": 1.3124151229858398, "learning_rate": 5.574850299401197e-06, "loss": 0.5802, "step": 399 }, { "epoch": 0.023999520009599807, "grad_norm": 1.4800527095794678, "learning_rate": 5.5888223552894215e-06, "loss": 0.5055, "step": 400 }, { "epoch": 0.024059518809623807, "grad_norm": 1.3692280054092407, "learning_rate": 5.602794411177645e-06, "loss": 0.4703, "step": 401 }, { "epoch": 0.024119517609647808, "grad_norm": 1.5311262607574463, "learning_rate": 5.616766467065868e-06, "loss": 0.5732, "step": 402 }, { "epoch": 0.024179516409671805, "grad_norm": 1.3704875707626343, "learning_rate": 5.630738522954092e-06, "loss": 0.5229, "step": 403 }, { "epoch": 0.024239515209695806, "grad_norm": 1.3156622648239136, "learning_rate": 5.644710578842315e-06, "loss": 0.537, "step": 404 }, { "epoch": 0.024299514009719806, "grad_norm": 1.2395838499069214, "learning_rate": 5.658682634730539e-06, "loss": 0.5669, "step": 405 }, { "epoch": 0.024359512809743807, "grad_norm": 1.3817795515060425, "learning_rate": 5.672654690618762e-06, "loss": 0.5512, "step": 406 }, { "epoch": 0.024419511609767804, "grad_norm": 1.4151265621185303, "learning_rate": 5.686626746506986e-06, "loss": 0.5011, "step": 407 }, { "epoch": 0.024479510409791804, "grad_norm": 1.5964850187301636, "learning_rate": 5.700598802395209e-06, "loss": 0.5487, "step": 408 }, { "epoch": 0.024539509209815805, "grad_norm": 1.3732197284698486, "learning_rate": 5.714570858283434e-06, "loss": 0.5437, "step": 409 }, { "epoch": 0.0245995080098398, "grad_norm": 1.2757121324539185, "learning_rate": 5.728542914171656e-06, "loss": 0.5533, "step": 410 }, { "epoch": 0.024659506809863802, "grad_norm": 1.1656192541122437, "learning_rate": 5.7425149700598805e-06, "loss": 0.527, "step": 411 }, { "epoch": 0.024719505609887803, "grad_norm": 1.5346012115478516, "learning_rate": 5.756487025948103e-06, "loss": 0.5723, "step": 412 }, { "epoch": 0.024779504409911803, "grad_norm": 1.2927073240280151, "learning_rate": 5.7704590818363275e-06, "loss": 0.5754, "step": 413 }, { "epoch": 0.0248395032099358, "grad_norm": 1.316091775894165, "learning_rate": 5.784431137724551e-06, "loss": 0.5404, "step": 414 }, { "epoch": 0.0248995020099598, "grad_norm": 1.3097758293151855, "learning_rate": 5.798403193612774e-06, "loss": 0.5588, "step": 415 }, { "epoch": 0.0249595008099838, "grad_norm": 1.4136468172073364, "learning_rate": 5.812375249500998e-06, "loss": 0.5688, "step": 416 }, { "epoch": 0.0250194996100078, "grad_norm": 1.4428296089172363, "learning_rate": 5.826347305389221e-06, "loss": 0.5287, "step": 417 }, { "epoch": 0.0250794984100318, "grad_norm": 1.4804878234863281, "learning_rate": 5.840319361277445e-06, "loss": 0.5533, "step": 418 }, { "epoch": 0.0251394972100558, "grad_norm": 1.3418043851852417, "learning_rate": 5.854291417165668e-06, "loss": 0.5386, "step": 419 }, { "epoch": 0.0251994960100798, "grad_norm": 1.338141679763794, "learning_rate": 5.868263473053892e-06, "loss": 0.5276, "step": 420 }, { "epoch": 0.025259494810103797, "grad_norm": 1.7315783500671387, "learning_rate": 5.882235528942115e-06, "loss": 0.4896, "step": 421 }, { "epoch": 0.025319493610127797, "grad_norm": 1.376572608947754, "learning_rate": 5.89620758483034e-06, "loss": 0.5858, "step": 422 }, { "epoch": 0.025379492410151798, "grad_norm": 1.3931546211242676, "learning_rate": 5.910179640718563e-06, "loss": 0.5289, "step": 423 }, { "epoch": 0.025439491210175795, "grad_norm": 1.4221231937408447, "learning_rate": 5.9241516966067865e-06, "loss": 0.5726, "step": 424 }, { "epoch": 0.025499490010199796, "grad_norm": 1.348026156425476, "learning_rate": 5.93812375249501e-06, "loss": 0.4851, "step": 425 }, { "epoch": 0.025559488810223796, "grad_norm": 1.1808773279190063, "learning_rate": 5.9520958083832335e-06, "loss": 0.5357, "step": 426 }, { "epoch": 0.025619487610247797, "grad_norm": 1.3280466794967651, "learning_rate": 5.966067864271457e-06, "loss": 0.5083, "step": 427 }, { "epoch": 0.025679486410271794, "grad_norm": 1.3595224618911743, "learning_rate": 5.98003992015968e-06, "loss": 0.5714, "step": 428 }, { "epoch": 0.025739485210295794, "grad_norm": 1.3502541780471802, "learning_rate": 5.994011976047904e-06, "loss": 0.5817, "step": 429 }, { "epoch": 0.025799484010319795, "grad_norm": 1.4128973484039307, "learning_rate": 6.007984031936128e-06, "loss": 0.5575, "step": 430 }, { "epoch": 0.02585948281034379, "grad_norm": 1.43216872215271, "learning_rate": 6.021956087824351e-06, "loss": 0.4937, "step": 431 }, { "epoch": 0.025919481610367792, "grad_norm": 1.3422867059707642, "learning_rate": 6.035928143712575e-06, "loss": 0.5448, "step": 432 }, { "epoch": 0.025979480410391793, "grad_norm": 1.21773099899292, "learning_rate": 6.049900199600798e-06, "loss": 0.5206, "step": 433 }, { "epoch": 0.026039479210415793, "grad_norm": 1.215291142463684, "learning_rate": 6.063872255489022e-06, "loss": 0.5594, "step": 434 }, { "epoch": 0.02609947801043979, "grad_norm": 1.4873298406600952, "learning_rate": 6.077844311377245e-06, "loss": 0.591, "step": 435 }, { "epoch": 0.02615947681046379, "grad_norm": 1.3332710266113281, "learning_rate": 6.091816367265469e-06, "loss": 0.5697, "step": 436 }, { "epoch": 0.02621947561048779, "grad_norm": 1.2635142803192139, "learning_rate": 6.1057884231536925e-06, "loss": 0.5693, "step": 437 }, { "epoch": 0.02627947441051179, "grad_norm": 1.3265089988708496, "learning_rate": 6.119760479041916e-06, "loss": 0.5531, "step": 438 }, { "epoch": 0.02633947321053579, "grad_norm": 1.4830745458602905, "learning_rate": 6.1337325349301395e-06, "loss": 0.5701, "step": 439 }, { "epoch": 0.02639947201055979, "grad_norm": 1.3796252012252808, "learning_rate": 6.147704590818363e-06, "loss": 0.5354, "step": 440 }, { "epoch": 0.02645947081058379, "grad_norm": 1.3610963821411133, "learning_rate": 6.161676646706587e-06, "loss": 0.5485, "step": 441 }, { "epoch": 0.026519469610607787, "grad_norm": 1.271680474281311, "learning_rate": 6.17564870259481e-06, "loss": 0.5244, "step": 442 }, { "epoch": 0.026579468410631787, "grad_norm": 1.2831600904464722, "learning_rate": 6.189620758483034e-06, "loss": 0.5881, "step": 443 }, { "epoch": 0.026639467210655788, "grad_norm": 1.476460576057434, "learning_rate": 6.203592814371258e-06, "loss": 0.5819, "step": 444 }, { "epoch": 0.026699466010679785, "grad_norm": 1.4198575019836426, "learning_rate": 6.217564870259481e-06, "loss": 0.4344, "step": 445 }, { "epoch": 0.026759464810703786, "grad_norm": 1.3113822937011719, "learning_rate": 6.231536926147705e-06, "loss": 0.581, "step": 446 }, { "epoch": 0.026819463610727786, "grad_norm": 1.327959656715393, "learning_rate": 6.245508982035928e-06, "loss": 0.5691, "step": 447 }, { "epoch": 0.026879462410751787, "grad_norm": 1.604324221611023, "learning_rate": 6.259481037924152e-06, "loss": 0.6324, "step": 448 }, { "epoch": 0.026939461210775784, "grad_norm": 1.466412901878357, "learning_rate": 6.273453093812375e-06, "loss": 0.6034, "step": 449 }, { "epoch": 0.026999460010799784, "grad_norm": 1.3002980947494507, "learning_rate": 6.2874251497005985e-06, "loss": 0.4659, "step": 450 }, { "epoch": 0.027059458810823785, "grad_norm": 1.5699197053909302, "learning_rate": 6.301397205588823e-06, "loss": 0.5306, "step": 451 }, { "epoch": 0.02711945761084778, "grad_norm": 1.2288851737976074, "learning_rate": 6.3153692614770455e-06, "loss": 0.4961, "step": 452 }, { "epoch": 0.027179456410871782, "grad_norm": 1.4804662466049194, "learning_rate": 6.32934131736527e-06, "loss": 0.5405, "step": 453 }, { "epoch": 0.027239455210895783, "grad_norm": 1.4161081314086914, "learning_rate": 6.343313373253492e-06, "loss": 0.5822, "step": 454 }, { "epoch": 0.027299454010919783, "grad_norm": 1.4064979553222656, "learning_rate": 6.357285429141717e-06, "loss": 0.534, "step": 455 }, { "epoch": 0.02735945281094378, "grad_norm": 1.3755273818969727, "learning_rate": 6.371257485029939e-06, "loss": 0.5291, "step": 456 }, { "epoch": 0.02741945161096778, "grad_norm": 1.385231614112854, "learning_rate": 6.385229540918164e-06, "loss": 0.5732, "step": 457 }, { "epoch": 0.02747945041099178, "grad_norm": 1.4563345909118652, "learning_rate": 6.399201596806387e-06, "loss": 0.5874, "step": 458 }, { "epoch": 0.02753944921101578, "grad_norm": 1.28314208984375, "learning_rate": 6.413173652694611e-06, "loss": 0.5111, "step": 459 }, { "epoch": 0.02759944801103978, "grad_norm": 1.2848016023635864, "learning_rate": 6.427145708582834e-06, "loss": 0.5178, "step": 460 }, { "epoch": 0.02765944681106378, "grad_norm": 1.2730519771575928, "learning_rate": 6.441117764471058e-06, "loss": 0.545, "step": 461 }, { "epoch": 0.02771944561108778, "grad_norm": 1.3099678754806519, "learning_rate": 6.455089820359282e-06, "loss": 0.5134, "step": 462 }, { "epoch": 0.027779444411111777, "grad_norm": 1.4520412683486938, "learning_rate": 6.4690618762475045e-06, "loss": 0.5298, "step": 463 }, { "epoch": 0.027839443211135777, "grad_norm": 1.435386300086975, "learning_rate": 6.483033932135729e-06, "loss": 0.5096, "step": 464 }, { "epoch": 0.027899442011159778, "grad_norm": 1.4585649967193604, "learning_rate": 6.497005988023952e-06, "loss": 0.574, "step": 465 }, { "epoch": 0.027959440811183775, "grad_norm": 1.356292486190796, "learning_rate": 6.510978043912176e-06, "loss": 0.5372, "step": 466 }, { "epoch": 0.028019439611207776, "grad_norm": 1.3656244277954102, "learning_rate": 6.524950099800399e-06, "loss": 0.483, "step": 467 }, { "epoch": 0.028079438411231776, "grad_norm": 1.5167064666748047, "learning_rate": 6.538922155688623e-06, "loss": 0.5568, "step": 468 }, { "epoch": 0.028139437211255777, "grad_norm": 1.526760458946228, "learning_rate": 6.552894211576846e-06, "loss": 0.5929, "step": 469 }, { "epoch": 0.028199436011279774, "grad_norm": 1.3087354898452759, "learning_rate": 6.56686626746507e-06, "loss": 0.5547, "step": 470 }, { "epoch": 0.028259434811303774, "grad_norm": 1.480409026145935, "learning_rate": 6.580838323353293e-06, "loss": 0.519, "step": 471 }, { "epoch": 0.028319433611327775, "grad_norm": 1.3204160928726196, "learning_rate": 6.594810379241517e-06, "loss": 0.529, "step": 472 }, { "epoch": 0.02837943241135177, "grad_norm": 1.3826119899749756, "learning_rate": 6.60878243512974e-06, "loss": 0.5452, "step": 473 }, { "epoch": 0.028439431211375772, "grad_norm": 1.3143632411956787, "learning_rate": 6.6227544910179644e-06, "loss": 0.4982, "step": 474 }, { "epoch": 0.028499430011399773, "grad_norm": 1.330705165863037, "learning_rate": 6.636726546906187e-06, "loss": 0.5659, "step": 475 }, { "epoch": 0.028559428811423773, "grad_norm": 1.4741685390472412, "learning_rate": 6.650698602794411e-06, "loss": 0.5684, "step": 476 }, { "epoch": 0.02861942761144777, "grad_norm": 1.222646713256836, "learning_rate": 6.664670658682634e-06, "loss": 0.5209, "step": 477 }, { "epoch": 0.02867942641147177, "grad_norm": 1.2617549896240234, "learning_rate": 6.678642714570858e-06, "loss": 0.5909, "step": 478 }, { "epoch": 0.02873942521149577, "grad_norm": 1.2416387796401978, "learning_rate": 6.692614770459082e-06, "loss": 0.4525, "step": 479 }, { "epoch": 0.02879942401151977, "grad_norm": 1.4645830392837524, "learning_rate": 6.706586826347305e-06, "loss": 0.5533, "step": 480 }, { "epoch": 0.02885942281154377, "grad_norm": 1.4160593748092651, "learning_rate": 6.72055888223553e-06, "loss": 0.5122, "step": 481 }, { "epoch": 0.02891942161156777, "grad_norm": 1.2880598306655884, "learning_rate": 6.734530938123752e-06, "loss": 0.5427, "step": 482 }, { "epoch": 0.02897942041159177, "grad_norm": 1.2619454860687256, "learning_rate": 6.7485029940119765e-06, "loss": 0.5378, "step": 483 }, { "epoch": 0.029039419211615767, "grad_norm": 1.3686878681182861, "learning_rate": 6.762475049900199e-06, "loss": 0.5354, "step": 484 }, { "epoch": 0.029099418011639767, "grad_norm": 1.3959450721740723, "learning_rate": 6.7764471057884235e-06, "loss": 0.5111, "step": 485 }, { "epoch": 0.029159416811663768, "grad_norm": 1.2900882959365845, "learning_rate": 6.790419161676646e-06, "loss": 0.5096, "step": 486 }, { "epoch": 0.029219415611687765, "grad_norm": 1.3389127254486084, "learning_rate": 6.80439121756487e-06, "loss": 0.5196, "step": 487 }, { "epoch": 0.029279414411711766, "grad_norm": 1.3318572044372559, "learning_rate": 6.818363273453094e-06, "loss": 0.624, "step": 488 }, { "epoch": 0.029339413211735766, "grad_norm": 1.2576119899749756, "learning_rate": 6.832335329341317e-06, "loss": 0.5798, "step": 489 }, { "epoch": 0.029399412011759767, "grad_norm": 1.2764872312545776, "learning_rate": 6.846307385229541e-06, "loss": 0.5484, "step": 490 }, { "epoch": 0.029459410811783764, "grad_norm": 1.4727654457092285, "learning_rate": 6.860279441117764e-06, "loss": 0.6691, "step": 491 }, { "epoch": 0.029519409611807764, "grad_norm": 1.2081209421157837, "learning_rate": 6.874251497005988e-06, "loss": 0.5232, "step": 492 }, { "epoch": 0.029579408411831765, "grad_norm": 1.323933482170105, "learning_rate": 6.888223552894211e-06, "loss": 0.4824, "step": 493 }, { "epoch": 0.02963940721185576, "grad_norm": 1.371349811553955, "learning_rate": 6.902195608782435e-06, "loss": 0.5502, "step": 494 }, { "epoch": 0.029699406011879762, "grad_norm": 1.5051307678222656, "learning_rate": 6.916167664670659e-06, "loss": 0.522, "step": 495 }, { "epoch": 0.029759404811903763, "grad_norm": 1.4457638263702393, "learning_rate": 6.930139720558882e-06, "loss": 0.5908, "step": 496 }, { "epoch": 0.02981940361192776, "grad_norm": 1.2833924293518066, "learning_rate": 6.944111776447106e-06, "loss": 0.5915, "step": 497 }, { "epoch": 0.02987940241195176, "grad_norm": 1.3697950839996338, "learning_rate": 6.958083832335329e-06, "loss": 0.5495, "step": 498 }, { "epoch": 0.02993940121197576, "grad_norm": 1.3013577461242676, "learning_rate": 6.972055888223553e-06, "loss": 0.4926, "step": 499 }, { "epoch": 0.02999940001199976, "grad_norm": 1.4489400386810303, "learning_rate": 6.9860279441117756e-06, "loss": 0.5207, "step": 500 }, { "epoch": 0.03005939881202376, "grad_norm": 1.3100686073303223, "learning_rate": 7e-06, "loss": 0.4989, "step": 501 }, { "epoch": 0.03011939761204776, "grad_norm": 1.2235376834869385, "learning_rate": 6.999999933910473e-06, "loss": 0.5058, "step": 502 }, { "epoch": 0.03017939641207176, "grad_norm": 1.1886225938796997, "learning_rate": 6.999999735641895e-06, "loss": 0.4777, "step": 503 }, { "epoch": 0.030239395212095756, "grad_norm": 1.2328940629959106, "learning_rate": 6.999999405194271e-06, "loss": 0.5022, "step": 504 }, { "epoch": 0.030299394012119757, "grad_norm": 1.5182559490203857, "learning_rate": 6.999998942567618e-06, "loss": 0.5513, "step": 505 }, { "epoch": 0.030359392812143757, "grad_norm": 1.365206003189087, "learning_rate": 6.999998347761949e-06, "loss": 0.5504, "step": 506 }, { "epoch": 0.030419391612167758, "grad_norm": 1.2584086656570435, "learning_rate": 6.999997620777289e-06, "loss": 0.5073, "step": 507 }, { "epoch": 0.030479390412191755, "grad_norm": 1.5513336658477783, "learning_rate": 6.9999967616136655e-06, "loss": 0.5901, "step": 508 }, { "epoch": 0.030539389212215756, "grad_norm": 1.3015873432159424, "learning_rate": 6.9999957702711086e-06, "loss": 0.5158, "step": 509 }, { "epoch": 0.030599388012239756, "grad_norm": 1.41486394405365, "learning_rate": 6.99999464674966e-06, "loss": 0.5026, "step": 510 }, { "epoch": 0.030659386812263753, "grad_norm": 1.2551966905593872, "learning_rate": 6.999993391049357e-06, "loss": 0.5609, "step": 511 }, { "epoch": 0.030719385612287754, "grad_norm": 1.2526534795761108, "learning_rate": 6.999992003170251e-06, "loss": 0.5271, "step": 512 }, { "epoch": 0.030779384412311754, "grad_norm": 1.6100177764892578, "learning_rate": 6.999990483112392e-06, "loss": 0.6562, "step": 513 }, { "epoch": 0.030839383212335755, "grad_norm": 1.439218521118164, "learning_rate": 6.999988830875838e-06, "loss": 0.5034, "step": 514 }, { "epoch": 0.03089938201235975, "grad_norm": 1.5849571228027344, "learning_rate": 6.9999870464606535e-06, "loss": 0.5284, "step": 515 }, { "epoch": 0.030959380812383752, "grad_norm": 1.3446741104125977, "learning_rate": 6.999985129866903e-06, "loss": 0.5678, "step": 516 }, { "epoch": 0.031019379612407753, "grad_norm": 1.3330668210983276, "learning_rate": 6.99998308109466e-06, "loss": 0.5553, "step": 517 }, { "epoch": 0.03107937841243175, "grad_norm": 1.3332020044326782, "learning_rate": 6.999980900144002e-06, "loss": 0.5078, "step": 518 }, { "epoch": 0.03113937721245575, "grad_norm": 1.3455966711044312, "learning_rate": 6.999978587015012e-06, "loss": 0.6187, "step": 519 }, { "epoch": 0.03119937601247975, "grad_norm": 1.2101399898529053, "learning_rate": 6.999976141707776e-06, "loss": 0.4826, "step": 520 }, { "epoch": 0.03125937481250375, "grad_norm": 1.317331075668335, "learning_rate": 6.999973564222388e-06, "loss": 0.5213, "step": 521 }, { "epoch": 0.03131937361252775, "grad_norm": 1.358681559562683, "learning_rate": 6.999970854558942e-06, "loss": 0.5227, "step": 522 }, { "epoch": 0.03137937241255175, "grad_norm": 1.319206714630127, "learning_rate": 6.999968012717546e-06, "loss": 0.5451, "step": 523 }, { "epoch": 0.03143937121257575, "grad_norm": 1.2241791486740112, "learning_rate": 6.999965038698301e-06, "loss": 0.4588, "step": 524 }, { "epoch": 0.031499370012599746, "grad_norm": 1.4955490827560425, "learning_rate": 6.9999619325013235e-06, "loss": 0.5183, "step": 525 }, { "epoch": 0.03155936881262375, "grad_norm": 1.3012906312942505, "learning_rate": 6.999958694126729e-06, "loss": 0.5375, "step": 526 }, { "epoch": 0.03161936761264775, "grad_norm": 1.3435192108154297, "learning_rate": 6.999955323574641e-06, "loss": 0.5408, "step": 527 }, { "epoch": 0.031679366412671744, "grad_norm": 1.2921597957611084, "learning_rate": 6.9999518208451865e-06, "loss": 0.5164, "step": 528 }, { "epoch": 0.03173936521269575, "grad_norm": 1.2238010168075562, "learning_rate": 6.999948185938497e-06, "loss": 0.5181, "step": 529 }, { "epoch": 0.031799364012719745, "grad_norm": 1.2654507160186768, "learning_rate": 6.99994441885471e-06, "loss": 0.5423, "step": 530 }, { "epoch": 0.03185936281274374, "grad_norm": 1.5498539209365845, "learning_rate": 6.999940519593969e-06, "loss": 0.5211, "step": 531 }, { "epoch": 0.031919361612767747, "grad_norm": 1.2426462173461914, "learning_rate": 6.999936488156419e-06, "loss": 0.5323, "step": 532 }, { "epoch": 0.031979360412791744, "grad_norm": 1.308100938796997, "learning_rate": 6.999932324542214e-06, "loss": 0.5324, "step": 533 }, { "epoch": 0.03203935921281574, "grad_norm": 1.4815964698791504, "learning_rate": 6.999928028751511e-06, "loss": 0.5424, "step": 534 }, { "epoch": 0.032099358012839745, "grad_norm": 1.439534068107605, "learning_rate": 6.999923600784471e-06, "loss": 0.5626, "step": 535 }, { "epoch": 0.03215935681286374, "grad_norm": 1.244315505027771, "learning_rate": 6.999919040641263e-06, "loss": 0.5053, "step": 536 }, { "epoch": 0.032219355612887746, "grad_norm": 1.5092804431915283, "learning_rate": 6.9999143483220585e-06, "loss": 0.5648, "step": 537 }, { "epoch": 0.03227935441291174, "grad_norm": 1.3518118858337402, "learning_rate": 6.999909523827035e-06, "loss": 0.5819, "step": 538 }, { "epoch": 0.03233935321293574, "grad_norm": 1.432836651802063, "learning_rate": 6.999904567156373e-06, "loss": 0.571, "step": 539 }, { "epoch": 0.032399352012959744, "grad_norm": 1.2972978353500366, "learning_rate": 6.999899478310262e-06, "loss": 0.5469, "step": 540 }, { "epoch": 0.03245935081298374, "grad_norm": 1.287229061126709, "learning_rate": 6.999894257288894e-06, "loss": 0.5788, "step": 541 }, { "epoch": 0.03251934961300774, "grad_norm": 1.4336271286010742, "learning_rate": 6.999888904092464e-06, "loss": 0.5098, "step": 542 }, { "epoch": 0.03257934841303174, "grad_norm": 1.303281307220459, "learning_rate": 6.999883418721175e-06, "loss": 0.5619, "step": 543 }, { "epoch": 0.03263934721305574, "grad_norm": 1.3279154300689697, "learning_rate": 6.999877801175236e-06, "loss": 0.5201, "step": 544 }, { "epoch": 0.032699346013079736, "grad_norm": 1.2826552391052246, "learning_rate": 6.9998720514548584e-06, "loss": 0.5173, "step": 545 }, { "epoch": 0.03275934481310374, "grad_norm": 1.4995832443237305, "learning_rate": 6.999866169560258e-06, "loss": 0.5295, "step": 546 }, { "epoch": 0.03281934361312774, "grad_norm": 1.3608797788619995, "learning_rate": 6.999860155491657e-06, "loss": 0.4787, "step": 547 }, { "epoch": 0.032879342413151734, "grad_norm": 1.2484714984893799, "learning_rate": 6.999854009249284e-06, "loss": 0.4877, "step": 548 }, { "epoch": 0.03293934121317574, "grad_norm": 1.2819926738739014, "learning_rate": 6.999847730833371e-06, "loss": 0.4787, "step": 549 }, { "epoch": 0.032999340013199735, "grad_norm": 1.4429630041122437, "learning_rate": 6.999841320244154e-06, "loss": 0.5667, "step": 550 }, { "epoch": 0.03305933881322374, "grad_norm": 1.4703683853149414, "learning_rate": 6.999834777481875e-06, "loss": 0.5898, "step": 551 }, { "epoch": 0.033119337613247736, "grad_norm": 1.4229215383529663, "learning_rate": 6.999828102546782e-06, "loss": 0.5397, "step": 552 }, { "epoch": 0.03317933641327173, "grad_norm": 1.2938849925994873, "learning_rate": 6.9998212954391274e-06, "loss": 0.5283, "step": 553 }, { "epoch": 0.03323933521329574, "grad_norm": 1.5335382223129272, "learning_rate": 6.999814356159167e-06, "loss": 0.4945, "step": 554 }, { "epoch": 0.033299334013319734, "grad_norm": 1.250177025794983, "learning_rate": 6.9998072847071624e-06, "loss": 0.5291, "step": 555 }, { "epoch": 0.03335933281334373, "grad_norm": 1.2454102039337158, "learning_rate": 6.999800081083383e-06, "loss": 0.496, "step": 556 }, { "epoch": 0.033419331613367735, "grad_norm": 1.4484354257583618, "learning_rate": 6.999792745288099e-06, "loss": 0.5392, "step": 557 }, { "epoch": 0.03347933041339173, "grad_norm": 1.352350115776062, "learning_rate": 6.999785277321589e-06, "loss": 0.4791, "step": 558 }, { "epoch": 0.03353932921341573, "grad_norm": 1.31168794631958, "learning_rate": 6.999777677184134e-06, "loss": 0.5917, "step": 559 }, { "epoch": 0.03359932801343973, "grad_norm": 1.3011642694473267, "learning_rate": 6.999769944876021e-06, "loss": 0.4867, "step": 560 }, { "epoch": 0.03365932681346373, "grad_norm": 1.3663713932037354, "learning_rate": 6.999762080397541e-06, "loss": 0.5294, "step": 561 }, { "epoch": 0.03371932561348773, "grad_norm": 1.2955385446548462, "learning_rate": 6.999754083748992e-06, "loss": 0.5513, "step": 562 }, { "epoch": 0.03377932441351173, "grad_norm": 1.3263733386993408, "learning_rate": 6.999745954930677e-06, "loss": 0.5138, "step": 563 }, { "epoch": 0.03383932321353573, "grad_norm": 1.9568041563034058, "learning_rate": 6.999737693942902e-06, "loss": 0.555, "step": 564 }, { "epoch": 0.033899322013559725, "grad_norm": 1.3127338886260986, "learning_rate": 6.99972930078598e-06, "loss": 0.5453, "step": 565 }, { "epoch": 0.03395932081358373, "grad_norm": 1.3049442768096924, "learning_rate": 6.999720775460226e-06, "loss": 0.5101, "step": 566 }, { "epoch": 0.034019319613607726, "grad_norm": 1.238508939743042, "learning_rate": 6.999712117965962e-06, "loss": 0.4642, "step": 567 }, { "epoch": 0.03407931841363173, "grad_norm": 1.4236454963684082, "learning_rate": 6.999703328303518e-06, "loss": 0.5444, "step": 568 }, { "epoch": 0.03413931721365573, "grad_norm": 1.3783758878707886, "learning_rate": 6.999694406473222e-06, "loss": 0.5579, "step": 569 }, { "epoch": 0.034199316013679724, "grad_norm": 1.469786524772644, "learning_rate": 6.999685352475414e-06, "loss": 0.6323, "step": 570 }, { "epoch": 0.03425931481370373, "grad_norm": 1.347352385520935, "learning_rate": 6.9996761663104336e-06, "loss": 0.5254, "step": 571 }, { "epoch": 0.034319313613727725, "grad_norm": 1.3332070112228394, "learning_rate": 6.999666847978631e-06, "loss": 0.5116, "step": 572 }, { "epoch": 0.03437931241375172, "grad_norm": 1.35901939868927, "learning_rate": 6.9996573974803545e-06, "loss": 0.5761, "step": 573 }, { "epoch": 0.034439311213775727, "grad_norm": 1.2420443296432495, "learning_rate": 6.999647814815963e-06, "loss": 0.532, "step": 574 }, { "epoch": 0.034499310013799724, "grad_norm": 1.3664308786392212, "learning_rate": 6.999638099985817e-06, "loss": 0.5748, "step": 575 }, { "epoch": 0.03455930881382372, "grad_norm": 1.251624584197998, "learning_rate": 6.999628252990285e-06, "loss": 0.5518, "step": 576 }, { "epoch": 0.034619307613847725, "grad_norm": 1.2624403238296509, "learning_rate": 6.999618273829739e-06, "loss": 0.5543, "step": 577 }, { "epoch": 0.03467930641387172, "grad_norm": 1.3157174587249756, "learning_rate": 6.9996081625045534e-06, "loss": 0.4607, "step": 578 }, { "epoch": 0.03473930521389572, "grad_norm": 1.3560078144073486, "learning_rate": 6.999597919015112e-06, "loss": 0.5245, "step": 579 }, { "epoch": 0.03479930401391972, "grad_norm": 1.442065954208374, "learning_rate": 6.999587543361802e-06, "loss": 0.5392, "step": 580 }, { "epoch": 0.03485930281394372, "grad_norm": 1.2962541580200195, "learning_rate": 6.999577035545015e-06, "loss": 0.5738, "step": 581 }, { "epoch": 0.034919301613967724, "grad_norm": 1.4613116979599, "learning_rate": 6.9995663955651455e-06, "loss": 0.4959, "step": 582 }, { "epoch": 0.03497930041399172, "grad_norm": 1.3565943241119385, "learning_rate": 6.9995556234225995e-06, "loss": 0.5875, "step": 583 }, { "epoch": 0.03503929921401572, "grad_norm": 1.2788574695587158, "learning_rate": 6.9995447191177805e-06, "loss": 0.5504, "step": 584 }, { "epoch": 0.03509929801403972, "grad_norm": 1.2603182792663574, "learning_rate": 6.999533682651101e-06, "loss": 0.5624, "step": 585 }, { "epoch": 0.03515929681406372, "grad_norm": 1.3058297634124756, "learning_rate": 6.999522514022979e-06, "loss": 0.5152, "step": 586 }, { "epoch": 0.035219295614087716, "grad_norm": 1.3667396306991577, "learning_rate": 6.999511213233835e-06, "loss": 0.6028, "step": 587 }, { "epoch": 0.03527929441411172, "grad_norm": 1.2605785131454468, "learning_rate": 6.999499780284095e-06, "loss": 0.5876, "step": 588 }, { "epoch": 0.03533929321413572, "grad_norm": 1.3677319288253784, "learning_rate": 6.999488215174194e-06, "loss": 0.5148, "step": 589 }, { "epoch": 0.035399292014159714, "grad_norm": 1.1308717727661133, "learning_rate": 6.999476517904565e-06, "loss": 0.5415, "step": 590 }, { "epoch": 0.03545929081418372, "grad_norm": 1.3839055299758911, "learning_rate": 6.9994646884756525e-06, "loss": 0.5274, "step": 591 }, { "epoch": 0.035519289614207715, "grad_norm": 1.3578506708145142, "learning_rate": 6.999452726887903e-06, "loss": 0.5578, "step": 592 }, { "epoch": 0.03557928841423171, "grad_norm": 1.4691654443740845, "learning_rate": 6.999440633141766e-06, "loss": 0.5144, "step": 593 }, { "epoch": 0.035639287214255716, "grad_norm": 1.3564848899841309, "learning_rate": 6.999428407237699e-06, "loss": 0.5746, "step": 594 }, { "epoch": 0.03569928601427971, "grad_norm": 1.4937633275985718, "learning_rate": 6.999416049176166e-06, "loss": 0.5464, "step": 595 }, { "epoch": 0.03575928481430372, "grad_norm": 1.2426583766937256, "learning_rate": 6.999403558957632e-06, "loss": 0.5109, "step": 596 }, { "epoch": 0.035819283614327714, "grad_norm": 1.2007067203521729, "learning_rate": 6.999390936582569e-06, "loss": 0.5329, "step": 597 }, { "epoch": 0.03587928241435171, "grad_norm": 1.4599841833114624, "learning_rate": 6.999378182051453e-06, "loss": 0.5805, "step": 598 }, { "epoch": 0.035939281214375715, "grad_norm": 1.2941123247146606, "learning_rate": 6.9993652953647665e-06, "loss": 0.5811, "step": 599 }, { "epoch": 0.03599928001439971, "grad_norm": 1.3429677486419678, "learning_rate": 6.999352276522995e-06, "loss": 0.5915, "step": 600 }, { "epoch": 0.03605927881442371, "grad_norm": 1.37906014919281, "learning_rate": 6.999339125526632e-06, "loss": 0.566, "step": 601 }, { "epoch": 0.03611927761444771, "grad_norm": 1.456714391708374, "learning_rate": 6.999325842376172e-06, "loss": 0.4722, "step": 602 }, { "epoch": 0.03617927641447171, "grad_norm": 1.3735039234161377, "learning_rate": 6.999312427072119e-06, "loss": 0.4917, "step": 603 }, { "epoch": 0.03623927521449571, "grad_norm": 1.1422139406204224, "learning_rate": 6.999298879614979e-06, "loss": 0.454, "step": 604 }, { "epoch": 0.03629927401451971, "grad_norm": 1.3340764045715332, "learning_rate": 6.9992852000052615e-06, "loss": 0.5968, "step": 605 }, { "epoch": 0.03635927281454371, "grad_norm": 1.3873076438903809, "learning_rate": 6.9992713882434856e-06, "loss": 0.5206, "step": 606 }, { "epoch": 0.036419271614567705, "grad_norm": 1.4108704328536987, "learning_rate": 6.999257444330171e-06, "loss": 0.5528, "step": 607 }, { "epoch": 0.03647927041459171, "grad_norm": 1.2669475078582764, "learning_rate": 6.999243368265847e-06, "loss": 0.548, "step": 608 }, { "epoch": 0.036539269214615706, "grad_norm": 1.498673439025879, "learning_rate": 6.999229160051042e-06, "loss": 0.5871, "step": 609 }, { "epoch": 0.03659926801463971, "grad_norm": 1.370618224143982, "learning_rate": 6.999214819686295e-06, "loss": 0.5715, "step": 610 }, { "epoch": 0.03665926681466371, "grad_norm": 1.2899749279022217, "learning_rate": 6.999200347172146e-06, "loss": 0.524, "step": 611 }, { "epoch": 0.036719265614687704, "grad_norm": 1.3147510290145874, "learning_rate": 6.999185742509143e-06, "loss": 0.5027, "step": 612 }, { "epoch": 0.03677926441471171, "grad_norm": 1.2292128801345825, "learning_rate": 6.999171005697836e-06, "loss": 0.4967, "step": 613 }, { "epoch": 0.036839263214735705, "grad_norm": 1.205132007598877, "learning_rate": 6.999156136738783e-06, "loss": 0.4657, "step": 614 }, { "epoch": 0.0368992620147597, "grad_norm": 1.3556499481201172, "learning_rate": 6.999141135632545e-06, "loss": 0.4797, "step": 615 }, { "epoch": 0.036959260814783707, "grad_norm": 1.2104159593582153, "learning_rate": 6.999126002379687e-06, "loss": 0.4928, "step": 616 }, { "epoch": 0.037019259614807704, "grad_norm": 1.3225085735321045, "learning_rate": 6.9991107369807824e-06, "loss": 0.5207, "step": 617 }, { "epoch": 0.0370792584148317, "grad_norm": 1.3248034715652466, "learning_rate": 6.999095339436407e-06, "loss": 0.5779, "step": 618 }, { "epoch": 0.037139257214855705, "grad_norm": 1.3512637615203857, "learning_rate": 6.9990798097471444e-06, "loss": 0.5284, "step": 619 }, { "epoch": 0.0371992560148797, "grad_norm": 1.3302295207977295, "learning_rate": 6.999064147913577e-06, "loss": 0.4752, "step": 620 }, { "epoch": 0.0372592548149037, "grad_norm": 1.4087058305740356, "learning_rate": 6.999048353936299e-06, "loss": 0.5194, "step": 621 }, { "epoch": 0.0373192536149277, "grad_norm": 1.3349730968475342, "learning_rate": 6.9990324278159056e-06, "loss": 0.5212, "step": 622 }, { "epoch": 0.0373792524149517, "grad_norm": 1.3049330711364746, "learning_rate": 6.9990163695530005e-06, "loss": 0.5157, "step": 623 }, { "epoch": 0.037439251214975704, "grad_norm": 1.2581250667572021, "learning_rate": 6.999000179148187e-06, "loss": 0.5583, "step": 624 }, { "epoch": 0.0374992500149997, "grad_norm": 1.2041324377059937, "learning_rate": 6.99898385660208e-06, "loss": 0.4782, "step": 625 }, { "epoch": 0.0375592488150237, "grad_norm": 1.4047043323516846, "learning_rate": 6.998967401915293e-06, "loss": 0.5688, "step": 626 }, { "epoch": 0.0376192476150477, "grad_norm": 1.4908888339996338, "learning_rate": 6.998950815088448e-06, "loss": 0.5079, "step": 627 }, { "epoch": 0.0376792464150717, "grad_norm": 1.3511461019515991, "learning_rate": 6.998934096122173e-06, "loss": 0.5554, "step": 628 }, { "epoch": 0.037739245215095696, "grad_norm": 1.2153483629226685, "learning_rate": 6.998917245017097e-06, "loss": 0.5222, "step": 629 }, { "epoch": 0.0377992440151197, "grad_norm": 1.334070086479187, "learning_rate": 6.998900261773859e-06, "loss": 0.6123, "step": 630 }, { "epoch": 0.0378592428151437, "grad_norm": 1.2521353960037231, "learning_rate": 6.998883146393098e-06, "loss": 0.4682, "step": 631 }, { "epoch": 0.037919241615167694, "grad_norm": 1.3269730806350708, "learning_rate": 6.998865898875462e-06, "loss": 0.5478, "step": 632 }, { "epoch": 0.0379792404151917, "grad_norm": 1.5577975511550903, "learning_rate": 6.998848519221602e-06, "loss": 0.5415, "step": 633 }, { "epoch": 0.038039239215215695, "grad_norm": 1.272480845451355, "learning_rate": 6.998831007432173e-06, "loss": 0.5391, "step": 634 }, { "epoch": 0.03809923801523969, "grad_norm": 1.400222897529602, "learning_rate": 6.998813363507839e-06, "loss": 0.5336, "step": 635 }, { "epoch": 0.038159236815263696, "grad_norm": 1.2625300884246826, "learning_rate": 6.998795587449264e-06, "loss": 0.5597, "step": 636 }, { "epoch": 0.03821923561528769, "grad_norm": 1.327709436416626, "learning_rate": 6.99877767925712e-06, "loss": 0.5454, "step": 637 }, { "epoch": 0.0382792344153117, "grad_norm": 1.4783390760421753, "learning_rate": 6.998759638932084e-06, "loss": 0.4924, "step": 638 }, { "epoch": 0.038339233215335694, "grad_norm": 1.3608602285385132, "learning_rate": 6.998741466474836e-06, "loss": 0.5008, "step": 639 }, { "epoch": 0.03839923201535969, "grad_norm": 1.3914543390274048, "learning_rate": 6.9987231618860634e-06, "loss": 0.596, "step": 640 }, { "epoch": 0.038459230815383695, "grad_norm": 1.2898826599121094, "learning_rate": 6.998704725166458e-06, "loss": 0.4826, "step": 641 }, { "epoch": 0.03851922961540769, "grad_norm": 1.2154895067214966, "learning_rate": 6.998686156316714e-06, "loss": 0.5177, "step": 642 }, { "epoch": 0.03857922841543169, "grad_norm": 1.346131443977356, "learning_rate": 6.998667455337534e-06, "loss": 0.5737, "step": 643 }, { "epoch": 0.03863922721545569, "grad_norm": 1.279171109199524, "learning_rate": 6.9986486222296245e-06, "loss": 0.5354, "step": 644 }, { "epoch": 0.03869922601547969, "grad_norm": 1.2156002521514893, "learning_rate": 6.998629656993697e-06, "loss": 0.4638, "step": 645 }, { "epoch": 0.03875922481550369, "grad_norm": 1.3993005752563477, "learning_rate": 6.998610559630466e-06, "loss": 0.5619, "step": 646 }, { "epoch": 0.03881922361552769, "grad_norm": 1.189034104347229, "learning_rate": 6.998591330140653e-06, "loss": 0.4942, "step": 647 }, { "epoch": 0.03887922241555169, "grad_norm": 1.376818060874939, "learning_rate": 6.998571968524987e-06, "loss": 0.5155, "step": 648 }, { "epoch": 0.038939221215575685, "grad_norm": 1.2523258924484253, "learning_rate": 6.998552474784196e-06, "loss": 0.4992, "step": 649 }, { "epoch": 0.03899922001559969, "grad_norm": 1.3732378482818604, "learning_rate": 6.998532848919018e-06, "loss": 0.5081, "step": 650 }, { "epoch": 0.039059218815623686, "grad_norm": 1.3332825899124146, "learning_rate": 6.998513090930194e-06, "loss": 0.5569, "step": 651 }, { "epoch": 0.03911921761564769, "grad_norm": 1.3240107297897339, "learning_rate": 6.9984932008184675e-06, "loss": 0.4342, "step": 652 }, { "epoch": 0.03917921641567169, "grad_norm": 1.193057656288147, "learning_rate": 6.998473178584593e-06, "loss": 0.5006, "step": 653 }, { "epoch": 0.039239215215695684, "grad_norm": 1.2352242469787598, "learning_rate": 6.998453024229326e-06, "loss": 0.4557, "step": 654 }, { "epoch": 0.03929921401571969, "grad_norm": 1.167138695716858, "learning_rate": 6.998432737753428e-06, "loss": 0.5178, "step": 655 }, { "epoch": 0.039359212815743685, "grad_norm": 1.4442358016967773, "learning_rate": 6.998412319157663e-06, "loss": 0.5222, "step": 656 }, { "epoch": 0.03941921161576768, "grad_norm": 1.5091968774795532, "learning_rate": 6.998391768442804e-06, "loss": 0.5404, "step": 657 }, { "epoch": 0.039479210415791686, "grad_norm": 1.3152202367782593, "learning_rate": 6.998371085609626e-06, "loss": 0.5306, "step": 658 }, { "epoch": 0.039539209215815684, "grad_norm": 1.3763744831085205, "learning_rate": 6.998350270658911e-06, "loss": 0.5051, "step": 659 }, { "epoch": 0.03959920801583968, "grad_norm": 1.3971436023712158, "learning_rate": 6.998329323591444e-06, "loss": 0.5319, "step": 660 }, { "epoch": 0.039659206815863685, "grad_norm": 1.3222448825836182, "learning_rate": 6.9983082444080185e-06, "loss": 0.5086, "step": 661 }, { "epoch": 0.03971920561588768, "grad_norm": 1.3185101747512817, "learning_rate": 6.998287033109428e-06, "loss": 0.5724, "step": 662 }, { "epoch": 0.03977920441591168, "grad_norm": 1.424615502357483, "learning_rate": 6.9982656896964745e-06, "loss": 0.5276, "step": 663 }, { "epoch": 0.03983920321593568, "grad_norm": 1.3389155864715576, "learning_rate": 6.998244214169964e-06, "loss": 0.5207, "step": 664 }, { "epoch": 0.03989920201595968, "grad_norm": 1.3123505115509033, "learning_rate": 6.998222606530707e-06, "loss": 0.538, "step": 665 }, { "epoch": 0.039959200815983684, "grad_norm": 1.3251723051071167, "learning_rate": 6.998200866779521e-06, "loss": 0.5161, "step": 666 }, { "epoch": 0.04001919961600768, "grad_norm": 1.3357402086257935, "learning_rate": 6.998178994917224e-06, "loss": 0.5135, "step": 667 }, { "epoch": 0.04007919841603168, "grad_norm": 1.487449288368225, "learning_rate": 6.998156990944646e-06, "loss": 0.5368, "step": 668 }, { "epoch": 0.04013919721605568, "grad_norm": 1.3311059474945068, "learning_rate": 6.998134854862615e-06, "loss": 0.5424, "step": 669 }, { "epoch": 0.04019919601607968, "grad_norm": 1.3158137798309326, "learning_rate": 6.998112586671969e-06, "loss": 0.5701, "step": 670 }, { "epoch": 0.040259194816103676, "grad_norm": 1.3794207572937012, "learning_rate": 6.998090186373547e-06, "loss": 0.51, "step": 671 }, { "epoch": 0.04031919361612768, "grad_norm": 1.3265833854675293, "learning_rate": 6.998067653968195e-06, "loss": 0.5823, "step": 672 }, { "epoch": 0.04037919241615168, "grad_norm": 1.389890193939209, "learning_rate": 6.998044989456768e-06, "loss": 0.5243, "step": 673 }, { "epoch": 0.040439191216175674, "grad_norm": 1.3857120275497437, "learning_rate": 6.998022192840116e-06, "loss": 0.5333, "step": 674 }, { "epoch": 0.04049919001619968, "grad_norm": 1.3366442918777466, "learning_rate": 6.997999264119104e-06, "loss": 0.5463, "step": 675 }, { "epoch": 0.040559188816223675, "grad_norm": 1.306968331336975, "learning_rate": 6.997976203294597e-06, "loss": 0.4557, "step": 676 }, { "epoch": 0.04061918761624767, "grad_norm": 1.4104442596435547, "learning_rate": 6.997953010367464e-06, "loss": 0.5269, "step": 677 }, { "epoch": 0.040679186416271676, "grad_norm": 1.2832245826721191, "learning_rate": 6.997929685338584e-06, "loss": 0.4852, "step": 678 }, { "epoch": 0.04073918521629567, "grad_norm": 1.3738113641738892, "learning_rate": 6.997906228208835e-06, "loss": 0.4927, "step": 679 }, { "epoch": 0.04079918401631968, "grad_norm": 1.4492037296295166, "learning_rate": 6.997882638979104e-06, "loss": 0.5879, "step": 680 }, { "epoch": 0.040859182816343674, "grad_norm": 1.3259153366088867, "learning_rate": 6.997858917650284e-06, "loss": 0.524, "step": 681 }, { "epoch": 0.04091918161636767, "grad_norm": 1.2639960050582886, "learning_rate": 6.997835064223267e-06, "loss": 0.564, "step": 682 }, { "epoch": 0.040979180416391675, "grad_norm": 1.3978016376495361, "learning_rate": 6.997811078698957e-06, "loss": 0.5345, "step": 683 }, { "epoch": 0.04103917921641567, "grad_norm": 1.2763028144836426, "learning_rate": 6.997786961078257e-06, "loss": 0.5557, "step": 684 }, { "epoch": 0.04109917801643967, "grad_norm": 1.2711018323898315, "learning_rate": 6.99776271136208e-06, "loss": 0.5323, "step": 685 }, { "epoch": 0.04115917681646367, "grad_norm": 1.3554152250289917, "learning_rate": 6.997738329551342e-06, "loss": 0.5173, "step": 686 }, { "epoch": 0.04121917561648767, "grad_norm": 1.308406949043274, "learning_rate": 6.997713815646963e-06, "loss": 0.5, "step": 687 }, { "epoch": 0.04127917441651167, "grad_norm": 1.3156546354293823, "learning_rate": 6.997689169649867e-06, "loss": 0.5558, "step": 688 }, { "epoch": 0.04133917321653567, "grad_norm": 1.2197850942611694, "learning_rate": 6.997664391560989e-06, "loss": 0.4832, "step": 689 }, { "epoch": 0.04139917201655967, "grad_norm": 1.2977200746536255, "learning_rate": 6.997639481381261e-06, "loss": 0.4986, "step": 690 }, { "epoch": 0.041459170816583665, "grad_norm": 1.3441215753555298, "learning_rate": 6.997614439111625e-06, "loss": 0.4607, "step": 691 }, { "epoch": 0.04151916961660767, "grad_norm": 1.2608047723770142, "learning_rate": 6.9975892647530256e-06, "loss": 0.5398, "step": 692 }, { "epoch": 0.041579168416631666, "grad_norm": 1.3051687479019165, "learning_rate": 6.9975639583064155e-06, "loss": 0.5128, "step": 693 }, { "epoch": 0.04163916721665567, "grad_norm": 1.4074631929397583, "learning_rate": 6.99753851977275e-06, "loss": 0.569, "step": 694 }, { "epoch": 0.04169916601667967, "grad_norm": 1.3627737760543823, "learning_rate": 6.997512949152989e-06, "loss": 0.5323, "step": 695 }, { "epoch": 0.041759164816703664, "grad_norm": 1.2544273138046265, "learning_rate": 6.997487246448098e-06, "loss": 0.4719, "step": 696 }, { "epoch": 0.04181916361672767, "grad_norm": 1.2817349433898926, "learning_rate": 6.9974614116590495e-06, "loss": 0.5096, "step": 697 }, { "epoch": 0.041879162416751665, "grad_norm": 1.2367947101593018, "learning_rate": 6.997435444786818e-06, "loss": 0.535, "step": 698 }, { "epoch": 0.04193916121677566, "grad_norm": 1.2477823495864868, "learning_rate": 6.997409345832382e-06, "loss": 0.494, "step": 699 }, { "epoch": 0.041999160016799666, "grad_norm": 1.4026823043823242, "learning_rate": 6.997383114796731e-06, "loss": 0.519, "step": 700 }, { "epoch": 0.042059158816823664, "grad_norm": 1.3076211214065552, "learning_rate": 6.997356751680853e-06, "loss": 0.5501, "step": 701 }, { "epoch": 0.04211915761684766, "grad_norm": 1.217833399772644, "learning_rate": 6.997330256485744e-06, "loss": 0.4736, "step": 702 }, { "epoch": 0.042179156416871665, "grad_norm": 1.313565731048584, "learning_rate": 6.9973036292124065e-06, "loss": 0.5126, "step": 703 }, { "epoch": 0.04223915521689566, "grad_norm": 1.312721848487854, "learning_rate": 6.997276869861843e-06, "loss": 0.5253, "step": 704 }, { "epoch": 0.04229915401691966, "grad_norm": 1.2127457857131958, "learning_rate": 6.997249978435067e-06, "loss": 0.4802, "step": 705 }, { "epoch": 0.04235915281694366, "grad_norm": 1.4592642784118652, "learning_rate": 6.997222954933092e-06, "loss": 0.5416, "step": 706 }, { "epoch": 0.04241915161696766, "grad_norm": 1.2856518030166626, "learning_rate": 6.997195799356939e-06, "loss": 0.4915, "step": 707 }, { "epoch": 0.04247915041699166, "grad_norm": 1.1969194412231445, "learning_rate": 6.997168511707635e-06, "loss": 0.5585, "step": 708 }, { "epoch": 0.04253914921701566, "grad_norm": 1.3218095302581787, "learning_rate": 6.997141091986208e-06, "loss": 0.5548, "step": 709 }, { "epoch": 0.04259914801703966, "grad_norm": 1.2492772340774536, "learning_rate": 6.997113540193696e-06, "loss": 0.4765, "step": 710 }, { "epoch": 0.04265914681706366, "grad_norm": 1.3942843675613403, "learning_rate": 6.997085856331138e-06, "loss": 0.5968, "step": 711 }, { "epoch": 0.04271914561708766, "grad_norm": 1.4488935470581055, "learning_rate": 6.99705804039958e-06, "loss": 0.5406, "step": 712 }, { "epoch": 0.042779144417111656, "grad_norm": 1.3665968179702759, "learning_rate": 6.997030092400072e-06, "loss": 0.5522, "step": 713 }, { "epoch": 0.04283914321713566, "grad_norm": 1.311470627784729, "learning_rate": 6.99700201233367e-06, "loss": 0.5584, "step": 714 }, { "epoch": 0.04289914201715966, "grad_norm": 1.3768731355667114, "learning_rate": 6.996973800201434e-06, "loss": 0.5091, "step": 715 }, { "epoch": 0.042959140817183654, "grad_norm": 1.566356897354126, "learning_rate": 6.99694545600443e-06, "loss": 0.5669, "step": 716 }, { "epoch": 0.04301913961720766, "grad_norm": 1.456204891204834, "learning_rate": 6.9969169797437285e-06, "loss": 0.5629, "step": 717 }, { "epoch": 0.043079138417231655, "grad_norm": 1.2716645002365112, "learning_rate": 6.996888371420405e-06, "loss": 0.542, "step": 718 }, { "epoch": 0.04313913721725565, "grad_norm": 1.3092966079711914, "learning_rate": 6.996859631035539e-06, "loss": 0.4749, "step": 719 }, { "epoch": 0.043199136017279656, "grad_norm": 1.2977348566055298, "learning_rate": 6.996830758590216e-06, "loss": 0.5799, "step": 720 }, { "epoch": 0.04325913481730365, "grad_norm": 1.4752153158187866, "learning_rate": 6.9968017540855265e-06, "loss": 0.5561, "step": 721 }, { "epoch": 0.04331913361732765, "grad_norm": 1.3538241386413574, "learning_rate": 6.9967726175225666e-06, "loss": 0.4865, "step": 722 }, { "epoch": 0.043379132417351654, "grad_norm": 1.3715746402740479, "learning_rate": 6.996743348902436e-06, "loss": 0.5033, "step": 723 }, { "epoch": 0.04343913121737565, "grad_norm": 1.4615790843963623, "learning_rate": 6.9967139482262404e-06, "loss": 0.5651, "step": 724 }, { "epoch": 0.043499130017399655, "grad_norm": 1.3488208055496216, "learning_rate": 6.996684415495091e-06, "loss": 0.5805, "step": 725 }, { "epoch": 0.04355912881742365, "grad_norm": 1.253905177116394, "learning_rate": 6.9966547507101e-06, "loss": 0.4698, "step": 726 }, { "epoch": 0.04361912761744765, "grad_norm": 1.3142186403274536, "learning_rate": 6.996624953872391e-06, "loss": 0.5343, "step": 727 }, { "epoch": 0.04367912641747165, "grad_norm": 1.3330172300338745, "learning_rate": 6.996595024983088e-06, "loss": 0.486, "step": 728 }, { "epoch": 0.04373912521749565, "grad_norm": 1.5463203191757202, "learning_rate": 6.996564964043321e-06, "loss": 0.6099, "step": 729 }, { "epoch": 0.04379912401751965, "grad_norm": 1.2473597526550293, "learning_rate": 6.996534771054226e-06, "loss": 0.5, "step": 730 }, { "epoch": 0.04385912281754365, "grad_norm": 1.310286283493042, "learning_rate": 6.9965044460169424e-06, "loss": 0.5566, "step": 731 }, { "epoch": 0.04391912161756765, "grad_norm": 1.3469955921173096, "learning_rate": 6.996473988932617e-06, "loss": 0.5527, "step": 732 }, { "epoch": 0.043979120417591645, "grad_norm": 1.3549875020980835, "learning_rate": 6.996443399802398e-06, "loss": 0.5226, "step": 733 }, { "epoch": 0.04403911921761565, "grad_norm": 1.2235618829727173, "learning_rate": 6.996412678627442e-06, "loss": 0.4917, "step": 734 }, { "epoch": 0.044099118017639646, "grad_norm": 1.3639779090881348, "learning_rate": 6.996381825408908e-06, "loss": 0.5279, "step": 735 }, { "epoch": 0.04415911681766364, "grad_norm": 1.3935056924819946, "learning_rate": 6.996350840147963e-06, "loss": 0.5226, "step": 736 }, { "epoch": 0.04421911561768765, "grad_norm": 1.2197083234786987, "learning_rate": 6.996319722845775e-06, "loss": 0.5179, "step": 737 }, { "epoch": 0.044279114417711644, "grad_norm": 1.425742745399475, "learning_rate": 6.9962884735035215e-06, "loss": 0.5151, "step": 738 }, { "epoch": 0.04433911321773565, "grad_norm": 1.3423452377319336, "learning_rate": 6.99625709212238e-06, "loss": 0.54, "step": 739 }, { "epoch": 0.044399112017759645, "grad_norm": 1.1975425481796265, "learning_rate": 6.996225578703538e-06, "loss": 0.5742, "step": 740 }, { "epoch": 0.04445911081778364, "grad_norm": 1.305208444595337, "learning_rate": 6.996193933248185e-06, "loss": 0.5301, "step": 741 }, { "epoch": 0.044519109617807646, "grad_norm": 1.5444016456604004, "learning_rate": 6.996162155757515e-06, "loss": 0.5391, "step": 742 }, { "epoch": 0.044579108417831644, "grad_norm": 1.2119715213775635, "learning_rate": 6.996130246232729e-06, "loss": 0.504, "step": 743 }, { "epoch": 0.04463910721785564, "grad_norm": 1.2402998208999634, "learning_rate": 6.9960982046750315e-06, "loss": 0.5473, "step": 744 }, { "epoch": 0.044699106017879645, "grad_norm": 1.2896944284439087, "learning_rate": 6.996066031085634e-06, "loss": 0.5245, "step": 745 }, { "epoch": 0.04475910481790364, "grad_norm": 1.3029683828353882, "learning_rate": 6.99603372546575e-06, "loss": 0.5872, "step": 746 }, { "epoch": 0.04481910361792764, "grad_norm": 1.382485032081604, "learning_rate": 6.9960012878166005e-06, "loss": 0.5467, "step": 747 }, { "epoch": 0.04487910241795164, "grad_norm": 1.351680040359497, "learning_rate": 6.995968718139409e-06, "loss": 0.5068, "step": 748 }, { "epoch": 0.04493910121797564, "grad_norm": 1.4984773397445679, "learning_rate": 6.995936016435408e-06, "loss": 0.5485, "step": 749 }, { "epoch": 0.04499910001799964, "grad_norm": 1.3183543682098389, "learning_rate": 6.99590318270583e-06, "loss": 0.5332, "step": 750 }, { "epoch": 0.04505909881802364, "grad_norm": 1.230234146118164, "learning_rate": 6.9958702169519166e-06, "loss": 0.4807, "step": 751 }, { "epoch": 0.04511909761804764, "grad_norm": 1.2272886037826538, "learning_rate": 6.995837119174912e-06, "loss": 0.4738, "step": 752 }, { "epoch": 0.04517909641807164, "grad_norm": 1.3017442226409912, "learning_rate": 6.9958038893760666e-06, "loss": 0.5428, "step": 753 }, { "epoch": 0.04523909521809564, "grad_norm": 1.4009737968444824, "learning_rate": 6.995770527556635e-06, "loss": 0.516, "step": 754 }, { "epoch": 0.045299094018119636, "grad_norm": 1.2338224649429321, "learning_rate": 6.995737033717878e-06, "loss": 0.4919, "step": 755 }, { "epoch": 0.04535909281814364, "grad_norm": 1.212044358253479, "learning_rate": 6.995703407861058e-06, "loss": 0.4798, "step": 756 }, { "epoch": 0.04541909161816764, "grad_norm": 1.2938967943191528, "learning_rate": 6.995669649987448e-06, "loss": 0.5324, "step": 757 }, { "epoch": 0.045479090418191634, "grad_norm": 1.5130561590194702, "learning_rate": 6.995635760098322e-06, "loss": 0.6159, "step": 758 }, { "epoch": 0.04553908921821564, "grad_norm": 1.352429986000061, "learning_rate": 6.995601738194959e-06, "loss": 0.5473, "step": 759 }, { "epoch": 0.045599088018239635, "grad_norm": 1.3392469882965088, "learning_rate": 6.995567584278644e-06, "loss": 0.4938, "step": 760 }, { "epoch": 0.04565908681826363, "grad_norm": 1.399222731590271, "learning_rate": 6.995533298350666e-06, "loss": 0.5146, "step": 761 }, { "epoch": 0.045719085618287636, "grad_norm": 1.4636374711990356, "learning_rate": 6.9954988804123225e-06, "loss": 0.5571, "step": 762 }, { "epoch": 0.04577908441831163, "grad_norm": 1.2947982549667358, "learning_rate": 6.995464330464912e-06, "loss": 0.4464, "step": 763 }, { "epoch": 0.04583908321833563, "grad_norm": 1.2384401559829712, "learning_rate": 6.995429648509738e-06, "loss": 0.4745, "step": 764 }, { "epoch": 0.045899082018359634, "grad_norm": 1.3476409912109375, "learning_rate": 6.995394834548112e-06, "loss": 0.527, "step": 765 }, { "epoch": 0.04595908081838363, "grad_norm": 1.3202143907546997, "learning_rate": 6.995359888581348e-06, "loss": 0.4848, "step": 766 }, { "epoch": 0.046019079618407635, "grad_norm": 1.2412084341049194, "learning_rate": 6.9953248106107656e-06, "loss": 0.4895, "step": 767 }, { "epoch": 0.04607907841843163, "grad_norm": 1.3893996477127075, "learning_rate": 6.99528960063769e-06, "loss": 0.5328, "step": 768 }, { "epoch": 0.04613907721845563, "grad_norm": 1.4491032361984253, "learning_rate": 6.995254258663449e-06, "loss": 0.5358, "step": 769 }, { "epoch": 0.04619907601847963, "grad_norm": 1.4207390546798706, "learning_rate": 6.995218784689381e-06, "loss": 0.5041, "step": 770 }, { "epoch": 0.04625907481850363, "grad_norm": 1.310045599937439, "learning_rate": 6.995183178716823e-06, "loss": 0.5712, "step": 771 }, { "epoch": 0.04631907361852763, "grad_norm": 1.2247371673583984, "learning_rate": 6.995147440747121e-06, "loss": 0.5591, "step": 772 }, { "epoch": 0.04637907241855163, "grad_norm": 1.4057798385620117, "learning_rate": 6.995111570781623e-06, "loss": 0.524, "step": 773 }, { "epoch": 0.04643907121857563, "grad_norm": 1.269821047782898, "learning_rate": 6.995075568821685e-06, "loss": 0.5224, "step": 774 }, { "epoch": 0.046499070018599625, "grad_norm": 1.351027011871338, "learning_rate": 6.995039434868667e-06, "loss": 0.5144, "step": 775 }, { "epoch": 0.04655906881862363, "grad_norm": 1.3230100870132446, "learning_rate": 6.995003168923931e-06, "loss": 0.4939, "step": 776 }, { "epoch": 0.046619067618647626, "grad_norm": 1.2080178260803223, "learning_rate": 6.994966770988851e-06, "loss": 0.5311, "step": 777 }, { "epoch": 0.04667906641867162, "grad_norm": 1.2083410024642944, "learning_rate": 6.9949302410647985e-06, "loss": 0.461, "step": 778 }, { "epoch": 0.04673906521869563, "grad_norm": 1.207929253578186, "learning_rate": 6.994893579153152e-06, "loss": 0.4839, "step": 779 }, { "epoch": 0.046799064018719624, "grad_norm": 1.264149785041809, "learning_rate": 6.994856785255299e-06, "loss": 0.4972, "step": 780 }, { "epoch": 0.04685906281874363, "grad_norm": 1.3320231437683105, "learning_rate": 6.994819859372628e-06, "loss": 0.538, "step": 781 }, { "epoch": 0.046919061618767625, "grad_norm": 1.1152012348175049, "learning_rate": 6.994782801506533e-06, "loss": 0.449, "step": 782 }, { "epoch": 0.04697906041879162, "grad_norm": 1.3660714626312256, "learning_rate": 6.994745611658413e-06, "loss": 0.5082, "step": 783 }, { "epoch": 0.047039059218815626, "grad_norm": 1.2198632955551147, "learning_rate": 6.994708289829674e-06, "loss": 0.523, "step": 784 }, { "epoch": 0.047099058018839624, "grad_norm": 1.2635202407836914, "learning_rate": 6.9946708360217245e-06, "loss": 0.4931, "step": 785 }, { "epoch": 0.04715905681886362, "grad_norm": 1.2651549577713013, "learning_rate": 6.99463325023598e-06, "loss": 0.503, "step": 786 }, { "epoch": 0.047219055618887625, "grad_norm": 1.3960734605789185, "learning_rate": 6.9945955324738576e-06, "loss": 0.4783, "step": 787 }, { "epoch": 0.04727905441891162, "grad_norm": 1.3389012813568115, "learning_rate": 6.994557682736784e-06, "loss": 0.5096, "step": 788 }, { "epoch": 0.04733905321893562, "grad_norm": 1.4387904405593872, "learning_rate": 6.9945197010261885e-06, "loss": 0.5888, "step": 789 }, { "epoch": 0.04739905201895962, "grad_norm": 1.3553533554077148, "learning_rate": 6.994481587343504e-06, "loss": 0.5544, "step": 790 }, { "epoch": 0.04745905081898362, "grad_norm": 1.286298155784607, "learning_rate": 6.994443341690172e-06, "loss": 0.4962, "step": 791 }, { "epoch": 0.04751904961900762, "grad_norm": 1.2585546970367432, "learning_rate": 6.994404964067635e-06, "loss": 0.4445, "step": 792 }, { "epoch": 0.04757904841903162, "grad_norm": 1.5108516216278076, "learning_rate": 6.994366454477342e-06, "loss": 0.58, "step": 793 }, { "epoch": 0.04763904721905562, "grad_norm": 1.3567999601364136, "learning_rate": 6.994327812920749e-06, "loss": 0.5215, "step": 794 }, { "epoch": 0.04769904601907962, "grad_norm": 1.2255074977874756, "learning_rate": 6.994289039399315e-06, "loss": 0.5303, "step": 795 }, { "epoch": 0.04775904481910362, "grad_norm": 1.3909106254577637, "learning_rate": 6.994250133914503e-06, "loss": 0.5171, "step": 796 }, { "epoch": 0.047819043619127616, "grad_norm": 1.3081343173980713, "learning_rate": 6.9942110964677844e-06, "loss": 0.5826, "step": 797 }, { "epoch": 0.04787904241915162, "grad_norm": 1.2949200868606567, "learning_rate": 6.994171927060632e-06, "loss": 0.5543, "step": 798 }, { "epoch": 0.04793904121917562, "grad_norm": 1.244994878768921, "learning_rate": 6.994132625694525e-06, "loss": 0.5087, "step": 799 }, { "epoch": 0.047999040019199614, "grad_norm": 1.4921956062316895, "learning_rate": 6.9940931923709474e-06, "loss": 0.5538, "step": 800 }, { "epoch": 0.04805903881922362, "grad_norm": 1.2807940244674683, "learning_rate": 6.99405362709139e-06, "loss": 0.5168, "step": 801 }, { "epoch": 0.048119037619247615, "grad_norm": 1.4176850318908691, "learning_rate": 6.994013929857346e-06, "loss": 0.5442, "step": 802 }, { "epoch": 0.04817903641927161, "grad_norm": 1.323738932609558, "learning_rate": 6.993974100670314e-06, "loss": 0.5277, "step": 803 }, { "epoch": 0.048239035219295616, "grad_norm": 1.4242743253707886, "learning_rate": 6.993934139531798e-06, "loss": 0.51, "step": 804 }, { "epoch": 0.04829903401931961, "grad_norm": 1.400254249572754, "learning_rate": 6.993894046443309e-06, "loss": 0.5506, "step": 805 }, { "epoch": 0.04835903281934361, "grad_norm": 1.3679649829864502, "learning_rate": 6.99385382140636e-06, "loss": 0.5192, "step": 806 }, { "epoch": 0.048419031619367614, "grad_norm": 1.3343507051467896, "learning_rate": 6.99381346442247e-06, "loss": 0.5188, "step": 807 }, { "epoch": 0.04847903041939161, "grad_norm": 1.3980538845062256, "learning_rate": 6.9937729754931635e-06, "loss": 0.5415, "step": 808 }, { "epoch": 0.048539029219415615, "grad_norm": 1.360253930091858, "learning_rate": 6.993732354619968e-06, "loss": 0.5548, "step": 809 }, { "epoch": 0.04859902801943961, "grad_norm": 1.2246748208999634, "learning_rate": 6.99369160180442e-06, "loss": 0.5051, "step": 810 }, { "epoch": 0.04865902681946361, "grad_norm": 1.3259949684143066, "learning_rate": 6.993650717048057e-06, "loss": 0.521, "step": 811 }, { "epoch": 0.04871902561948761, "grad_norm": 1.4854531288146973, "learning_rate": 6.993609700352424e-06, "loss": 0.5328, "step": 812 }, { "epoch": 0.04877902441951161, "grad_norm": 1.2895179986953735, "learning_rate": 6.9935685517190685e-06, "loss": 0.5411, "step": 813 }, { "epoch": 0.04883902321953561, "grad_norm": 1.2922269105911255, "learning_rate": 6.993527271149547e-06, "loss": 0.4642, "step": 814 }, { "epoch": 0.04889902201955961, "grad_norm": 1.2334058284759521, "learning_rate": 6.993485858645416e-06, "loss": 0.6062, "step": 815 }, { "epoch": 0.04895902081958361, "grad_norm": 1.2012187242507935, "learning_rate": 6.993444314208239e-06, "loss": 0.4292, "step": 816 }, { "epoch": 0.049019019619607605, "grad_norm": 1.2289416790008545, "learning_rate": 6.993402637839588e-06, "loss": 0.5503, "step": 817 }, { "epoch": 0.04907901841963161, "grad_norm": 1.2441409826278687, "learning_rate": 6.993360829541035e-06, "loss": 0.5882, "step": 818 }, { "epoch": 0.049139017219655606, "grad_norm": 1.3840571641921997, "learning_rate": 6.993318889314159e-06, "loss": 0.5031, "step": 819 }, { "epoch": 0.0491990160196796, "grad_norm": 1.1793981790542603, "learning_rate": 6.993276817160545e-06, "loss": 0.4969, "step": 820 }, { "epoch": 0.04925901481970361, "grad_norm": 1.2240214347839355, "learning_rate": 6.9932346130817805e-06, "loss": 0.5093, "step": 821 }, { "epoch": 0.049319013619727604, "grad_norm": 1.492555856704712, "learning_rate": 6.9931922770794585e-06, "loss": 0.5779, "step": 822 }, { "epoch": 0.04937901241975161, "grad_norm": 1.599731683731079, "learning_rate": 6.99314980915518e-06, "loss": 0.5632, "step": 823 }, { "epoch": 0.049439011219775605, "grad_norm": 1.518170952796936, "learning_rate": 6.993107209310548e-06, "loss": 0.5674, "step": 824 }, { "epoch": 0.0494990100197996, "grad_norm": 1.3021414279937744, "learning_rate": 6.993064477547172e-06, "loss": 0.531, "step": 825 }, { "epoch": 0.049559008819823606, "grad_norm": 1.4016010761260986, "learning_rate": 6.993021613866665e-06, "loss": 0.4969, "step": 826 }, { "epoch": 0.049619007619847603, "grad_norm": 1.1445047855377197, "learning_rate": 6.992978618270645e-06, "loss": 0.4355, "step": 827 }, { "epoch": 0.0496790064198716, "grad_norm": 1.4451795816421509, "learning_rate": 6.992935490760737e-06, "loss": 0.5517, "step": 828 }, { "epoch": 0.049739005219895605, "grad_norm": 1.356535792350769, "learning_rate": 6.992892231338568e-06, "loss": 0.5283, "step": 829 }, { "epoch": 0.0497990040199196, "grad_norm": 1.259595274925232, "learning_rate": 6.9928488400057746e-06, "loss": 0.5694, "step": 830 }, { "epoch": 0.0498590028199436, "grad_norm": 1.3504951000213623, "learning_rate": 6.992805316763993e-06, "loss": 0.4448, "step": 831 }, { "epoch": 0.0499190016199676, "grad_norm": 1.3164551258087158, "learning_rate": 6.992761661614869e-06, "loss": 0.5289, "step": 832 }, { "epoch": 0.0499790004199916, "grad_norm": 1.3929243087768555, "learning_rate": 6.992717874560048e-06, "loss": 0.5536, "step": 833 }, { "epoch": 0.0500389992200156, "grad_norm": 1.2969763278961182, "learning_rate": 6.992673955601187e-06, "loss": 0.5422, "step": 834 }, { "epoch": 0.0500989980200396, "grad_norm": 1.2261779308319092, "learning_rate": 6.992629904739944e-06, "loss": 0.5401, "step": 835 }, { "epoch": 0.0501589968200636, "grad_norm": 1.2396657466888428, "learning_rate": 6.99258572197798e-06, "loss": 0.5344, "step": 836 }, { "epoch": 0.0502189956200876, "grad_norm": 1.6246984004974365, "learning_rate": 6.992541407316966e-06, "loss": 0.511, "step": 837 }, { "epoch": 0.0502789944201116, "grad_norm": 1.1697872877120972, "learning_rate": 6.992496960758576e-06, "loss": 0.4985, "step": 838 }, { "epoch": 0.050338993220135596, "grad_norm": 1.3560078144073486, "learning_rate": 6.992452382304488e-06, "loss": 0.5391, "step": 839 }, { "epoch": 0.0503989920201596, "grad_norm": 1.4213351011276245, "learning_rate": 6.992407671956383e-06, "loss": 0.4716, "step": 840 }, { "epoch": 0.0504589908201836, "grad_norm": 1.3918343782424927, "learning_rate": 6.992362829715954e-06, "loss": 0.5029, "step": 841 }, { "epoch": 0.050518989620207594, "grad_norm": 1.3475455045700073, "learning_rate": 6.99231785558489e-06, "loss": 0.5525, "step": 842 }, { "epoch": 0.0505789884202316, "grad_norm": 1.2333465814590454, "learning_rate": 6.992272749564893e-06, "loss": 0.5326, "step": 843 }, { "epoch": 0.050638987220255595, "grad_norm": 1.2935023307800293, "learning_rate": 6.992227511657664e-06, "loss": 0.53, "step": 844 }, { "epoch": 0.05069898602027959, "grad_norm": 1.2523059844970703, "learning_rate": 6.992182141864913e-06, "loss": 0.5065, "step": 845 }, { "epoch": 0.050758984820303596, "grad_norm": 1.1964410543441772, "learning_rate": 6.992136640188352e-06, "loss": 0.5569, "step": 846 }, { "epoch": 0.05081898362032759, "grad_norm": 1.2408137321472168, "learning_rate": 6.9920910066297005e-06, "loss": 0.5085, "step": 847 }, { "epoch": 0.05087898242035159, "grad_norm": 1.2482928037643433, "learning_rate": 6.992045241190682e-06, "loss": 0.4825, "step": 848 }, { "epoch": 0.050938981220375594, "grad_norm": 1.2949601411819458, "learning_rate": 6.991999343873025e-06, "loss": 0.4857, "step": 849 }, { "epoch": 0.05099898002039959, "grad_norm": 1.4532890319824219, "learning_rate": 6.991953314678462e-06, "loss": 0.5301, "step": 850 }, { "epoch": 0.05105897882042359, "grad_norm": 1.3897895812988281, "learning_rate": 6.991907153608731e-06, "loss": 0.4915, "step": 851 }, { "epoch": 0.05111897762044759, "grad_norm": 1.2915116548538208, "learning_rate": 6.991860860665576e-06, "loss": 0.5107, "step": 852 }, { "epoch": 0.05117897642047159, "grad_norm": 1.283645510673523, "learning_rate": 6.991814435850745e-06, "loss": 0.5361, "step": 853 }, { "epoch": 0.05123897522049559, "grad_norm": 1.2745981216430664, "learning_rate": 6.991767879165993e-06, "loss": 0.5049, "step": 854 }, { "epoch": 0.05129897402051959, "grad_norm": 1.1895300149917603, "learning_rate": 6.991721190613075e-06, "loss": 0.4697, "step": 855 }, { "epoch": 0.05135897282054359, "grad_norm": 1.2638505697250366, "learning_rate": 6.9916743701937566e-06, "loss": 0.5298, "step": 856 }, { "epoch": 0.05141897162056759, "grad_norm": 1.2978359460830688, "learning_rate": 6.991627417909805e-06, "loss": 0.5742, "step": 857 }, { "epoch": 0.05147897042059159, "grad_norm": 1.2300610542297363, "learning_rate": 6.9915803337629935e-06, "loss": 0.5586, "step": 858 }, { "epoch": 0.051538969220615585, "grad_norm": 1.255302906036377, "learning_rate": 6.991533117755101e-06, "loss": 0.4842, "step": 859 }, { "epoch": 0.05159896802063959, "grad_norm": 1.4611632823944092, "learning_rate": 6.991485769887909e-06, "loss": 0.502, "step": 860 }, { "epoch": 0.051658966820663586, "grad_norm": 1.3888967037200928, "learning_rate": 6.9914382901632074e-06, "loss": 0.5149, "step": 861 }, { "epoch": 0.05171896562068758, "grad_norm": 1.2516591548919678, "learning_rate": 6.991390678582788e-06, "loss": 0.5329, "step": 862 }, { "epoch": 0.05177896442071159, "grad_norm": 1.2655879259109497, "learning_rate": 6.99134293514845e-06, "loss": 0.467, "step": 863 }, { "epoch": 0.051838963220735584, "grad_norm": 1.2316104173660278, "learning_rate": 6.991295059861996e-06, "loss": 0.5199, "step": 864 }, { "epoch": 0.05189896202075958, "grad_norm": 1.3382257223129272, "learning_rate": 6.991247052725234e-06, "loss": 0.4736, "step": 865 }, { "epoch": 0.051958960820783585, "grad_norm": 1.3001338243484497, "learning_rate": 6.9911989137399754e-06, "loss": 0.5038, "step": 866 }, { "epoch": 0.05201895962080758, "grad_norm": 1.6623636484146118, "learning_rate": 6.991150642908042e-06, "loss": 0.5594, "step": 867 }, { "epoch": 0.052078958420831586, "grad_norm": 1.2675998210906982, "learning_rate": 6.991102240231252e-06, "loss": 0.5164, "step": 868 }, { "epoch": 0.052138957220855583, "grad_norm": 1.3367234468460083, "learning_rate": 6.991053705711437e-06, "loss": 0.485, "step": 869 }, { "epoch": 0.05219895602087958, "grad_norm": 1.2677749395370483, "learning_rate": 6.991005039350429e-06, "loss": 0.5073, "step": 870 }, { "epoch": 0.052258954820903585, "grad_norm": 1.3711626529693604, "learning_rate": 6.990956241150065e-06, "loss": 0.5362, "step": 871 }, { "epoch": 0.05231895362092758, "grad_norm": 1.5332353115081787, "learning_rate": 6.99090731111219e-06, "loss": 0.5909, "step": 872 }, { "epoch": 0.05237895242095158, "grad_norm": 1.2614067792892456, "learning_rate": 6.990858249238648e-06, "loss": 0.4931, "step": 873 }, { "epoch": 0.05243895122097558, "grad_norm": 1.4993412494659424, "learning_rate": 6.990809055531296e-06, "loss": 0.5006, "step": 874 }, { "epoch": 0.05249895002099958, "grad_norm": 1.202035665512085, "learning_rate": 6.99075972999199e-06, "loss": 0.4619, "step": 875 }, { "epoch": 0.05255894882102358, "grad_norm": 1.3000659942626953, "learning_rate": 6.990710272622592e-06, "loss": 0.5201, "step": 876 }, { "epoch": 0.05261894762104758, "grad_norm": 1.2197049856185913, "learning_rate": 6.990660683424971e-06, "loss": 0.4993, "step": 877 }, { "epoch": 0.05267894642107158, "grad_norm": 1.4056278467178345, "learning_rate": 6.990610962401e-06, "loss": 0.5016, "step": 878 }, { "epoch": 0.052738945221095575, "grad_norm": 1.4721492528915405, "learning_rate": 6.990561109552556e-06, "loss": 0.5847, "step": 879 }, { "epoch": 0.05279894402111958, "grad_norm": 1.285626769065857, "learning_rate": 6.990511124881521e-06, "loss": 0.5431, "step": 880 }, { "epoch": 0.052858942821143576, "grad_norm": 1.322486162185669, "learning_rate": 6.990461008389784e-06, "loss": 0.512, "step": 881 }, { "epoch": 0.05291894162116758, "grad_norm": 1.5003936290740967, "learning_rate": 6.9904107600792375e-06, "loss": 0.5803, "step": 882 }, { "epoch": 0.05297894042119158, "grad_norm": 1.4008221626281738, "learning_rate": 6.990360379951779e-06, "loss": 0.5059, "step": 883 }, { "epoch": 0.053038939221215574, "grad_norm": 1.3668962717056274, "learning_rate": 6.99030986800931e-06, "loss": 0.5283, "step": 884 }, { "epoch": 0.05309893802123958, "grad_norm": 1.314928650856018, "learning_rate": 6.990259224253739e-06, "loss": 0.5114, "step": 885 }, { "epoch": 0.053158936821263575, "grad_norm": 1.3716119527816772, "learning_rate": 6.99020844868698e-06, "loss": 0.5632, "step": 886 }, { "epoch": 0.05321893562128757, "grad_norm": 1.3847767114639282, "learning_rate": 6.990157541310948e-06, "loss": 0.4797, "step": 887 }, { "epoch": 0.053278934421311576, "grad_norm": 1.355457067489624, "learning_rate": 6.990106502127568e-06, "loss": 0.5215, "step": 888 }, { "epoch": 0.05333893322133557, "grad_norm": 1.2526450157165527, "learning_rate": 6.990055331138765e-06, "loss": 0.4821, "step": 889 }, { "epoch": 0.05339893202135957, "grad_norm": 1.344834566116333, "learning_rate": 6.990004028346474e-06, "loss": 0.564, "step": 890 }, { "epoch": 0.053458930821383574, "grad_norm": 1.2290171384811401, "learning_rate": 6.9899525937526306e-06, "loss": 0.5069, "step": 891 }, { "epoch": 0.05351892962140757, "grad_norm": 1.3521116971969604, "learning_rate": 6.989901027359178e-06, "loss": 0.5007, "step": 892 }, { "epoch": 0.05357892842143157, "grad_norm": 1.376163125038147, "learning_rate": 6.989849329168064e-06, "loss": 0.5528, "step": 893 }, { "epoch": 0.05363892722145557, "grad_norm": 1.3377867937088013, "learning_rate": 6.98979749918124e-06, "loss": 0.4727, "step": 894 }, { "epoch": 0.05369892602147957, "grad_norm": 1.3262579441070557, "learning_rate": 6.989745537400665e-06, "loss": 0.5919, "step": 895 }, { "epoch": 0.05375892482150357, "grad_norm": 1.2918978929519653, "learning_rate": 6.9896934438282985e-06, "loss": 0.5078, "step": 896 }, { "epoch": 0.05381892362152757, "grad_norm": 1.1883485317230225, "learning_rate": 6.9896412184661106e-06, "loss": 0.5291, "step": 897 }, { "epoch": 0.05387892242155157, "grad_norm": 1.3445186614990234, "learning_rate": 6.989588861316073e-06, "loss": 0.5428, "step": 898 }, { "epoch": 0.05393892122157557, "grad_norm": 1.3699411153793335, "learning_rate": 6.989536372380162e-06, "loss": 0.5757, "step": 899 }, { "epoch": 0.05399892002159957, "grad_norm": 1.1818398237228394, "learning_rate": 6.98948375166036e-06, "loss": 0.4874, "step": 900 }, { "epoch": 0.054058918821623565, "grad_norm": 1.3860747814178467, "learning_rate": 6.989430999158656e-06, "loss": 0.4962, "step": 901 }, { "epoch": 0.05411891762164757, "grad_norm": 1.4059020280838013, "learning_rate": 6.98937811487704e-06, "loss": 0.5234, "step": 902 }, { "epoch": 0.054178916421671566, "grad_norm": 1.2728922367095947, "learning_rate": 6.989325098817511e-06, "loss": 0.4858, "step": 903 }, { "epoch": 0.05423891522169556, "grad_norm": 1.2805123329162598, "learning_rate": 6.98927195098207e-06, "loss": 0.5137, "step": 904 }, { "epoch": 0.05429891402171957, "grad_norm": 1.2914457321166992, "learning_rate": 6.9892186713727246e-06, "loss": 0.5101, "step": 905 }, { "epoch": 0.054358912821743564, "grad_norm": 1.3603579998016357, "learning_rate": 6.989165259991486e-06, "loss": 0.51, "step": 906 }, { "epoch": 0.05441891162176756, "grad_norm": 1.21491277217865, "learning_rate": 6.989111716840373e-06, "loss": 0.5348, "step": 907 }, { "epoch": 0.054478910421791565, "grad_norm": 1.2007031440734863, "learning_rate": 6.989058041921407e-06, "loss": 0.4723, "step": 908 }, { "epoch": 0.05453890922181556, "grad_norm": 1.3583301305770874, "learning_rate": 6.989004235236614e-06, "loss": 0.5044, "step": 909 }, { "epoch": 0.054598908021839566, "grad_norm": 1.301643967628479, "learning_rate": 6.988950296788028e-06, "loss": 0.4684, "step": 910 }, { "epoch": 0.054658906821863563, "grad_norm": 1.361447811126709, "learning_rate": 6.988896226577684e-06, "loss": 0.5169, "step": 911 }, { "epoch": 0.05471890562188756, "grad_norm": 1.2316865921020508, "learning_rate": 6.988842024607625e-06, "loss": 0.4885, "step": 912 }, { "epoch": 0.054778904421911565, "grad_norm": 1.2708836793899536, "learning_rate": 6.988787690879898e-06, "loss": 0.5154, "step": 913 }, { "epoch": 0.05483890322193556, "grad_norm": 1.1807211637496948, "learning_rate": 6.988733225396555e-06, "loss": 0.5078, "step": 914 }, { "epoch": 0.05489890202195956, "grad_norm": 1.3050910234451294, "learning_rate": 6.9886786281596535e-06, "loss": 0.5153, "step": 915 }, { "epoch": 0.05495890082198356, "grad_norm": 1.2193655967712402, "learning_rate": 6.988623899171253e-06, "loss": 0.518, "step": 916 }, { "epoch": 0.05501889962200756, "grad_norm": 1.2265849113464355, "learning_rate": 6.9885690384334225e-06, "loss": 0.5438, "step": 917 }, { "epoch": 0.05507889842203156, "grad_norm": 1.425850749015808, "learning_rate": 6.988514045948233e-06, "loss": 0.5467, "step": 918 }, { "epoch": 0.05513889722205556, "grad_norm": 1.281959891319275, "learning_rate": 6.988458921717763e-06, "loss": 0.5068, "step": 919 }, { "epoch": 0.05519889602207956, "grad_norm": 1.532535433769226, "learning_rate": 6.988403665744091e-06, "loss": 0.5269, "step": 920 }, { "epoch": 0.055258894822103555, "grad_norm": 1.2597277164459229, "learning_rate": 6.988348278029307e-06, "loss": 0.5383, "step": 921 }, { "epoch": 0.05531889362212756, "grad_norm": 1.3425123691558838, "learning_rate": 6.988292758575501e-06, "loss": 0.5495, "step": 922 }, { "epoch": 0.055378892422151556, "grad_norm": 1.274692177772522, "learning_rate": 6.98823710738477e-06, "loss": 0.5075, "step": 923 }, { "epoch": 0.05543889122217556, "grad_norm": 1.228792667388916, "learning_rate": 6.988181324459216e-06, "loss": 0.4762, "step": 924 }, { "epoch": 0.05549889002219956, "grad_norm": 1.3413430452346802, "learning_rate": 6.988125409800945e-06, "loss": 0.5367, "step": 925 }, { "epoch": 0.055558888822223554, "grad_norm": 1.3134260177612305, "learning_rate": 6.98806936341207e-06, "loss": 0.4681, "step": 926 }, { "epoch": 0.05561888762224756, "grad_norm": 1.3727997541427612, "learning_rate": 6.988013185294707e-06, "loss": 0.517, "step": 927 }, { "epoch": 0.055678886422271555, "grad_norm": 1.279618501663208, "learning_rate": 6.987956875450976e-06, "loss": 0.5545, "step": 928 }, { "epoch": 0.05573888522229555, "grad_norm": 1.299426555633545, "learning_rate": 6.987900433883005e-06, "loss": 0.5255, "step": 929 }, { "epoch": 0.055798884022319556, "grad_norm": 1.2547582387924194, "learning_rate": 6.987843860592927e-06, "loss": 0.55, "step": 930 }, { "epoch": 0.05585888282234355, "grad_norm": 1.491783857345581, "learning_rate": 6.987787155582876e-06, "loss": 0.55, "step": 931 }, { "epoch": 0.05591888162236755, "grad_norm": 1.3539471626281738, "learning_rate": 6.987730318854994e-06, "loss": 0.499, "step": 932 }, { "epoch": 0.055978880422391554, "grad_norm": 1.380778193473816, "learning_rate": 6.987673350411428e-06, "loss": 0.5047, "step": 933 }, { "epoch": 0.05603887922241555, "grad_norm": 1.2560011148452759, "learning_rate": 6.98761625025433e-06, "loss": 0.4765, "step": 934 }, { "epoch": 0.05609887802243955, "grad_norm": 1.2306584119796753, "learning_rate": 6.987559018385855e-06, "loss": 0.5433, "step": 935 }, { "epoch": 0.05615887682246355, "grad_norm": 1.5604612827301025, "learning_rate": 6.987501654808167e-06, "loss": 0.4892, "step": 936 }, { "epoch": 0.05621887562248755, "grad_norm": 1.3049476146697998, "learning_rate": 6.987444159523428e-06, "loss": 0.536, "step": 937 }, { "epoch": 0.05627887442251155, "grad_norm": 1.2324564456939697, "learning_rate": 6.987386532533813e-06, "loss": 0.4956, "step": 938 }, { "epoch": 0.05633887322253555, "grad_norm": 1.32008695602417, "learning_rate": 6.987328773841497e-06, "loss": 0.5348, "step": 939 }, { "epoch": 0.05639887202255955, "grad_norm": 1.224793553352356, "learning_rate": 6.987270883448662e-06, "loss": 0.46, "step": 940 }, { "epoch": 0.05645887082258355, "grad_norm": 1.3143543004989624, "learning_rate": 6.987212861357494e-06, "loss": 0.551, "step": 941 }, { "epoch": 0.05651886962260755, "grad_norm": 1.3780816793441772, "learning_rate": 6.987154707570183e-06, "loss": 0.5394, "step": 942 }, { "epoch": 0.056578868422631545, "grad_norm": 1.3488836288452148, "learning_rate": 6.987096422088926e-06, "loss": 0.5479, "step": 943 }, { "epoch": 0.05663886722265555, "grad_norm": 1.378463625907898, "learning_rate": 6.987038004915925e-06, "loss": 0.5343, "step": 944 }, { "epoch": 0.056698866022679546, "grad_norm": 1.3988796472549438, "learning_rate": 6.9869794560533845e-06, "loss": 0.5044, "step": 945 }, { "epoch": 0.05675886482270354, "grad_norm": 1.2999886274337769, "learning_rate": 6.9869207755035175e-06, "loss": 0.5373, "step": 946 }, { "epoch": 0.05681886362272755, "grad_norm": 1.2865480184555054, "learning_rate": 6.986861963268539e-06, "loss": 0.525, "step": 947 }, { "epoch": 0.056878862422751544, "grad_norm": 1.1855520009994507, "learning_rate": 6.98680301935067e-06, "loss": 0.4787, "step": 948 }, { "epoch": 0.05693886122277554, "grad_norm": 1.2874600887298584, "learning_rate": 6.986743943752137e-06, "loss": 0.476, "step": 949 }, { "epoch": 0.056998860022799545, "grad_norm": 1.271754503250122, "learning_rate": 6.98668473647517e-06, "loss": 0.4347, "step": 950 }, { "epoch": 0.05705885882282354, "grad_norm": 1.3546653985977173, "learning_rate": 6.986625397522007e-06, "loss": 0.4951, "step": 951 }, { "epoch": 0.057118857622847546, "grad_norm": 1.2312633991241455, "learning_rate": 6.986565926894887e-06, "loss": 0.5678, "step": 952 }, { "epoch": 0.05717885642287154, "grad_norm": 1.3985581398010254, "learning_rate": 6.9865063245960575e-06, "loss": 0.5544, "step": 953 }, { "epoch": 0.05723885522289554, "grad_norm": 1.1880232095718384, "learning_rate": 6.986446590627769e-06, "loss": 0.4755, "step": 954 }, { "epoch": 0.057298854022919544, "grad_norm": 1.229134202003479, "learning_rate": 6.986386724992275e-06, "loss": 0.4803, "step": 955 }, { "epoch": 0.05735885282294354, "grad_norm": 1.1825168132781982, "learning_rate": 6.986326727691839e-06, "loss": 0.5598, "step": 956 }, { "epoch": 0.05741885162296754, "grad_norm": 1.2473721504211426, "learning_rate": 6.9862665987287275e-06, "loss": 0.4593, "step": 957 }, { "epoch": 0.05747885042299154, "grad_norm": 1.308220624923706, "learning_rate": 6.986206338105209e-06, "loss": 0.533, "step": 958 }, { "epoch": 0.05753884922301554, "grad_norm": 1.3292491436004639, "learning_rate": 6.98614594582356e-06, "loss": 0.5122, "step": 959 }, { "epoch": 0.05759884802303954, "grad_norm": 1.2471693754196167, "learning_rate": 6.986085421886062e-06, "loss": 0.4862, "step": 960 }, { "epoch": 0.05765884682306354, "grad_norm": 1.2488577365875244, "learning_rate": 6.9860247662949995e-06, "loss": 0.4685, "step": 961 }, { "epoch": 0.05771884562308754, "grad_norm": 1.2692763805389404, "learning_rate": 6.985963979052665e-06, "loss": 0.47, "step": 962 }, { "epoch": 0.057778844423111535, "grad_norm": 1.3198379278182983, "learning_rate": 6.985903060161352e-06, "loss": 0.5261, "step": 963 }, { "epoch": 0.05783884322313554, "grad_norm": 1.3675355911254883, "learning_rate": 6.985842009623363e-06, "loss": 0.5164, "step": 964 }, { "epoch": 0.057898842023159536, "grad_norm": 1.275410771369934, "learning_rate": 6.985780827441002e-06, "loss": 0.4423, "step": 965 }, { "epoch": 0.05795884082318354, "grad_norm": 1.2064987421035767, "learning_rate": 6.985719513616581e-06, "loss": 0.5328, "step": 966 }, { "epoch": 0.05801883962320754, "grad_norm": 1.2239779233932495, "learning_rate": 6.985658068152414e-06, "loss": 0.4788, "step": 967 }, { "epoch": 0.058078838423231534, "grad_norm": 1.2442313432693481, "learning_rate": 6.9855964910508226e-06, "loss": 0.5162, "step": 968 }, { "epoch": 0.05813883722325554, "grad_norm": 1.2984651327133179, "learning_rate": 6.985534782314132e-06, "loss": 0.478, "step": 969 }, { "epoch": 0.058198836023279535, "grad_norm": 1.333554744720459, "learning_rate": 6.985472941944673e-06, "loss": 0.5864, "step": 970 }, { "epoch": 0.05825883482330353, "grad_norm": 1.3258966207504272, "learning_rate": 6.985410969944781e-06, "loss": 0.5007, "step": 971 }, { "epoch": 0.058318833623327536, "grad_norm": 1.4062252044677734, "learning_rate": 6.985348866316796e-06, "loss": 0.5388, "step": 972 }, { "epoch": 0.05837883242335153, "grad_norm": 1.2936748266220093, "learning_rate": 6.9852866310630635e-06, "loss": 0.501, "step": 973 }, { "epoch": 0.05843883122337553, "grad_norm": 1.3050878047943115, "learning_rate": 6.985224264185934e-06, "loss": 0.4742, "step": 974 }, { "epoch": 0.058498830023399534, "grad_norm": 1.1668165922164917, "learning_rate": 6.9851617656877625e-06, "loss": 0.4875, "step": 975 }, { "epoch": 0.05855882882342353, "grad_norm": 1.2315796613693237, "learning_rate": 6.98509913557091e-06, "loss": 0.5755, "step": 976 }, { "epoch": 0.05861882762344753, "grad_norm": 1.3279706239700317, "learning_rate": 6.985036373837741e-06, "loss": 0.4795, "step": 977 }, { "epoch": 0.05867882642347153, "grad_norm": 1.1676361560821533, "learning_rate": 6.984973480490625e-06, "loss": 0.5321, "step": 978 }, { "epoch": 0.05873882522349553, "grad_norm": 1.3579643964767456, "learning_rate": 6.9849104555319385e-06, "loss": 0.5439, "step": 979 }, { "epoch": 0.05879882402351953, "grad_norm": 1.2034059762954712, "learning_rate": 6.984847298964061e-06, "loss": 0.4894, "step": 980 }, { "epoch": 0.05885882282354353, "grad_norm": 1.4397177696228027, "learning_rate": 6.9847840107893795e-06, "loss": 0.5582, "step": 981 }, { "epoch": 0.05891882162356753, "grad_norm": 1.168708086013794, "learning_rate": 6.984720591010281e-06, "loss": 0.4739, "step": 982 }, { "epoch": 0.05897882042359153, "grad_norm": 1.378183126449585, "learning_rate": 6.984657039629163e-06, "loss": 0.5001, "step": 983 }, { "epoch": 0.05903881922361553, "grad_norm": 1.4030709266662598, "learning_rate": 6.984593356648424e-06, "loss": 0.5407, "step": 984 }, { "epoch": 0.059098818023639525, "grad_norm": 1.3249201774597168, "learning_rate": 6.98452954207047e-06, "loss": 0.5149, "step": 985 }, { "epoch": 0.05915881682366353, "grad_norm": 1.4872829914093018, "learning_rate": 6.9844655958977105e-06, "loss": 0.5509, "step": 986 }, { "epoch": 0.059218815623687526, "grad_norm": 1.2599014043807983, "learning_rate": 6.984401518132562e-06, "loss": 0.5202, "step": 987 }, { "epoch": 0.05927881442371152, "grad_norm": 1.2808337211608887, "learning_rate": 6.984337308777441e-06, "loss": 0.5092, "step": 988 }, { "epoch": 0.05933881322373553, "grad_norm": 1.20990788936615, "learning_rate": 6.9842729678347765e-06, "loss": 0.4989, "step": 989 }, { "epoch": 0.059398812023759524, "grad_norm": 1.2614914178848267, "learning_rate": 6.984208495306994e-06, "loss": 0.5249, "step": 990 }, { "epoch": 0.05945881082378352, "grad_norm": 1.317718267440796, "learning_rate": 6.984143891196533e-06, "loss": 0.461, "step": 991 }, { "epoch": 0.059518809623807525, "grad_norm": 1.2595362663269043, "learning_rate": 6.98407915550583e-06, "loss": 0.4768, "step": 992 }, { "epoch": 0.05957880842383152, "grad_norm": 1.290573239326477, "learning_rate": 6.98401428823733e-06, "loss": 0.5029, "step": 993 }, { "epoch": 0.05963880722385552, "grad_norm": 1.5231027603149414, "learning_rate": 6.983949289393484e-06, "loss": 0.535, "step": 994 }, { "epoch": 0.05969880602387952, "grad_norm": 1.2555676698684692, "learning_rate": 6.9838841589767455e-06, "loss": 0.5373, "step": 995 }, { "epoch": 0.05975880482390352, "grad_norm": 1.435463547706604, "learning_rate": 6.983818896989576e-06, "loss": 0.5464, "step": 996 }, { "epoch": 0.059818803623927524, "grad_norm": 1.198462963104248, "learning_rate": 6.9837535034344385e-06, "loss": 0.4787, "step": 997 }, { "epoch": 0.05987880242395152, "grad_norm": 1.306161642074585, "learning_rate": 6.983687978313804e-06, "loss": 0.5418, "step": 998 }, { "epoch": 0.05993880122397552, "grad_norm": 1.3617497682571411, "learning_rate": 6.9836223216301446e-06, "loss": 0.5021, "step": 999 }, { "epoch": 0.05999880002399952, "grad_norm": 1.3977539539337158, "learning_rate": 6.983556533385942e-06, "loss": 0.4754, "step": 1000 }, { "epoch": 0.06005879882402352, "grad_norm": 1.0947048664093018, "learning_rate": 6.98349061358368e-06, "loss": 0.5036, "step": 1001 }, { "epoch": 0.06011879762404752, "grad_norm": 1.3939979076385498, "learning_rate": 6.983424562225849e-06, "loss": 0.5301, "step": 1002 }, { "epoch": 0.06017879642407152, "grad_norm": 1.2763004302978516, "learning_rate": 6.983358379314942e-06, "loss": 0.496, "step": 1003 }, { "epoch": 0.06023879522409552, "grad_norm": 1.2727710008621216, "learning_rate": 6.98329206485346e-06, "loss": 0.5244, "step": 1004 }, { "epoch": 0.060298794024119515, "grad_norm": 1.1831767559051514, "learning_rate": 6.9832256188439056e-06, "loss": 0.4161, "step": 1005 }, { "epoch": 0.06035879282414352, "grad_norm": 1.346927523612976, "learning_rate": 6.98315904128879e-06, "loss": 0.564, "step": 1006 }, { "epoch": 0.060418791624167516, "grad_norm": 1.3169313669204712, "learning_rate": 6.983092332190626e-06, "loss": 0.523, "step": 1007 }, { "epoch": 0.06047879042419151, "grad_norm": 1.3263370990753174, "learning_rate": 6.9830254915519344e-06, "loss": 0.5144, "step": 1008 }, { "epoch": 0.06053878922421552, "grad_norm": 1.3170673847198486, "learning_rate": 6.982958519375238e-06, "loss": 0.4705, "step": 1009 }, { "epoch": 0.060598788024239514, "grad_norm": 1.2038850784301758, "learning_rate": 6.982891415663066e-06, "loss": 0.4885, "step": 1010 }, { "epoch": 0.06065878682426352, "grad_norm": 1.2672035694122314, "learning_rate": 6.982824180417954e-06, "loss": 0.497, "step": 1011 }, { "epoch": 0.060718785624287515, "grad_norm": 1.1950486898422241, "learning_rate": 6.982756813642441e-06, "loss": 0.4788, "step": 1012 }, { "epoch": 0.06077878442431151, "grad_norm": 1.2106897830963135, "learning_rate": 6.982689315339069e-06, "loss": 0.4686, "step": 1013 }, { "epoch": 0.060838783224335516, "grad_norm": 1.3738032579421997, "learning_rate": 6.9826216855103895e-06, "loss": 0.5451, "step": 1014 }, { "epoch": 0.06089878202435951, "grad_norm": 1.256719708442688, "learning_rate": 6.982553924158956e-06, "loss": 0.4923, "step": 1015 }, { "epoch": 0.06095878082438351, "grad_norm": 1.308576226234436, "learning_rate": 6.982486031287327e-06, "loss": 0.5143, "step": 1016 }, { "epoch": 0.061018779624407514, "grad_norm": 1.2526359558105469, "learning_rate": 6.982418006898067e-06, "loss": 0.5124, "step": 1017 }, { "epoch": 0.06107877842443151, "grad_norm": 1.3991310596466064, "learning_rate": 6.9823498509937435e-06, "loss": 0.5395, "step": 1018 }, { "epoch": 0.06113877722445551, "grad_norm": 1.160304069519043, "learning_rate": 6.982281563576933e-06, "loss": 0.4786, "step": 1019 }, { "epoch": 0.06119877602447951, "grad_norm": 1.4528499841690063, "learning_rate": 6.982213144650213e-06, "loss": 0.5231, "step": 1020 }, { "epoch": 0.06125877482450351, "grad_norm": 1.3033400774002075, "learning_rate": 6.982144594216167e-06, "loss": 0.5505, "step": 1021 }, { "epoch": 0.061318773624527506, "grad_norm": 1.2801557779312134, "learning_rate": 6.982075912277385e-06, "loss": 0.5267, "step": 1022 }, { "epoch": 0.06137877242455151, "grad_norm": 1.340146541595459, "learning_rate": 6.982007098836459e-06, "loss": 0.4829, "step": 1023 }, { "epoch": 0.06143877122457551, "grad_norm": 1.3762165307998657, "learning_rate": 6.98193815389599e-06, "loss": 0.5565, "step": 1024 }, { "epoch": 0.06149877002459951, "grad_norm": 1.2167155742645264, "learning_rate": 6.98186907745858e-06, "loss": 0.5005, "step": 1025 }, { "epoch": 0.06155876882462351, "grad_norm": 1.4074180126190186, "learning_rate": 6.981799869526838e-06, "loss": 0.5383, "step": 1026 }, { "epoch": 0.061618767624647505, "grad_norm": 1.3605260848999023, "learning_rate": 6.981730530103378e-06, "loss": 0.4985, "step": 1027 }, { "epoch": 0.06167876642467151, "grad_norm": 1.2745665311813354, "learning_rate": 6.981661059190819e-06, "loss": 0.5137, "step": 1028 }, { "epoch": 0.061738765224695506, "grad_norm": 1.2050024271011353, "learning_rate": 6.981591456791784e-06, "loss": 0.4393, "step": 1029 }, { "epoch": 0.0617987640247195, "grad_norm": 1.2944527864456177, "learning_rate": 6.9815217229089026e-06, "loss": 0.5205, "step": 1030 }, { "epoch": 0.06185876282474351, "grad_norm": 1.335858941078186, "learning_rate": 6.981451857544807e-06, "loss": 0.5025, "step": 1031 }, { "epoch": 0.061918761624767504, "grad_norm": 1.386613368988037, "learning_rate": 6.981381860702135e-06, "loss": 0.5618, "step": 1032 }, { "epoch": 0.0619787604247915, "grad_norm": 1.4617657661437988, "learning_rate": 6.9813117323835325e-06, "loss": 0.527, "step": 1033 }, { "epoch": 0.062038759224815505, "grad_norm": 1.1922320127487183, "learning_rate": 6.9812414725916464e-06, "loss": 0.4809, "step": 1034 }, { "epoch": 0.0620987580248395, "grad_norm": 1.3053593635559082, "learning_rate": 6.98117108132913e-06, "loss": 0.5259, "step": 1035 }, { "epoch": 0.0621587568248635, "grad_norm": 1.3449044227600098, "learning_rate": 6.981100558598643e-06, "loss": 0.5117, "step": 1036 }, { "epoch": 0.0622187556248875, "grad_norm": 1.2399381399154663, "learning_rate": 6.981029904402847e-06, "loss": 0.5006, "step": 1037 }, { "epoch": 0.0622787544249115, "grad_norm": 1.30055832862854, "learning_rate": 6.980959118744411e-06, "loss": 0.4819, "step": 1038 }, { "epoch": 0.062338753224935504, "grad_norm": 1.2926747798919678, "learning_rate": 6.980888201626008e-06, "loss": 0.5071, "step": 1039 }, { "epoch": 0.0623987520249595, "grad_norm": 1.3484702110290527, "learning_rate": 6.980817153050317e-06, "loss": 0.5019, "step": 1040 }, { "epoch": 0.0624587508249835, "grad_norm": 1.2417960166931152, "learning_rate": 6.980745973020022e-06, "loss": 0.4806, "step": 1041 }, { "epoch": 0.0625187496250075, "grad_norm": 1.4808526039123535, "learning_rate": 6.9806746615378086e-06, "loss": 0.5494, "step": 1042 }, { "epoch": 0.0625787484250315, "grad_norm": 1.3822028636932373, "learning_rate": 6.98060321860637e-06, "loss": 0.4876, "step": 1043 }, { "epoch": 0.0626387472250555, "grad_norm": 1.3513041734695435, "learning_rate": 6.980531644228406e-06, "loss": 0.5568, "step": 1044 }, { "epoch": 0.0626987460250795, "grad_norm": 1.2548773288726807, "learning_rate": 6.980459938406621e-06, "loss": 0.5008, "step": 1045 }, { "epoch": 0.0627587448251035, "grad_norm": 1.1970282793045044, "learning_rate": 6.980388101143719e-06, "loss": 0.4846, "step": 1046 }, { "epoch": 0.0628187436251275, "grad_norm": 1.266236424446106, "learning_rate": 6.980316132442416e-06, "loss": 0.544, "step": 1047 }, { "epoch": 0.0628787424251515, "grad_norm": 1.2529233694076538, "learning_rate": 6.980244032305428e-06, "loss": 0.481, "step": 1048 }, { "epoch": 0.0629387412251755, "grad_norm": 1.1691032648086548, "learning_rate": 6.9801718007354794e-06, "loss": 0.507, "step": 1049 }, { "epoch": 0.06299874002519949, "grad_norm": 1.2883775234222412, "learning_rate": 6.980099437735298e-06, "loss": 0.5005, "step": 1050 }, { "epoch": 0.06305873882522349, "grad_norm": 1.4166184663772583, "learning_rate": 6.980026943307615e-06, "loss": 0.5049, "step": 1051 }, { "epoch": 0.0631187376252475, "grad_norm": 1.3308558464050293, "learning_rate": 6.979954317455171e-06, "loss": 0.4881, "step": 1052 }, { "epoch": 0.0631787364252715, "grad_norm": 1.3535516262054443, "learning_rate": 6.979881560180706e-06, "loss": 0.5487, "step": 1053 }, { "epoch": 0.0632387352252955, "grad_norm": 1.517737865447998, "learning_rate": 6.979808671486969e-06, "loss": 0.5331, "step": 1054 }, { "epoch": 0.06329873402531949, "grad_norm": 1.2321786880493164, "learning_rate": 6.979735651376713e-06, "loss": 0.5091, "step": 1055 }, { "epoch": 0.06335873282534349, "grad_norm": 1.5433779954910278, "learning_rate": 6.979662499852694e-06, "loss": 0.5186, "step": 1056 }, { "epoch": 0.06341873162536749, "grad_norm": 1.2738962173461914, "learning_rate": 6.979589216917675e-06, "loss": 0.4761, "step": 1057 }, { "epoch": 0.0634787304253915, "grad_norm": 1.3336875438690186, "learning_rate": 6.979515802574426e-06, "loss": 0.4609, "step": 1058 }, { "epoch": 0.0635387292254155, "grad_norm": 1.1935368776321411, "learning_rate": 6.979442256825717e-06, "loss": 0.4571, "step": 1059 }, { "epoch": 0.06359872802543949, "grad_norm": 1.389861822128296, "learning_rate": 6.979368579674326e-06, "loss": 0.5432, "step": 1060 }, { "epoch": 0.06365872682546349, "grad_norm": 1.3240100145339966, "learning_rate": 6.979294771123037e-06, "loss": 0.5096, "step": 1061 }, { "epoch": 0.06371872562548749, "grad_norm": 1.3198648691177368, "learning_rate": 6.979220831174634e-06, "loss": 0.5007, "step": 1062 }, { "epoch": 0.0637787244255115, "grad_norm": 1.2734047174453735, "learning_rate": 6.979146759831913e-06, "loss": 0.4871, "step": 1063 }, { "epoch": 0.06383872322553549, "grad_norm": 1.3632440567016602, "learning_rate": 6.97907255709767e-06, "loss": 0.5018, "step": 1064 }, { "epoch": 0.06389872202555949, "grad_norm": 1.2400717735290527, "learning_rate": 6.978998222974706e-06, "loss": 0.5468, "step": 1065 }, { "epoch": 0.06395872082558349, "grad_norm": 1.5227690935134888, "learning_rate": 6.978923757465831e-06, "loss": 0.5515, "step": 1066 }, { "epoch": 0.06401871962560748, "grad_norm": 1.3579046726226807, "learning_rate": 6.978849160573854e-06, "loss": 0.5478, "step": 1067 }, { "epoch": 0.06407871842563148, "grad_norm": 1.3465546369552612, "learning_rate": 6.978774432301595e-06, "loss": 0.5287, "step": 1068 }, { "epoch": 0.06413871722565549, "grad_norm": 1.3137699365615845, "learning_rate": 6.978699572651875e-06, "loss": 0.5155, "step": 1069 }, { "epoch": 0.06419871602567949, "grad_norm": 1.3297522068023682, "learning_rate": 6.978624581627521e-06, "loss": 0.4757, "step": 1070 }, { "epoch": 0.06425871482570349, "grad_norm": 1.2408428192138672, "learning_rate": 6.978549459231364e-06, "loss": 0.459, "step": 1071 }, { "epoch": 0.06431871362572748, "grad_norm": 1.222593069076538, "learning_rate": 6.978474205466241e-06, "loss": 0.4823, "step": 1072 }, { "epoch": 0.06437871242575148, "grad_norm": 1.3948620557785034, "learning_rate": 6.9783988203349985e-06, "loss": 0.5238, "step": 1073 }, { "epoch": 0.06443871122577549, "grad_norm": 1.3341566324234009, "learning_rate": 6.978323303840479e-06, "loss": 0.5435, "step": 1074 }, { "epoch": 0.06449871002579949, "grad_norm": 1.2030284404754639, "learning_rate": 6.978247655985534e-06, "loss": 0.4623, "step": 1075 }, { "epoch": 0.06455870882582349, "grad_norm": 1.206649661064148, "learning_rate": 6.978171876773024e-06, "loss": 0.4847, "step": 1076 }, { "epoch": 0.06461870762584748, "grad_norm": 1.3702151775360107, "learning_rate": 6.978095966205807e-06, "loss": 0.529, "step": 1077 }, { "epoch": 0.06467870642587148, "grad_norm": 1.3164066076278687, "learning_rate": 6.978019924286752e-06, "loss": 0.5459, "step": 1078 }, { "epoch": 0.06473870522589548, "grad_norm": 1.316447138786316, "learning_rate": 6.97794375101873e-06, "loss": 0.4867, "step": 1079 }, { "epoch": 0.06479870402591949, "grad_norm": 1.2744375467300415, "learning_rate": 6.977867446404619e-06, "loss": 0.4631, "step": 1080 }, { "epoch": 0.06485870282594348, "grad_norm": 1.3726763725280762, "learning_rate": 6.977791010447299e-06, "loss": 0.5334, "step": 1081 }, { "epoch": 0.06491870162596748, "grad_norm": 1.4039050340652466, "learning_rate": 6.977714443149658e-06, "loss": 0.4823, "step": 1082 }, { "epoch": 0.06497870042599148, "grad_norm": 1.3878450393676758, "learning_rate": 6.977637744514586e-06, "loss": 0.5388, "step": 1083 }, { "epoch": 0.06503869922601548, "grad_norm": 1.221630573272705, "learning_rate": 6.97756091454498e-06, "loss": 0.5072, "step": 1084 }, { "epoch": 0.06509869802603947, "grad_norm": 1.2494375705718994, "learning_rate": 6.977483953243743e-06, "loss": 0.552, "step": 1085 }, { "epoch": 0.06515869682606348, "grad_norm": 1.2801345586776733, "learning_rate": 6.9774068606137815e-06, "loss": 0.4876, "step": 1086 }, { "epoch": 0.06521869562608748, "grad_norm": 1.2420021295547485, "learning_rate": 6.9773296366580044e-06, "loss": 0.4935, "step": 1087 }, { "epoch": 0.06527869442611148, "grad_norm": 1.2839365005493164, "learning_rate": 6.977252281379329e-06, "loss": 0.5007, "step": 1088 }, { "epoch": 0.06533869322613547, "grad_norm": 1.3200569152832031, "learning_rate": 6.977174794780679e-06, "loss": 0.4937, "step": 1089 }, { "epoch": 0.06539869202615947, "grad_norm": 1.3370643854141235, "learning_rate": 6.977097176864978e-06, "loss": 0.4581, "step": 1090 }, { "epoch": 0.06545869082618348, "grad_norm": 1.1867599487304688, "learning_rate": 6.977019427635158e-06, "loss": 0.5123, "step": 1091 }, { "epoch": 0.06551868962620748, "grad_norm": 1.2798017263412476, "learning_rate": 6.976941547094156e-06, "loss": 0.4775, "step": 1092 }, { "epoch": 0.06557868842623148, "grad_norm": 1.3487589359283447, "learning_rate": 6.976863535244912e-06, "loss": 0.5356, "step": 1093 }, { "epoch": 0.06563868722625547, "grad_norm": 1.417845368385315, "learning_rate": 6.9767853920903736e-06, "loss": 0.5622, "step": 1094 }, { "epoch": 0.06569868602627947, "grad_norm": 1.2970536947250366, "learning_rate": 6.976707117633491e-06, "loss": 0.5358, "step": 1095 }, { "epoch": 0.06575868482630347, "grad_norm": 1.4746845960617065, "learning_rate": 6.976628711877221e-06, "loss": 0.6285, "step": 1096 }, { "epoch": 0.06581868362632748, "grad_norm": 1.3045355081558228, "learning_rate": 6.976550174824522e-06, "loss": 0.4945, "step": 1097 }, { "epoch": 0.06587868242635148, "grad_norm": 1.2419027090072632, "learning_rate": 6.976471506478364e-06, "loss": 0.4962, "step": 1098 }, { "epoch": 0.06593868122637547, "grad_norm": 1.3486597537994385, "learning_rate": 6.9763927068417155e-06, "loss": 0.5109, "step": 1099 }, { "epoch": 0.06599868002639947, "grad_norm": 1.3832216262817383, "learning_rate": 6.976313775917553e-06, "loss": 0.5923, "step": 1100 }, { "epoch": 0.06605867882642347, "grad_norm": 1.2882611751556396, "learning_rate": 6.976234713708856e-06, "loss": 0.5118, "step": 1101 }, { "epoch": 0.06611867762644748, "grad_norm": 1.3491950035095215, "learning_rate": 6.976155520218613e-06, "loss": 0.5128, "step": 1102 }, { "epoch": 0.06617867642647147, "grad_norm": 1.2813316583633423, "learning_rate": 6.976076195449813e-06, "loss": 0.5065, "step": 1103 }, { "epoch": 0.06623867522649547, "grad_norm": 1.3274668455123901, "learning_rate": 6.975996739405452e-06, "loss": 0.5533, "step": 1104 }, { "epoch": 0.06629867402651947, "grad_norm": 1.1431825160980225, "learning_rate": 6.975917152088529e-06, "loss": 0.495, "step": 1105 }, { "epoch": 0.06635867282654347, "grad_norm": 1.2809579372406006, "learning_rate": 6.975837433502054e-06, "loss": 0.4731, "step": 1106 }, { "epoch": 0.06641867162656746, "grad_norm": 1.2729321718215942, "learning_rate": 6.975757583649034e-06, "loss": 0.5227, "step": 1107 }, { "epoch": 0.06647867042659147, "grad_norm": 1.3723152875900269, "learning_rate": 6.975677602532485e-06, "loss": 0.5065, "step": 1108 }, { "epoch": 0.06653866922661547, "grad_norm": 1.404586672782898, "learning_rate": 6.9755974901554295e-06, "loss": 0.5023, "step": 1109 }, { "epoch": 0.06659866802663947, "grad_norm": 1.303188681602478, "learning_rate": 6.97551724652089e-06, "loss": 0.4997, "step": 1110 }, { "epoch": 0.06665866682666347, "grad_norm": 1.3602811098098755, "learning_rate": 6.9754368716319e-06, "loss": 0.5032, "step": 1111 }, { "epoch": 0.06671866562668746, "grad_norm": 1.3841373920440674, "learning_rate": 6.975356365491493e-06, "loss": 0.5541, "step": 1112 }, { "epoch": 0.06677866442671146, "grad_norm": 1.327083945274353, "learning_rate": 6.975275728102709e-06, "loss": 0.5288, "step": 1113 }, { "epoch": 0.06683866322673547, "grad_norm": 1.328644871711731, "learning_rate": 6.9751949594685945e-06, "loss": 0.479, "step": 1114 }, { "epoch": 0.06689866202675947, "grad_norm": 1.1827548742294312, "learning_rate": 6.9751140595921995e-06, "loss": 0.5327, "step": 1115 }, { "epoch": 0.06695866082678346, "grad_norm": 1.1649459600448608, "learning_rate": 6.975033028476579e-06, "loss": 0.5024, "step": 1116 }, { "epoch": 0.06701865962680746, "grad_norm": 1.481513261795044, "learning_rate": 6.974951866124792e-06, "loss": 0.54, "step": 1117 }, { "epoch": 0.06707865842683146, "grad_norm": 1.3351209163665771, "learning_rate": 6.974870572539907e-06, "loss": 0.4757, "step": 1118 }, { "epoch": 0.06713865722685547, "grad_norm": 1.2167335748672485, "learning_rate": 6.974789147724991e-06, "loss": 0.4912, "step": 1119 }, { "epoch": 0.06719865602687947, "grad_norm": 1.4070227146148682, "learning_rate": 6.974707591683119e-06, "loss": 0.4993, "step": 1120 }, { "epoch": 0.06725865482690346, "grad_norm": 1.422378420829773, "learning_rate": 6.974625904417374e-06, "loss": 0.5851, "step": 1121 }, { "epoch": 0.06731865362692746, "grad_norm": 1.3527679443359375, "learning_rate": 6.974544085930837e-06, "loss": 0.5285, "step": 1122 }, { "epoch": 0.06737865242695146, "grad_norm": 1.2987260818481445, "learning_rate": 6.974462136226601e-06, "loss": 0.5962, "step": 1123 }, { "epoch": 0.06743865122697545, "grad_norm": 1.4427727460861206, "learning_rate": 6.974380055307759e-06, "loss": 0.5171, "step": 1124 }, { "epoch": 0.06749865002699947, "grad_norm": 1.3876718282699585, "learning_rate": 6.974297843177413e-06, "loss": 0.4745, "step": 1125 }, { "epoch": 0.06755864882702346, "grad_norm": 1.2227727174758911, "learning_rate": 6.974215499838664e-06, "loss": 0.5056, "step": 1126 }, { "epoch": 0.06761864762704746, "grad_norm": 1.3009655475616455, "learning_rate": 6.9741330252946266e-06, "loss": 0.4967, "step": 1127 }, { "epoch": 0.06767864642707146, "grad_norm": 1.2290648221969604, "learning_rate": 6.974050419548412e-06, "loss": 0.4909, "step": 1128 }, { "epoch": 0.06773864522709545, "grad_norm": 1.1707078218460083, "learning_rate": 6.97396768260314e-06, "loss": 0.4476, "step": 1129 }, { "epoch": 0.06779864402711945, "grad_norm": 1.5121163129806519, "learning_rate": 6.973884814461936e-06, "loss": 0.5495, "step": 1130 }, { "epoch": 0.06785864282714346, "grad_norm": 1.2158187627792358, "learning_rate": 6.97380181512793e-06, "loss": 0.4987, "step": 1131 }, { "epoch": 0.06791864162716746, "grad_norm": 1.3312513828277588, "learning_rate": 6.973718684604257e-06, "loss": 0.5537, "step": 1132 }, { "epoch": 0.06797864042719146, "grad_norm": 1.3003196716308594, "learning_rate": 6.973635422894054e-06, "loss": 0.584, "step": 1133 }, { "epoch": 0.06803863922721545, "grad_norm": 1.214893102645874, "learning_rate": 6.973552030000467e-06, "loss": 0.4565, "step": 1134 }, { "epoch": 0.06809863802723945, "grad_norm": 1.2881264686584473, "learning_rate": 6.973468505926645e-06, "loss": 0.5114, "step": 1135 }, { "epoch": 0.06815863682726346, "grad_norm": 1.4028987884521484, "learning_rate": 6.973384850675744e-06, "loss": 0.5263, "step": 1136 }, { "epoch": 0.06821863562728746, "grad_norm": 1.3478137254714966, "learning_rate": 6.973301064250922e-06, "loss": 0.5298, "step": 1137 }, { "epoch": 0.06827863442731145, "grad_norm": 1.2070733308792114, "learning_rate": 6.973217146655342e-06, "loss": 0.4747, "step": 1138 }, { "epoch": 0.06833863322733545, "grad_norm": 1.300981879234314, "learning_rate": 6.973133097892175e-06, "loss": 0.4754, "step": 1139 }, { "epoch": 0.06839863202735945, "grad_norm": 1.2311533689498901, "learning_rate": 6.973048917964595e-06, "loss": 0.4895, "step": 1140 }, { "epoch": 0.06845863082738345, "grad_norm": 1.2937365770339966, "learning_rate": 6.972964606875778e-06, "loss": 0.5387, "step": 1141 }, { "epoch": 0.06851862962740746, "grad_norm": 1.348813533782959, "learning_rate": 6.972880164628914e-06, "loss": 0.5151, "step": 1142 }, { "epoch": 0.06857862842743145, "grad_norm": 1.2976845502853394, "learning_rate": 6.972795591227186e-06, "loss": 0.4609, "step": 1143 }, { "epoch": 0.06863862722745545, "grad_norm": 1.3996132612228394, "learning_rate": 6.972710886673791e-06, "loss": 0.56, "step": 1144 }, { "epoch": 0.06869862602747945, "grad_norm": 1.3475478887557983, "learning_rate": 6.972626050971929e-06, "loss": 0.5112, "step": 1145 }, { "epoch": 0.06875862482750345, "grad_norm": 1.2673391103744507, "learning_rate": 6.972541084124801e-06, "loss": 0.472, "step": 1146 }, { "epoch": 0.06881862362752746, "grad_norm": 1.189274549484253, "learning_rate": 6.972455986135617e-06, "loss": 0.4917, "step": 1147 }, { "epoch": 0.06887862242755145, "grad_norm": 1.2221068143844604, "learning_rate": 6.97237075700759e-06, "loss": 0.4899, "step": 1148 }, { "epoch": 0.06893862122757545, "grad_norm": 1.2682762145996094, "learning_rate": 6.9722853967439415e-06, "loss": 0.5572, "step": 1149 }, { "epoch": 0.06899862002759945, "grad_norm": 1.2519116401672363, "learning_rate": 6.972199905347893e-06, "loss": 0.5514, "step": 1150 }, { "epoch": 0.06905861882762344, "grad_norm": 1.3018335103988647, "learning_rate": 6.972114282822673e-06, "loss": 0.4761, "step": 1151 }, { "epoch": 0.06911861762764744, "grad_norm": 1.3755558729171753, "learning_rate": 6.972028529171515e-06, "loss": 0.5258, "step": 1152 }, { "epoch": 0.06917861642767145, "grad_norm": 1.2618976831436157, "learning_rate": 6.9719426443976585e-06, "loss": 0.5126, "step": 1153 }, { "epoch": 0.06923861522769545, "grad_norm": 1.5133416652679443, "learning_rate": 6.971856628504346e-06, "loss": 0.5076, "step": 1154 }, { "epoch": 0.06929861402771945, "grad_norm": 1.2957236766815186, "learning_rate": 6.971770481494826e-06, "loss": 0.5535, "step": 1155 }, { "epoch": 0.06935861282774344, "grad_norm": 1.3255811929702759, "learning_rate": 6.971684203372354e-06, "loss": 0.5642, "step": 1156 }, { "epoch": 0.06941861162776744, "grad_norm": 1.1857852935791016, "learning_rate": 6.971597794140185e-06, "loss": 0.5029, "step": 1157 }, { "epoch": 0.06947861042779144, "grad_norm": 1.270963191986084, "learning_rate": 6.971511253801585e-06, "loss": 0.4572, "step": 1158 }, { "epoch": 0.06953860922781545, "grad_norm": 1.239213466644287, "learning_rate": 6.971424582359819e-06, "loss": 0.4782, "step": 1159 }, { "epoch": 0.06959860802783945, "grad_norm": 1.4199655055999756, "learning_rate": 6.971337779818165e-06, "loss": 0.5181, "step": 1160 }, { "epoch": 0.06965860682786344, "grad_norm": 1.2928823232650757, "learning_rate": 6.9712508461798965e-06, "loss": 0.5412, "step": 1161 }, { "epoch": 0.06971860562788744, "grad_norm": 1.301857352256775, "learning_rate": 6.9711637814483e-06, "loss": 0.5337, "step": 1162 }, { "epoch": 0.06977860442791144, "grad_norm": 1.2203365564346313, "learning_rate": 6.971076585626661e-06, "loss": 0.4818, "step": 1163 }, { "epoch": 0.06983860322793545, "grad_norm": 1.2068382501602173, "learning_rate": 6.970989258718273e-06, "loss": 0.5335, "step": 1164 }, { "epoch": 0.06989860202795944, "grad_norm": 1.1672532558441162, "learning_rate": 6.970901800726436e-06, "loss": 0.4977, "step": 1165 }, { "epoch": 0.06995860082798344, "grad_norm": 1.3691167831420898, "learning_rate": 6.970814211654451e-06, "loss": 0.5394, "step": 1166 }, { "epoch": 0.07001859962800744, "grad_norm": 1.3084765672683716, "learning_rate": 6.970726491505626e-06, "loss": 0.4639, "step": 1167 }, { "epoch": 0.07007859842803144, "grad_norm": 1.3706367015838623, "learning_rate": 6.9706386402832734e-06, "loss": 0.5533, "step": 1168 }, { "epoch": 0.07013859722805543, "grad_norm": 1.1983354091644287, "learning_rate": 6.9705506579907125e-06, "loss": 0.4947, "step": 1169 }, { "epoch": 0.07019859602807944, "grad_norm": 1.3075580596923828, "learning_rate": 6.970462544631265e-06, "loss": 0.5417, "step": 1170 }, { "epoch": 0.07025859482810344, "grad_norm": 1.4059425592422485, "learning_rate": 6.970374300208257e-06, "loss": 0.5355, "step": 1171 }, { "epoch": 0.07031859362812744, "grad_norm": 1.4097543954849243, "learning_rate": 6.970285924725024e-06, "loss": 0.5502, "step": 1172 }, { "epoch": 0.07037859242815143, "grad_norm": 1.231876254081726, "learning_rate": 6.9701974181849026e-06, "loss": 0.4713, "step": 1173 }, { "epoch": 0.07043859122817543, "grad_norm": 1.4006441831588745, "learning_rate": 6.970108780591234e-06, "loss": 0.4953, "step": 1174 }, { "epoch": 0.07049859002819944, "grad_norm": 1.1709238290786743, "learning_rate": 6.9700200119473666e-06, "loss": 0.4658, "step": 1175 }, { "epoch": 0.07055858882822344, "grad_norm": 1.2032265663146973, "learning_rate": 6.969931112256654e-06, "loss": 0.518, "step": 1176 }, { "epoch": 0.07061858762824744, "grad_norm": 1.2161364555358887, "learning_rate": 6.969842081522451e-06, "loss": 0.542, "step": 1177 }, { "epoch": 0.07067858642827143, "grad_norm": 1.4233583211898804, "learning_rate": 6.969752919748121e-06, "loss": 0.5913, "step": 1178 }, { "epoch": 0.07073858522829543, "grad_norm": 1.4988044500350952, "learning_rate": 6.969663626937031e-06, "loss": 0.5353, "step": 1179 }, { "epoch": 0.07079858402831943, "grad_norm": 1.3313838243484497, "learning_rate": 6.969574203092555e-06, "loss": 0.555, "step": 1180 }, { "epoch": 0.07085858282834344, "grad_norm": 1.3691275119781494, "learning_rate": 6.969484648218068e-06, "loss": 0.5626, "step": 1181 }, { "epoch": 0.07091858162836744, "grad_norm": 1.3483033180236816, "learning_rate": 6.969394962316953e-06, "loss": 0.4816, "step": 1182 }, { "epoch": 0.07097858042839143, "grad_norm": 1.214531421661377, "learning_rate": 6.969305145392597e-06, "loss": 0.5583, "step": 1183 }, { "epoch": 0.07103857922841543, "grad_norm": 1.2754732370376587, "learning_rate": 6.969215197448391e-06, "loss": 0.5269, "step": 1184 }, { "epoch": 0.07109857802843943, "grad_norm": 1.3625003099441528, "learning_rate": 6.969125118487733e-06, "loss": 0.5978, "step": 1185 }, { "epoch": 0.07115857682846342, "grad_norm": 1.2766093015670776, "learning_rate": 6.969034908514025e-06, "loss": 0.5117, "step": 1186 }, { "epoch": 0.07121857562848743, "grad_norm": 1.231447696685791, "learning_rate": 6.968944567530674e-06, "loss": 0.5515, "step": 1187 }, { "epoch": 0.07127857442851143, "grad_norm": 1.4045498371124268, "learning_rate": 6.96885409554109e-06, "loss": 0.5306, "step": 1188 }, { "epoch": 0.07133857322853543, "grad_norm": 1.234536051750183, "learning_rate": 6.96876349254869e-06, "loss": 0.5568, "step": 1189 }, { "epoch": 0.07139857202855943, "grad_norm": 1.2395278215408325, "learning_rate": 6.968672758556898e-06, "loss": 0.5174, "step": 1190 }, { "epoch": 0.07145857082858342, "grad_norm": 1.2935009002685547, "learning_rate": 6.968581893569138e-06, "loss": 0.4889, "step": 1191 }, { "epoch": 0.07151856962860743, "grad_norm": 1.200943946838379, "learning_rate": 6.968490897588844e-06, "loss": 0.4809, "step": 1192 }, { "epoch": 0.07157856842863143, "grad_norm": 1.317759394645691, "learning_rate": 6.96839977061945e-06, "loss": 0.422, "step": 1193 }, { "epoch": 0.07163856722865543, "grad_norm": 1.2835687398910522, "learning_rate": 6.9683085126644e-06, "loss": 0.5716, "step": 1194 }, { "epoch": 0.07169856602867943, "grad_norm": 1.3122692108154297, "learning_rate": 6.968217123727138e-06, "loss": 0.488, "step": 1195 }, { "epoch": 0.07175856482870342, "grad_norm": 1.383578896522522, "learning_rate": 6.968125603811118e-06, "loss": 0.5553, "step": 1196 }, { "epoch": 0.07181856362872742, "grad_norm": 1.3008979558944702, "learning_rate": 6.968033952919793e-06, "loss": 0.5114, "step": 1197 }, { "epoch": 0.07187856242875143, "grad_norm": 1.2178407907485962, "learning_rate": 6.9679421710566254e-06, "loss": 0.4921, "step": 1198 }, { "epoch": 0.07193856122877543, "grad_norm": 1.290745735168457, "learning_rate": 6.967850258225083e-06, "loss": 0.5009, "step": 1199 }, { "epoch": 0.07199856002879942, "grad_norm": 1.2286590337753296, "learning_rate": 6.967758214428636e-06, "loss": 0.5326, "step": 1200 }, { "epoch": 0.07205855882882342, "grad_norm": 1.243743658065796, "learning_rate": 6.967666039670761e-06, "loss": 0.4674, "step": 1201 }, { "epoch": 0.07211855762884742, "grad_norm": 1.2126044034957886, "learning_rate": 6.967573733954937e-06, "loss": 0.4911, "step": 1202 }, { "epoch": 0.07217855642887143, "grad_norm": 1.3314496278762817, "learning_rate": 6.9674812972846505e-06, "loss": 0.5073, "step": 1203 }, { "epoch": 0.07223855522889543, "grad_norm": 1.2596652507781982, "learning_rate": 6.967388729663395e-06, "loss": 0.4978, "step": 1204 }, { "epoch": 0.07229855402891942, "grad_norm": 1.5593982934951782, "learning_rate": 6.967296031094664e-06, "loss": 0.514, "step": 1205 }, { "epoch": 0.07235855282894342, "grad_norm": 1.4169886112213135, "learning_rate": 6.967203201581958e-06, "loss": 0.5266, "step": 1206 }, { "epoch": 0.07241855162896742, "grad_norm": 1.3523601293563843, "learning_rate": 6.9671102411287845e-06, "loss": 0.5192, "step": 1207 }, { "epoch": 0.07247855042899141, "grad_norm": 1.2426661252975464, "learning_rate": 6.967017149738653e-06, "loss": 0.4724, "step": 1208 }, { "epoch": 0.07253854922901543, "grad_norm": 1.356772780418396, "learning_rate": 6.96692392741508e-06, "loss": 0.4693, "step": 1209 }, { "epoch": 0.07259854802903942, "grad_norm": 1.3093667030334473, "learning_rate": 6.966830574161585e-06, "loss": 0.4869, "step": 1210 }, { "epoch": 0.07265854682906342, "grad_norm": 1.1607123613357544, "learning_rate": 6.966737089981695e-06, "loss": 0.5172, "step": 1211 }, { "epoch": 0.07271854562908742, "grad_norm": 1.4991861581802368, "learning_rate": 6.966643474878939e-06, "loss": 0.5002, "step": 1212 }, { "epoch": 0.07277854442911141, "grad_norm": 1.540201187133789, "learning_rate": 6.966549728856852e-06, "loss": 0.5662, "step": 1213 }, { "epoch": 0.07283854322913541, "grad_norm": 1.2717233896255493, "learning_rate": 6.966455851918976e-06, "loss": 0.5067, "step": 1214 }, { "epoch": 0.07289854202915942, "grad_norm": 1.5206224918365479, "learning_rate": 6.966361844068855e-06, "loss": 0.5036, "step": 1215 }, { "epoch": 0.07295854082918342, "grad_norm": 1.3841402530670166, "learning_rate": 6.966267705310042e-06, "loss": 0.5503, "step": 1216 }, { "epoch": 0.07301853962920742, "grad_norm": 1.3070062398910522, "learning_rate": 6.966173435646087e-06, "loss": 0.5475, "step": 1217 }, { "epoch": 0.07307853842923141, "grad_norm": 1.3320788145065308, "learning_rate": 6.966079035080555e-06, "loss": 0.5123, "step": 1218 }, { "epoch": 0.07313853722925541, "grad_norm": 1.4137874841690063, "learning_rate": 6.965984503617009e-06, "loss": 0.5053, "step": 1219 }, { "epoch": 0.07319853602927942, "grad_norm": 1.2752033472061157, "learning_rate": 6.96588984125902e-06, "loss": 0.5082, "step": 1220 }, { "epoch": 0.07325853482930342, "grad_norm": 1.3040941953659058, "learning_rate": 6.965795048010161e-06, "loss": 0.5174, "step": 1221 }, { "epoch": 0.07331853362932741, "grad_norm": 1.2959513664245605, "learning_rate": 6.965700123874013e-06, "loss": 0.5274, "step": 1222 }, { "epoch": 0.07337853242935141, "grad_norm": 1.2694953680038452, "learning_rate": 6.9656050688541624e-06, "loss": 0.5083, "step": 1223 }, { "epoch": 0.07343853122937541, "grad_norm": 1.2128627300262451, "learning_rate": 6.965509882954197e-06, "loss": 0.5424, "step": 1224 }, { "epoch": 0.0734985300293994, "grad_norm": 1.3075655698776245, "learning_rate": 6.965414566177712e-06, "loss": 0.4988, "step": 1225 }, { "epoch": 0.07355852882942342, "grad_norm": 1.326385736465454, "learning_rate": 6.965319118528307e-06, "loss": 0.5076, "step": 1226 }, { "epoch": 0.07361852762944741, "grad_norm": 1.4594253301620483, "learning_rate": 6.965223540009588e-06, "loss": 0.5059, "step": 1227 }, { "epoch": 0.07367852642947141, "grad_norm": 1.325941562652588, "learning_rate": 6.965127830625161e-06, "loss": 0.5224, "step": 1228 }, { "epoch": 0.07373852522949541, "grad_norm": 1.2976899147033691, "learning_rate": 6.965031990378645e-06, "loss": 0.4736, "step": 1229 }, { "epoch": 0.0737985240295194, "grad_norm": 1.225500226020813, "learning_rate": 6.964936019273657e-06, "loss": 0.4436, "step": 1230 }, { "epoch": 0.07385852282954342, "grad_norm": 1.4029208421707153, "learning_rate": 6.964839917313821e-06, "loss": 0.5284, "step": 1231 }, { "epoch": 0.07391852162956741, "grad_norm": 1.4949653148651123, "learning_rate": 6.964743684502768e-06, "loss": 0.5388, "step": 1232 }, { "epoch": 0.07397852042959141, "grad_norm": 1.3092917203903198, "learning_rate": 6.964647320844131e-06, "loss": 0.4663, "step": 1233 }, { "epoch": 0.07403851922961541, "grad_norm": 1.3193334341049194, "learning_rate": 6.9645508263415494e-06, "loss": 0.5236, "step": 1234 }, { "epoch": 0.0740985180296394, "grad_norm": 1.4564399719238281, "learning_rate": 6.964454200998668e-06, "loss": 0.4901, "step": 1235 }, { "epoch": 0.0741585168296634, "grad_norm": 1.322189450263977, "learning_rate": 6.964357444819135e-06, "loss": 0.5146, "step": 1236 }, { "epoch": 0.07421851562968741, "grad_norm": 1.4254353046417236, "learning_rate": 6.964260557806605e-06, "loss": 0.4964, "step": 1237 }, { "epoch": 0.07427851442971141, "grad_norm": 1.2309828996658325, "learning_rate": 6.964163539964738e-06, "loss": 0.4611, "step": 1238 }, { "epoch": 0.0743385132297354, "grad_norm": 1.2878289222717285, "learning_rate": 6.964066391297196e-06, "loss": 0.5953, "step": 1239 }, { "epoch": 0.0743985120297594, "grad_norm": 1.1783806085586548, "learning_rate": 6.963969111807647e-06, "loss": 0.4806, "step": 1240 }, { "epoch": 0.0744585108297834, "grad_norm": 1.4045032262802124, "learning_rate": 6.963871701499768e-06, "loss": 0.5711, "step": 1241 }, { "epoch": 0.0745185096298074, "grad_norm": 1.3004597425460815, "learning_rate": 6.963774160377236e-06, "loss": 0.4913, "step": 1242 }, { "epoch": 0.07457850842983141, "grad_norm": 1.211363673210144, "learning_rate": 6.963676488443735e-06, "loss": 0.5277, "step": 1243 }, { "epoch": 0.0746385072298554, "grad_norm": 1.2854082584381104, "learning_rate": 6.963578685702953e-06, "loss": 0.5034, "step": 1244 }, { "epoch": 0.0746985060298794, "grad_norm": 1.3706867694854736, "learning_rate": 6.963480752158585e-06, "loss": 0.5005, "step": 1245 }, { "epoch": 0.0747585048299034, "grad_norm": 1.45496666431427, "learning_rate": 6.963382687814328e-06, "loss": 0.4801, "step": 1246 }, { "epoch": 0.0748185036299274, "grad_norm": 1.3738418817520142, "learning_rate": 6.963284492673886e-06, "loss": 0.5729, "step": 1247 }, { "epoch": 0.07487850242995141, "grad_norm": 1.1563255786895752, "learning_rate": 6.963186166740968e-06, "loss": 0.4894, "step": 1248 }, { "epoch": 0.0749385012299754, "grad_norm": 1.1841264963150024, "learning_rate": 6.963087710019285e-06, "loss": 0.4821, "step": 1249 }, { "epoch": 0.0749985000299994, "grad_norm": 1.3551278114318848, "learning_rate": 6.9629891225125585e-06, "loss": 0.4822, "step": 1250 }, { "epoch": 0.0750584988300234, "grad_norm": 1.212369680404663, "learning_rate": 6.96289040422451e-06, "loss": 0.5057, "step": 1251 }, { "epoch": 0.0751184976300474, "grad_norm": 1.3323487043380737, "learning_rate": 6.962791555158868e-06, "loss": 0.4991, "step": 1252 }, { "epoch": 0.07517849643007139, "grad_norm": 1.2369328737258911, "learning_rate": 6.962692575319365e-06, "loss": 0.4532, "step": 1253 }, { "epoch": 0.0752384952300954, "grad_norm": 1.3044246435165405, "learning_rate": 6.962593464709739e-06, "loss": 0.484, "step": 1254 }, { "epoch": 0.0752984940301194, "grad_norm": 1.2923847436904907, "learning_rate": 6.962494223333734e-06, "loss": 0.4611, "step": 1255 }, { "epoch": 0.0753584928301434, "grad_norm": 1.309836745262146, "learning_rate": 6.962394851195097e-06, "loss": 0.5181, "step": 1256 }, { "epoch": 0.0754184916301674, "grad_norm": 1.299484372138977, "learning_rate": 6.962295348297581e-06, "loss": 0.5355, "step": 1257 }, { "epoch": 0.07547849043019139, "grad_norm": 1.2524536848068237, "learning_rate": 6.962195714644945e-06, "loss": 0.5097, "step": 1258 }, { "epoch": 0.0755384892302154, "grad_norm": 1.2135223150253296, "learning_rate": 6.962095950240949e-06, "loss": 0.5033, "step": 1259 }, { "epoch": 0.0755984880302394, "grad_norm": 1.35247004032135, "learning_rate": 6.961996055089363e-06, "loss": 0.5334, "step": 1260 }, { "epoch": 0.0756584868302634, "grad_norm": 1.3272594213485718, "learning_rate": 6.961896029193959e-06, "loss": 0.4884, "step": 1261 }, { "epoch": 0.0757184856302874, "grad_norm": 1.2085591554641724, "learning_rate": 6.9617958725585135e-06, "loss": 0.4994, "step": 1262 }, { "epoch": 0.07577848443031139, "grad_norm": 1.1501096487045288, "learning_rate": 6.961695585186811e-06, "loss": 0.4856, "step": 1263 }, { "epoch": 0.07583848323033539, "grad_norm": 1.280519962310791, "learning_rate": 6.961595167082636e-06, "loss": 0.4868, "step": 1264 }, { "epoch": 0.0758984820303594, "grad_norm": 1.3105417490005493, "learning_rate": 6.961494618249784e-06, "loss": 0.5492, "step": 1265 }, { "epoch": 0.0759584808303834, "grad_norm": 1.3033232688903809, "learning_rate": 6.961393938692051e-06, "loss": 0.532, "step": 1266 }, { "epoch": 0.07601847963040739, "grad_norm": 1.3299156427383423, "learning_rate": 6.961293128413238e-06, "loss": 0.4471, "step": 1267 }, { "epoch": 0.07607847843043139, "grad_norm": 1.3121764659881592, "learning_rate": 6.961192187417154e-06, "loss": 0.5254, "step": 1268 }, { "epoch": 0.07613847723045539, "grad_norm": 1.1819287538528442, "learning_rate": 6.961091115707609e-06, "loss": 0.4579, "step": 1269 }, { "epoch": 0.07619847603047938, "grad_norm": 1.2243441343307495, "learning_rate": 6.960989913288422e-06, "loss": 0.4918, "step": 1270 }, { "epoch": 0.0762584748305034, "grad_norm": 1.3371330499649048, "learning_rate": 6.960888580163415e-06, "loss": 0.4894, "step": 1271 }, { "epoch": 0.07631847363052739, "grad_norm": 1.162027359008789, "learning_rate": 6.960787116336413e-06, "loss": 0.5053, "step": 1272 }, { "epoch": 0.07637847243055139, "grad_norm": 1.4171252250671387, "learning_rate": 6.960685521811249e-06, "loss": 0.5114, "step": 1273 }, { "epoch": 0.07643847123057539, "grad_norm": 1.2675955295562744, "learning_rate": 6.96058379659176e-06, "loss": 0.47, "step": 1274 }, { "epoch": 0.07649847003059938, "grad_norm": 1.3695943355560303, "learning_rate": 6.960481940681788e-06, "loss": 0.5276, "step": 1275 }, { "epoch": 0.0765584688306234, "grad_norm": 1.2731232643127441, "learning_rate": 6.960379954085177e-06, "loss": 0.4644, "step": 1276 }, { "epoch": 0.07661846763064739, "grad_norm": 1.385469675064087, "learning_rate": 6.960277836805782e-06, "loss": 0.5498, "step": 1277 }, { "epoch": 0.07667846643067139, "grad_norm": 1.1665443181991577, "learning_rate": 6.960175588847459e-06, "loss": 0.4783, "step": 1278 }, { "epoch": 0.07673846523069539, "grad_norm": 1.3257983922958374, "learning_rate": 6.960073210214067e-06, "loss": 0.5092, "step": 1279 }, { "epoch": 0.07679846403071938, "grad_norm": 1.3758904933929443, "learning_rate": 6.959970700909475e-06, "loss": 0.5348, "step": 1280 }, { "epoch": 0.07685846283074338, "grad_norm": 1.0870906114578247, "learning_rate": 6.959868060937553e-06, "loss": 0.4801, "step": 1281 }, { "epoch": 0.07691846163076739, "grad_norm": 1.1860045194625854, "learning_rate": 6.9597652903021755e-06, "loss": 0.5117, "step": 1282 }, { "epoch": 0.07697846043079139, "grad_norm": 1.2535357475280762, "learning_rate": 6.959662389007228e-06, "loss": 0.5002, "step": 1283 }, { "epoch": 0.07703845923081538, "grad_norm": 1.1810160875320435, "learning_rate": 6.959559357056594e-06, "loss": 0.5136, "step": 1284 }, { "epoch": 0.07709845803083938, "grad_norm": 1.295444369316101, "learning_rate": 6.959456194454164e-06, "loss": 0.5306, "step": 1285 }, { "epoch": 0.07715845683086338, "grad_norm": 1.3424739837646484, "learning_rate": 6.959352901203835e-06, "loss": 0.5377, "step": 1286 }, { "epoch": 0.07721845563088738, "grad_norm": 1.373428225517273, "learning_rate": 6.9592494773095076e-06, "loss": 0.5167, "step": 1287 }, { "epoch": 0.07727845443091139, "grad_norm": 1.272725224494934, "learning_rate": 6.959145922775087e-06, "loss": 0.5182, "step": 1288 }, { "epoch": 0.07733845323093538, "grad_norm": 1.3164465427398682, "learning_rate": 6.959042237604486e-06, "loss": 0.5051, "step": 1289 }, { "epoch": 0.07739845203095938, "grad_norm": 1.4054090976715088, "learning_rate": 6.95893842180162e-06, "loss": 0.4975, "step": 1290 }, { "epoch": 0.07745845083098338, "grad_norm": 1.4760550260543823, "learning_rate": 6.958834475370407e-06, "loss": 0.5226, "step": 1291 }, { "epoch": 0.07751844963100737, "grad_norm": 1.2509589195251465, "learning_rate": 6.958730398314775e-06, "loss": 0.479, "step": 1292 }, { "epoch": 0.07757844843103139, "grad_norm": 1.373125672340393, "learning_rate": 6.958626190638652e-06, "loss": 0.5167, "step": 1293 }, { "epoch": 0.07763844723105538, "grad_norm": 1.2087458372116089, "learning_rate": 6.958521852345977e-06, "loss": 0.4296, "step": 1294 }, { "epoch": 0.07769844603107938, "grad_norm": 1.3945369720458984, "learning_rate": 6.9584173834406894e-06, "loss": 0.4953, "step": 1295 }, { "epoch": 0.07775844483110338, "grad_norm": 1.1905229091644287, "learning_rate": 6.958312783926732e-06, "loss": 0.4696, "step": 1296 }, { "epoch": 0.07781844363112737, "grad_norm": 1.5383471250534058, "learning_rate": 6.958208053808058e-06, "loss": 0.5669, "step": 1297 }, { "epoch": 0.07787844243115137, "grad_norm": 1.3093794584274292, "learning_rate": 6.95810319308862e-06, "loss": 0.4911, "step": 1298 }, { "epoch": 0.07793844123117538, "grad_norm": 1.309945821762085, "learning_rate": 6.95799820177238e-06, "loss": 0.444, "step": 1299 }, { "epoch": 0.07799844003119938, "grad_norm": 1.303745150566101, "learning_rate": 6.9578930798633035e-06, "loss": 0.4828, "step": 1300 }, { "epoch": 0.07805843883122338, "grad_norm": 1.3280235528945923, "learning_rate": 6.957787827365359e-06, "loss": 0.4571, "step": 1301 }, { "epoch": 0.07811843763124737, "grad_norm": 1.2002931833267212, "learning_rate": 6.957682444282521e-06, "loss": 0.4661, "step": 1302 }, { "epoch": 0.07817843643127137, "grad_norm": 1.5369153022766113, "learning_rate": 6.957576930618771e-06, "loss": 0.5341, "step": 1303 }, { "epoch": 0.07823843523129538, "grad_norm": 1.3424023389816284, "learning_rate": 6.957471286378094e-06, "loss": 0.5382, "step": 1304 }, { "epoch": 0.07829843403131938, "grad_norm": 1.2844496965408325, "learning_rate": 6.957365511564477e-06, "loss": 0.4729, "step": 1305 }, { "epoch": 0.07835843283134337, "grad_norm": 1.2418055534362793, "learning_rate": 6.957259606181917e-06, "loss": 0.525, "step": 1306 }, { "epoch": 0.07841843163136737, "grad_norm": 1.3625637292861938, "learning_rate": 6.957153570234413e-06, "loss": 0.5427, "step": 1307 }, { "epoch": 0.07847843043139137, "grad_norm": 1.3701037168502808, "learning_rate": 6.95704740372597e-06, "loss": 0.4923, "step": 1308 }, { "epoch": 0.07853842923141537, "grad_norm": 1.262550950050354, "learning_rate": 6.956941106660595e-06, "loss": 0.5608, "step": 1309 }, { "epoch": 0.07859842803143938, "grad_norm": 1.3197569847106934, "learning_rate": 6.956834679042306e-06, "loss": 0.5324, "step": 1310 }, { "epoch": 0.07865842683146337, "grad_norm": 1.3056007623672485, "learning_rate": 6.956728120875119e-06, "loss": 0.523, "step": 1311 }, { "epoch": 0.07871842563148737, "grad_norm": 1.1453989744186401, "learning_rate": 6.956621432163061e-06, "loss": 0.526, "step": 1312 }, { "epoch": 0.07877842443151137, "grad_norm": 1.2644730806350708, "learning_rate": 6.956514612910159e-06, "loss": 0.4885, "step": 1313 }, { "epoch": 0.07883842323153537, "grad_norm": 1.8731826543807983, "learning_rate": 6.956407663120449e-06, "loss": 0.5642, "step": 1314 }, { "epoch": 0.07889842203155936, "grad_norm": 1.215999960899353, "learning_rate": 6.956300582797968e-06, "loss": 0.5076, "step": 1315 }, { "epoch": 0.07895842083158337, "grad_norm": 1.4267934560775757, "learning_rate": 6.95619337194676e-06, "loss": 0.4964, "step": 1316 }, { "epoch": 0.07901841963160737, "grad_norm": 1.3831284046173096, "learning_rate": 6.956086030570877e-06, "loss": 0.4336, "step": 1317 }, { "epoch": 0.07907841843163137, "grad_norm": 1.3301266431808472, "learning_rate": 6.955978558674369e-06, "loss": 0.4477, "step": 1318 }, { "epoch": 0.07913841723165536, "grad_norm": 1.2845171689987183, "learning_rate": 6.955870956261296e-06, "loss": 0.5012, "step": 1319 }, { "epoch": 0.07919841603167936, "grad_norm": 1.3378139734268188, "learning_rate": 6.955763223335724e-06, "loss": 0.5237, "step": 1320 }, { "epoch": 0.07925841483170337, "grad_norm": 1.482195496559143, "learning_rate": 6.955655359901717e-06, "loss": 0.5129, "step": 1321 }, { "epoch": 0.07931841363172737, "grad_norm": 1.5031769275665283, "learning_rate": 6.955547365963351e-06, "loss": 0.5626, "step": 1322 }, { "epoch": 0.07937841243175137, "grad_norm": 1.2463765144348145, "learning_rate": 6.9554392415247054e-06, "loss": 0.4826, "step": 1323 }, { "epoch": 0.07943841123177536, "grad_norm": 1.341370701789856, "learning_rate": 6.955330986589863e-06, "loss": 0.5249, "step": 1324 }, { "epoch": 0.07949841003179936, "grad_norm": 1.3723270893096924, "learning_rate": 6.955222601162911e-06, "loss": 0.54, "step": 1325 }, { "epoch": 0.07955840883182336, "grad_norm": 1.2128883600234985, "learning_rate": 6.955114085247943e-06, "loss": 0.4425, "step": 1326 }, { "epoch": 0.07961840763184737, "grad_norm": 1.2687920331954956, "learning_rate": 6.955005438849058e-06, "loss": 0.4992, "step": 1327 }, { "epoch": 0.07967840643187137, "grad_norm": 1.3527755737304688, "learning_rate": 6.954896661970359e-06, "loss": 0.4724, "step": 1328 }, { "epoch": 0.07973840523189536, "grad_norm": 1.4341036081314087, "learning_rate": 6.954787754615953e-06, "loss": 0.5494, "step": 1329 }, { "epoch": 0.07979840403191936, "grad_norm": 1.3299334049224854, "learning_rate": 6.954678716789953e-06, "loss": 0.4692, "step": 1330 }, { "epoch": 0.07985840283194336, "grad_norm": 1.3600481748580933, "learning_rate": 6.9545695484964786e-06, "loss": 0.4795, "step": 1331 }, { "epoch": 0.07991840163196737, "grad_norm": 1.24569833278656, "learning_rate": 6.9544602497396505e-06, "loss": 0.5138, "step": 1332 }, { "epoch": 0.07997840043199136, "grad_norm": 1.2601150274276733, "learning_rate": 6.9543508205235966e-06, "loss": 0.5358, "step": 1333 }, { "epoch": 0.08003839923201536, "grad_norm": 1.4008508920669556, "learning_rate": 6.954241260852451e-06, "loss": 0.5604, "step": 1334 }, { "epoch": 0.08009839803203936, "grad_norm": 1.2876390218734741, "learning_rate": 6.954131570730352e-06, "loss": 0.5389, "step": 1335 }, { "epoch": 0.08015839683206336, "grad_norm": 1.329495906829834, "learning_rate": 6.954021750161438e-06, "loss": 0.4887, "step": 1336 }, { "epoch": 0.08021839563208735, "grad_norm": 1.2504723072052002, "learning_rate": 6.95391179914986e-06, "loss": 0.5079, "step": 1337 }, { "epoch": 0.08027839443211136, "grad_norm": 1.170859456062317, "learning_rate": 6.953801717699769e-06, "loss": 0.4965, "step": 1338 }, { "epoch": 0.08033839323213536, "grad_norm": 1.2669835090637207, "learning_rate": 6.9536915058153234e-06, "loss": 0.5253, "step": 1339 }, { "epoch": 0.08039839203215936, "grad_norm": 1.2507436275482178, "learning_rate": 6.953581163500683e-06, "loss": 0.4349, "step": 1340 }, { "epoch": 0.08045839083218335, "grad_norm": 1.3243494033813477, "learning_rate": 6.953470690760018e-06, "loss": 0.4714, "step": 1341 }, { "epoch": 0.08051838963220735, "grad_norm": 1.2777451276779175, "learning_rate": 6.953360087597499e-06, "loss": 0.4779, "step": 1342 }, { "epoch": 0.08057838843223135, "grad_norm": 1.2871060371398926, "learning_rate": 6.953249354017302e-06, "loss": 0.4904, "step": 1343 }, { "epoch": 0.08063838723225536, "grad_norm": 1.2799060344696045, "learning_rate": 6.953138490023611e-06, "loss": 0.5024, "step": 1344 }, { "epoch": 0.08069838603227936, "grad_norm": 1.2531769275665283, "learning_rate": 6.9530274956206105e-06, "loss": 0.4241, "step": 1345 }, { "epoch": 0.08075838483230335, "grad_norm": 1.400254726409912, "learning_rate": 6.952916370812494e-06, "loss": 0.5382, "step": 1346 }, { "epoch": 0.08081838363232735, "grad_norm": 1.2553058862686157, "learning_rate": 6.952805115603458e-06, "loss": 0.4864, "step": 1347 }, { "epoch": 0.08087838243235135, "grad_norm": 1.168061375617981, "learning_rate": 6.952693729997703e-06, "loss": 0.5174, "step": 1348 }, { "epoch": 0.08093838123237536, "grad_norm": 1.2653599977493286, "learning_rate": 6.952582213999436e-06, "loss": 0.5297, "step": 1349 }, { "epoch": 0.08099838003239936, "grad_norm": 1.3411884307861328, "learning_rate": 6.952470567612869e-06, "loss": 0.5325, "step": 1350 }, { "epoch": 0.08105837883242335, "grad_norm": 1.3429101705551147, "learning_rate": 6.95235879084222e-06, "loss": 0.5272, "step": 1351 }, { "epoch": 0.08111837763244735, "grad_norm": 1.133468747138977, "learning_rate": 6.952246883691707e-06, "loss": 0.4907, "step": 1352 }, { "epoch": 0.08117837643247135, "grad_norm": 1.3009145259857178, "learning_rate": 6.952134846165556e-06, "loss": 0.5443, "step": 1353 }, { "epoch": 0.08123837523249534, "grad_norm": 1.29888916015625, "learning_rate": 6.952022678268002e-06, "loss": 0.5581, "step": 1354 }, { "epoch": 0.08129837403251935, "grad_norm": 1.3539131879806519, "learning_rate": 6.9519103800032774e-06, "loss": 0.4936, "step": 1355 }, { "epoch": 0.08135837283254335, "grad_norm": 1.3672175407409668, "learning_rate": 6.951797951375625e-06, "loss": 0.5042, "step": 1356 }, { "epoch": 0.08141837163256735, "grad_norm": 1.3389567136764526, "learning_rate": 6.9516853923892915e-06, "loss": 0.5215, "step": 1357 }, { "epoch": 0.08147837043259135, "grad_norm": 1.4954349994659424, "learning_rate": 6.951572703048525e-06, "loss": 0.5444, "step": 1358 }, { "epoch": 0.08153836923261534, "grad_norm": 1.2227517366409302, "learning_rate": 6.951459883357584e-06, "loss": 0.5387, "step": 1359 }, { "epoch": 0.08159836803263935, "grad_norm": 1.2477613687515259, "learning_rate": 6.951346933320727e-06, "loss": 0.4767, "step": 1360 }, { "epoch": 0.08165836683266335, "grad_norm": 1.2235974073410034, "learning_rate": 6.951233852942221e-06, "loss": 0.492, "step": 1361 }, { "epoch": 0.08171836563268735, "grad_norm": 1.2958323955535889, "learning_rate": 6.951120642226336e-06, "loss": 0.4919, "step": 1362 }, { "epoch": 0.08177836443271135, "grad_norm": 1.416183352470398, "learning_rate": 6.9510073011773485e-06, "loss": 0.5436, "step": 1363 }, { "epoch": 0.08183836323273534, "grad_norm": 1.4119465351104736, "learning_rate": 6.9508938297995375e-06, "loss": 0.5493, "step": 1364 }, { "epoch": 0.08189836203275934, "grad_norm": 1.196991205215454, "learning_rate": 6.950780228097188e-06, "loss": 0.4432, "step": 1365 }, { "epoch": 0.08195836083278335, "grad_norm": 1.368012547492981, "learning_rate": 6.950666496074592e-06, "loss": 0.5381, "step": 1366 }, { "epoch": 0.08201835963280735, "grad_norm": 1.2197352647781372, "learning_rate": 6.950552633736042e-06, "loss": 0.5074, "step": 1367 }, { "epoch": 0.08207835843283134, "grad_norm": 1.4203037023544312, "learning_rate": 6.950438641085842e-06, "loss": 0.5141, "step": 1368 }, { "epoch": 0.08213835723285534, "grad_norm": 1.3740665912628174, "learning_rate": 6.950324518128293e-06, "loss": 0.5613, "step": 1369 }, { "epoch": 0.08219835603287934, "grad_norm": 1.2336182594299316, "learning_rate": 6.950210264867707e-06, "loss": 0.5474, "step": 1370 }, { "epoch": 0.08225835483290334, "grad_norm": 1.277390956878662, "learning_rate": 6.950095881308399e-06, "loss": 0.482, "step": 1371 }, { "epoch": 0.08231835363292735, "grad_norm": 1.4979277849197388, "learning_rate": 6.949981367454688e-06, "loss": 0.4936, "step": 1372 }, { "epoch": 0.08237835243295134, "grad_norm": 1.3561660051345825, "learning_rate": 6.949866723310898e-06, "loss": 0.5275, "step": 1373 }, { "epoch": 0.08243835123297534, "grad_norm": 1.414745807647705, "learning_rate": 6.94975194888136e-06, "loss": 0.4977, "step": 1374 }, { "epoch": 0.08249835003299934, "grad_norm": 1.3278751373291016, "learning_rate": 6.949637044170408e-06, "loss": 0.5249, "step": 1375 }, { "epoch": 0.08255834883302333, "grad_norm": 1.3266228437423706, "learning_rate": 6.949522009182382e-06, "loss": 0.5287, "step": 1376 }, { "epoch": 0.08261834763304735, "grad_norm": 1.2265814542770386, "learning_rate": 6.949406843921625e-06, "loss": 0.445, "step": 1377 }, { "epoch": 0.08267834643307134, "grad_norm": 1.3347238302230835, "learning_rate": 6.949291548392486e-06, "loss": 0.5097, "step": 1378 }, { "epoch": 0.08273834523309534, "grad_norm": 1.41946542263031, "learning_rate": 6.9491761225993214e-06, "loss": 0.5131, "step": 1379 }, { "epoch": 0.08279834403311934, "grad_norm": 1.2440766096115112, "learning_rate": 6.949060566546489e-06, "loss": 0.4486, "step": 1380 }, { "epoch": 0.08285834283314333, "grad_norm": 1.3836181163787842, "learning_rate": 6.948944880238352e-06, "loss": 0.5382, "step": 1381 }, { "epoch": 0.08291834163316733, "grad_norm": 1.273637294769287, "learning_rate": 6.948829063679282e-06, "loss": 0.5134, "step": 1382 }, { "epoch": 0.08297834043319134, "grad_norm": 1.3733147382736206, "learning_rate": 6.94871311687365e-06, "loss": 0.454, "step": 1383 }, { "epoch": 0.08303833923321534, "grad_norm": 1.2995854616165161, "learning_rate": 6.948597039825836e-06, "loss": 0.5156, "step": 1384 }, { "epoch": 0.08309833803323934, "grad_norm": 1.3246235847473145, "learning_rate": 6.948480832540223e-06, "loss": 0.5093, "step": 1385 }, { "epoch": 0.08315833683326333, "grad_norm": 1.1660252809524536, "learning_rate": 6.9483644950212e-06, "loss": 0.503, "step": 1386 }, { "epoch": 0.08321833563328733, "grad_norm": 1.1784629821777344, "learning_rate": 6.948248027273161e-06, "loss": 0.5141, "step": 1387 }, { "epoch": 0.08327833443331134, "grad_norm": 1.1842310428619385, "learning_rate": 6.948131429300506e-06, "loss": 0.4756, "step": 1388 }, { "epoch": 0.08333833323333534, "grad_norm": 1.2767298221588135, "learning_rate": 6.948014701107634e-06, "loss": 0.5038, "step": 1389 }, { "epoch": 0.08339833203335933, "grad_norm": 1.2577931880950928, "learning_rate": 6.947897842698958e-06, "loss": 0.54, "step": 1390 }, { "epoch": 0.08345833083338333, "grad_norm": 1.213409662246704, "learning_rate": 6.947780854078888e-06, "loss": 0.4807, "step": 1391 }, { "epoch": 0.08351832963340733, "grad_norm": 1.3701165914535522, "learning_rate": 6.947663735251843e-06, "loss": 0.547, "step": 1392 }, { "epoch": 0.08357832843343133, "grad_norm": 1.343451976776123, "learning_rate": 6.947546486222246e-06, "loss": 0.5384, "step": 1393 }, { "epoch": 0.08363832723345534, "grad_norm": 1.346948266029358, "learning_rate": 6.947429106994526e-06, "loss": 0.4737, "step": 1394 }, { "epoch": 0.08369832603347933, "grad_norm": 1.340469479560852, "learning_rate": 6.947311597573114e-06, "loss": 0.5079, "step": 1395 }, { "epoch": 0.08375832483350333, "grad_norm": 1.285022258758545, "learning_rate": 6.94719395796245e-06, "loss": 0.4929, "step": 1396 }, { "epoch": 0.08381832363352733, "grad_norm": 1.2701326608657837, "learning_rate": 6.947076188166975e-06, "loss": 0.4081, "step": 1397 }, { "epoch": 0.08387832243355132, "grad_norm": 1.195178747177124, "learning_rate": 6.946958288191138e-06, "loss": 0.4627, "step": 1398 }, { "epoch": 0.08393832123357532, "grad_norm": 1.4634722471237183, "learning_rate": 6.94684025803939e-06, "loss": 0.5098, "step": 1399 }, { "epoch": 0.08399832003359933, "grad_norm": 1.2288768291473389, "learning_rate": 6.946722097716189e-06, "loss": 0.5428, "step": 1400 }, { "epoch": 0.08405831883362333, "grad_norm": 1.3147097826004028, "learning_rate": 6.946603807225998e-06, "loss": 0.5526, "step": 1401 }, { "epoch": 0.08411831763364733, "grad_norm": 1.2661157846450806, "learning_rate": 6.946485386573284e-06, "loss": 0.4697, "step": 1402 }, { "epoch": 0.08417831643367132, "grad_norm": 1.3315821886062622, "learning_rate": 6.9463668357625196e-06, "loss": 0.5324, "step": 1403 }, { "epoch": 0.08423831523369532, "grad_norm": 1.2424330711364746, "learning_rate": 6.94624815479818e-06, "loss": 0.5222, "step": 1404 }, { "epoch": 0.08429831403371933, "grad_norm": 1.165648341178894, "learning_rate": 6.94612934368475e-06, "loss": 0.4315, "step": 1405 }, { "epoch": 0.08435831283374333, "grad_norm": 1.269059658050537, "learning_rate": 6.946010402426714e-06, "loss": 0.5325, "step": 1406 }, { "epoch": 0.08441831163376733, "grad_norm": 1.4287102222442627, "learning_rate": 6.945891331028565e-06, "loss": 0.514, "step": 1407 }, { "epoch": 0.08447831043379132, "grad_norm": 1.2818907499313354, "learning_rate": 6.9457721294948016e-06, "loss": 0.4884, "step": 1408 }, { "epoch": 0.08453830923381532, "grad_norm": 1.278200626373291, "learning_rate": 6.945652797829922e-06, "loss": 0.4755, "step": 1409 }, { "epoch": 0.08459830803383932, "grad_norm": 1.2840263843536377, "learning_rate": 6.945533336038436e-06, "loss": 0.4767, "step": 1410 }, { "epoch": 0.08465830683386333, "grad_norm": 1.2761929035186768, "learning_rate": 6.945413744124854e-06, "loss": 0.4939, "step": 1411 }, { "epoch": 0.08471830563388733, "grad_norm": 1.2668373584747314, "learning_rate": 6.94529402209369e-06, "loss": 0.4777, "step": 1412 }, { "epoch": 0.08477830443391132, "grad_norm": 1.3598229885101318, "learning_rate": 6.9451741699494695e-06, "loss": 0.507, "step": 1413 }, { "epoch": 0.08483830323393532, "grad_norm": 1.410400152206421, "learning_rate": 6.945054187696716e-06, "loss": 0.5212, "step": 1414 }, { "epoch": 0.08489830203395932, "grad_norm": 1.3727871179580688, "learning_rate": 6.944934075339961e-06, "loss": 0.544, "step": 1415 }, { "epoch": 0.08495830083398331, "grad_norm": 1.3742092847824097, "learning_rate": 6.944813832883742e-06, "loss": 0.3996, "step": 1416 }, { "epoch": 0.08501829963400732, "grad_norm": 1.3014177083969116, "learning_rate": 6.944693460332598e-06, "loss": 0.5232, "step": 1417 }, { "epoch": 0.08507829843403132, "grad_norm": 1.3484266996383667, "learning_rate": 6.944572957691077e-06, "loss": 0.5482, "step": 1418 }, { "epoch": 0.08513829723405532, "grad_norm": 1.298783302307129, "learning_rate": 6.944452324963728e-06, "loss": 0.5209, "step": 1419 }, { "epoch": 0.08519829603407932, "grad_norm": 1.364944577217102, "learning_rate": 6.9443315621551075e-06, "loss": 0.552, "step": 1420 }, { "epoch": 0.08525829483410331, "grad_norm": 1.3230094909667969, "learning_rate": 6.944210669269777e-06, "loss": 0.4761, "step": 1421 }, { "epoch": 0.08531829363412732, "grad_norm": 1.1478058099746704, "learning_rate": 6.944089646312301e-06, "loss": 0.5228, "step": 1422 }, { "epoch": 0.08537829243415132, "grad_norm": 1.2099430561065674, "learning_rate": 6.943968493287251e-06, "loss": 0.488, "step": 1423 }, { "epoch": 0.08543829123417532, "grad_norm": 1.2933924198150635, "learning_rate": 6.9438472101992e-06, "loss": 0.5651, "step": 1424 }, { "epoch": 0.08549829003419931, "grad_norm": 1.2157938480377197, "learning_rate": 6.943725797052732e-06, "loss": 0.484, "step": 1425 }, { "epoch": 0.08555828883422331, "grad_norm": 1.1639938354492188, "learning_rate": 6.9436042538524285e-06, "loss": 0.422, "step": 1426 }, { "epoch": 0.08561828763424731, "grad_norm": 1.2130526304244995, "learning_rate": 6.943482580602883e-06, "loss": 0.4607, "step": 1427 }, { "epoch": 0.08567828643427132, "grad_norm": 1.175947904586792, "learning_rate": 6.943360777308688e-06, "loss": 0.4878, "step": 1428 }, { "epoch": 0.08573828523429532, "grad_norm": 1.1292502880096436, "learning_rate": 6.943238843974444e-06, "loss": 0.4706, "step": 1429 }, { "epoch": 0.08579828403431931, "grad_norm": 1.4298229217529297, "learning_rate": 6.943116780604756e-06, "loss": 0.5242, "step": 1430 }, { "epoch": 0.08585828283434331, "grad_norm": 1.3149843215942383, "learning_rate": 6.942994587204234e-06, "loss": 0.5007, "step": 1431 }, { "epoch": 0.08591828163436731, "grad_norm": 1.4755786657333374, "learning_rate": 6.942872263777494e-06, "loss": 0.503, "step": 1432 }, { "epoch": 0.08597828043439132, "grad_norm": 1.3876466751098633, "learning_rate": 6.942749810329153e-06, "loss": 0.4245, "step": 1433 }, { "epoch": 0.08603827923441532, "grad_norm": 1.405505657196045, "learning_rate": 6.942627226863838e-06, "loss": 0.4696, "step": 1434 }, { "epoch": 0.08609827803443931, "grad_norm": 1.263688325881958, "learning_rate": 6.9425045133861765e-06, "loss": 0.4912, "step": 1435 }, { "epoch": 0.08615827683446331, "grad_norm": 1.2780413627624512, "learning_rate": 6.9423816699008036e-06, "loss": 0.4756, "step": 1436 }, { "epoch": 0.0862182756344873, "grad_norm": 1.2268266677856445, "learning_rate": 6.942258696412358e-06, "loss": 0.5001, "step": 1437 }, { "epoch": 0.0862782744345113, "grad_norm": 1.3212356567382812, "learning_rate": 6.942135592925485e-06, "loss": 0.4869, "step": 1438 }, { "epoch": 0.08633827323453531, "grad_norm": 1.3328773975372314, "learning_rate": 6.942012359444832e-06, "loss": 0.5219, "step": 1439 }, { "epoch": 0.08639827203455931, "grad_norm": 1.287213683128357, "learning_rate": 6.941888995975056e-06, "loss": 0.501, "step": 1440 }, { "epoch": 0.08645827083458331, "grad_norm": 1.2681726217269897, "learning_rate": 6.9417655025208115e-06, "loss": 0.5057, "step": 1441 }, { "epoch": 0.0865182696346073, "grad_norm": 1.320334792137146, "learning_rate": 6.941641879086766e-06, "loss": 0.4864, "step": 1442 }, { "epoch": 0.0865782684346313, "grad_norm": 1.4414477348327637, "learning_rate": 6.9415181256775865e-06, "loss": 0.5005, "step": 1443 }, { "epoch": 0.0866382672346553, "grad_norm": 1.364332914352417, "learning_rate": 6.941394242297947e-06, "loss": 0.4975, "step": 1444 }, { "epoch": 0.08669826603467931, "grad_norm": 1.219680905342102, "learning_rate": 6.941270228952526e-06, "loss": 0.5249, "step": 1445 }, { "epoch": 0.08675826483470331, "grad_norm": 1.2685644626617432, "learning_rate": 6.941146085646006e-06, "loss": 0.5413, "step": 1446 }, { "epoch": 0.0868182636347273, "grad_norm": 1.3009710311889648, "learning_rate": 6.9410218123830775e-06, "loss": 0.5248, "step": 1447 }, { "epoch": 0.0868782624347513, "grad_norm": 1.2026100158691406, "learning_rate": 6.940897409168432e-06, "loss": 0.4517, "step": 1448 }, { "epoch": 0.0869382612347753, "grad_norm": 1.3654725551605225, "learning_rate": 6.940772876006767e-06, "loss": 0.5487, "step": 1449 }, { "epoch": 0.08699826003479931, "grad_norm": 1.3261698484420776, "learning_rate": 6.940648212902787e-06, "loss": 0.4663, "step": 1450 }, { "epoch": 0.08705825883482331, "grad_norm": 1.4333161115646362, "learning_rate": 6.940523419861201e-06, "loss": 0.4569, "step": 1451 }, { "epoch": 0.0871182576348473, "grad_norm": 1.3446321487426758, "learning_rate": 6.940398496886719e-06, "loss": 0.4794, "step": 1452 }, { "epoch": 0.0871782564348713, "grad_norm": 1.2013492584228516, "learning_rate": 6.940273443984061e-06, "loss": 0.4853, "step": 1453 }, { "epoch": 0.0872382552348953, "grad_norm": 1.2026622295379639, "learning_rate": 6.940148261157947e-06, "loss": 0.499, "step": 1454 }, { "epoch": 0.0872982540349193, "grad_norm": 1.44822359085083, "learning_rate": 6.940022948413108e-06, "loss": 0.5764, "step": 1455 }, { "epoch": 0.0873582528349433, "grad_norm": 1.4900749921798706, "learning_rate": 6.939897505754275e-06, "loss": 0.5028, "step": 1456 }, { "epoch": 0.0874182516349673, "grad_norm": 1.4003037214279175, "learning_rate": 6.939771933186185e-06, "loss": 0.4888, "step": 1457 }, { "epoch": 0.0874782504349913, "grad_norm": 1.266086459159851, "learning_rate": 6.939646230713581e-06, "loss": 0.4924, "step": 1458 }, { "epoch": 0.0875382492350153, "grad_norm": 1.2813769578933716, "learning_rate": 6.93952039834121e-06, "loss": 0.5179, "step": 1459 }, { "epoch": 0.0875982480350393, "grad_norm": 1.3047797679901123, "learning_rate": 6.939394436073824e-06, "loss": 0.5423, "step": 1460 }, { "epoch": 0.0876582468350633, "grad_norm": 1.3021568059921265, "learning_rate": 6.939268343916179e-06, "loss": 0.4862, "step": 1461 }, { "epoch": 0.0877182456350873, "grad_norm": 1.2476991415023804, "learning_rate": 6.939142121873038e-06, "loss": 0.4543, "step": 1462 }, { "epoch": 0.0877782444351113, "grad_norm": 1.2371827363967896, "learning_rate": 6.939015769949169e-06, "loss": 0.4773, "step": 1463 }, { "epoch": 0.0878382432351353, "grad_norm": 1.3625088930130005, "learning_rate": 6.938889288149341e-06, "loss": 0.4595, "step": 1464 }, { "epoch": 0.0878982420351593, "grad_norm": 1.3695305585861206, "learning_rate": 6.938762676478333e-06, "loss": 0.5572, "step": 1465 }, { "epoch": 0.08795824083518329, "grad_norm": 1.2870416641235352, "learning_rate": 6.938635934940926e-06, "loss": 0.513, "step": 1466 }, { "epoch": 0.0880182396352073, "grad_norm": 1.280110239982605, "learning_rate": 6.9385090635419055e-06, "loss": 0.4777, "step": 1467 }, { "epoch": 0.0880782384352313, "grad_norm": 1.3959962129592896, "learning_rate": 6.938382062286063e-06, "loss": 0.4797, "step": 1468 }, { "epoch": 0.0881382372352553, "grad_norm": 1.1665676832199097, "learning_rate": 6.9382549311781965e-06, "loss": 0.5042, "step": 1469 }, { "epoch": 0.08819823603527929, "grad_norm": 1.2782464027404785, "learning_rate": 6.9381276702231046e-06, "loss": 0.4704, "step": 1470 }, { "epoch": 0.08825823483530329, "grad_norm": 1.1752378940582275, "learning_rate": 6.938000279425595e-06, "loss": 0.454, "step": 1471 }, { "epoch": 0.08831823363532729, "grad_norm": 1.5012304782867432, "learning_rate": 6.937872758790479e-06, "loss": 0.5734, "step": 1472 }, { "epoch": 0.0883782324353513, "grad_norm": 1.2427361011505127, "learning_rate": 6.9377451083225716e-06, "loss": 0.4929, "step": 1473 }, { "epoch": 0.0884382312353753, "grad_norm": 1.2697670459747314, "learning_rate": 6.937617328026695e-06, "loss": 0.4969, "step": 1474 }, { "epoch": 0.08849823003539929, "grad_norm": 1.3649290800094604, "learning_rate": 6.937489417907671e-06, "loss": 0.4707, "step": 1475 }, { "epoch": 0.08855822883542329, "grad_norm": 1.3203390836715698, "learning_rate": 6.9373613779703345e-06, "loss": 0.5374, "step": 1476 }, { "epoch": 0.08861822763544729, "grad_norm": 1.296085000038147, "learning_rate": 6.937233208219519e-06, "loss": 0.4736, "step": 1477 }, { "epoch": 0.0886782264354713, "grad_norm": 1.34046471118927, "learning_rate": 6.937104908660065e-06, "loss": 0.5189, "step": 1478 }, { "epoch": 0.0887382252354953, "grad_norm": 1.1855826377868652, "learning_rate": 6.936976479296819e-06, "loss": 0.5075, "step": 1479 }, { "epoch": 0.08879822403551929, "grad_norm": 1.308813452720642, "learning_rate": 6.936847920134629e-06, "loss": 0.5008, "step": 1480 }, { "epoch": 0.08885822283554329, "grad_norm": 1.4518874883651733, "learning_rate": 6.936719231178351e-06, "loss": 0.5482, "step": 1481 }, { "epoch": 0.08891822163556728, "grad_norm": 1.277183175086975, "learning_rate": 6.9365904124328476e-06, "loss": 0.4971, "step": 1482 }, { "epoch": 0.08897822043559128, "grad_norm": 1.2489736080169678, "learning_rate": 6.9364614639029795e-06, "loss": 0.4862, "step": 1483 }, { "epoch": 0.08903821923561529, "grad_norm": 1.19568932056427, "learning_rate": 6.9363323855936185e-06, "loss": 0.4837, "step": 1484 }, { "epoch": 0.08909821803563929, "grad_norm": 1.1772176027297974, "learning_rate": 6.936203177509639e-06, "loss": 0.4675, "step": 1485 }, { "epoch": 0.08915821683566329, "grad_norm": 1.3431663513183594, "learning_rate": 6.936073839655922e-06, "loss": 0.4843, "step": 1486 }, { "epoch": 0.08921821563568728, "grad_norm": 1.3140815496444702, "learning_rate": 6.9359443720373495e-06, "loss": 0.5335, "step": 1487 }, { "epoch": 0.08927821443571128, "grad_norm": 1.4191663265228271, "learning_rate": 6.935814774658813e-06, "loss": 0.5118, "step": 1488 }, { "epoch": 0.08933821323573529, "grad_norm": 1.349871277809143, "learning_rate": 6.935685047525205e-06, "loss": 0.4906, "step": 1489 }, { "epoch": 0.08939821203575929, "grad_norm": 1.2732813358306885, "learning_rate": 6.935555190641426e-06, "loss": 0.5283, "step": 1490 }, { "epoch": 0.08945821083578329, "grad_norm": 1.1672120094299316, "learning_rate": 6.93542520401238e-06, "loss": 0.4268, "step": 1491 }, { "epoch": 0.08951820963580728, "grad_norm": 1.2014317512512207, "learning_rate": 6.935295087642978e-06, "loss": 0.482, "step": 1492 }, { "epoch": 0.08957820843583128, "grad_norm": 1.196479082107544, "learning_rate": 6.9351648415381284e-06, "loss": 0.4542, "step": 1493 }, { "epoch": 0.08963820723585528, "grad_norm": 1.2882225513458252, "learning_rate": 6.935034465702755e-06, "loss": 0.4746, "step": 1494 }, { "epoch": 0.08969820603587929, "grad_norm": 1.32566237449646, "learning_rate": 6.934903960141779e-06, "loss": 0.5122, "step": 1495 }, { "epoch": 0.08975820483590329, "grad_norm": 1.3511483669281006, "learning_rate": 6.93477332486013e-06, "loss": 0.5028, "step": 1496 }, { "epoch": 0.08981820363592728, "grad_norm": 1.3088648319244385, "learning_rate": 6.9346425598627416e-06, "loss": 0.4819, "step": 1497 }, { "epoch": 0.08987820243595128, "grad_norm": 1.1611988544464111, "learning_rate": 6.934511665154553e-06, "loss": 0.5034, "step": 1498 }, { "epoch": 0.08993820123597528, "grad_norm": 1.3141915798187256, "learning_rate": 6.934380640740504e-06, "loss": 0.5209, "step": 1499 }, { "epoch": 0.08999820003599927, "grad_norm": 1.3664252758026123, "learning_rate": 6.934249486625546e-06, "loss": 0.5263, "step": 1500 }, { "epoch": 0.09005819883602328, "grad_norm": 1.2774754762649536, "learning_rate": 6.934118202814633e-06, "loss": 0.4927, "step": 1501 }, { "epoch": 0.09011819763604728, "grad_norm": 1.4506829977035522, "learning_rate": 6.93398678931272e-06, "loss": 0.4637, "step": 1502 }, { "epoch": 0.09017819643607128, "grad_norm": 1.2645806074142456, "learning_rate": 6.933855246124771e-06, "loss": 0.4805, "step": 1503 }, { "epoch": 0.09023819523609528, "grad_norm": 1.1780486106872559, "learning_rate": 6.933723573255754e-06, "loss": 0.466, "step": 1504 }, { "epoch": 0.09029819403611927, "grad_norm": 1.2256780862808228, "learning_rate": 6.9335917707106424e-06, "loss": 0.5052, "step": 1505 }, { "epoch": 0.09035819283614328, "grad_norm": 1.3002694845199585, "learning_rate": 6.933459838494411e-06, "loss": 0.4662, "step": 1506 }, { "epoch": 0.09041819163616728, "grad_norm": 1.3686420917510986, "learning_rate": 6.933327776612046e-06, "loss": 0.54, "step": 1507 }, { "epoch": 0.09047819043619128, "grad_norm": 1.1912357807159424, "learning_rate": 6.933195585068533e-06, "loss": 0.4056, "step": 1508 }, { "epoch": 0.09053818923621527, "grad_norm": 1.195619821548462, "learning_rate": 6.933063263868864e-06, "loss": 0.5042, "step": 1509 }, { "epoch": 0.09059818803623927, "grad_norm": 1.5585699081420898, "learning_rate": 6.932930813018037e-06, "loss": 0.5207, "step": 1510 }, { "epoch": 0.09065818683626327, "grad_norm": 1.2713667154312134, "learning_rate": 6.9327982325210535e-06, "loss": 0.5131, "step": 1511 }, { "epoch": 0.09071818563628728, "grad_norm": 1.3834534883499146, "learning_rate": 6.93266552238292e-06, "loss": 0.5644, "step": 1512 }, { "epoch": 0.09077818443631128, "grad_norm": 1.2832276821136475, "learning_rate": 6.932532682608649e-06, "loss": 0.5175, "step": 1513 }, { "epoch": 0.09083818323633527, "grad_norm": 1.259924054145813, "learning_rate": 6.9323997132032574e-06, "loss": 0.4577, "step": 1514 }, { "epoch": 0.09089818203635927, "grad_norm": 1.2699943780899048, "learning_rate": 6.932266614171767e-06, "loss": 0.5003, "step": 1515 }, { "epoch": 0.09095818083638327, "grad_norm": 1.236193299293518, "learning_rate": 6.932133385519204e-06, "loss": 0.4423, "step": 1516 }, { "epoch": 0.09101817963640728, "grad_norm": 1.2883992195129395, "learning_rate": 6.932000027250599e-06, "loss": 0.448, "step": 1517 }, { "epoch": 0.09107817843643128, "grad_norm": 1.1724923849105835, "learning_rate": 6.931866539370989e-06, "loss": 0.4626, "step": 1518 }, { "epoch": 0.09113817723645527, "grad_norm": 1.2460612058639526, "learning_rate": 6.931732921885416e-06, "loss": 0.4934, "step": 1519 }, { "epoch": 0.09119817603647927, "grad_norm": 1.2102051973342896, "learning_rate": 6.931599174798925e-06, "loss": 0.4705, "step": 1520 }, { "epoch": 0.09125817483650327, "grad_norm": 1.375780463218689, "learning_rate": 6.9314652981165676e-06, "loss": 0.4815, "step": 1521 }, { "epoch": 0.09131817363652726, "grad_norm": 1.1969190835952759, "learning_rate": 6.9313312918434e-06, "loss": 0.5414, "step": 1522 }, { "epoch": 0.09137817243655127, "grad_norm": 1.1803804636001587, "learning_rate": 6.9311971559844824e-06, "loss": 0.416, "step": 1523 }, { "epoch": 0.09143817123657527, "grad_norm": 1.191014051437378, "learning_rate": 6.9310628905448805e-06, "loss": 0.4651, "step": 1524 }, { "epoch": 0.09149817003659927, "grad_norm": 1.344130039215088, "learning_rate": 6.9309284955296655e-06, "loss": 0.5086, "step": 1525 }, { "epoch": 0.09155816883662327, "grad_norm": 1.2629897594451904, "learning_rate": 6.930793970943913e-06, "loss": 0.4672, "step": 1526 }, { "epoch": 0.09161816763664726, "grad_norm": 1.266947627067566, "learning_rate": 6.9306593167927015e-06, "loss": 0.4679, "step": 1527 }, { "epoch": 0.09167816643667126, "grad_norm": 1.2265112400054932, "learning_rate": 6.9305245330811186e-06, "loss": 0.4741, "step": 1528 }, { "epoch": 0.09173816523669527, "grad_norm": 1.3680968284606934, "learning_rate": 6.930389619814252e-06, "loss": 0.5036, "step": 1529 }, { "epoch": 0.09179816403671927, "grad_norm": 1.3540754318237305, "learning_rate": 6.930254576997201e-06, "loss": 0.4873, "step": 1530 }, { "epoch": 0.09185816283674327, "grad_norm": 1.4247390031814575, "learning_rate": 6.930119404635061e-06, "loss": 0.4927, "step": 1531 }, { "epoch": 0.09191816163676726, "grad_norm": 1.1954013109207153, "learning_rate": 6.929984102732939e-06, "loss": 0.4575, "step": 1532 }, { "epoch": 0.09197816043679126, "grad_norm": 1.5000903606414795, "learning_rate": 6.929848671295945e-06, "loss": 0.5363, "step": 1533 }, { "epoch": 0.09203815923681527, "grad_norm": 1.3423351049423218, "learning_rate": 6.929713110329192e-06, "loss": 0.5282, "step": 1534 }, { "epoch": 0.09209815803683927, "grad_norm": 1.3479409217834473, "learning_rate": 6.9295774198378e-06, "loss": 0.5537, "step": 1535 }, { "epoch": 0.09215815683686326, "grad_norm": 1.2769522666931152, "learning_rate": 6.929441599826896e-06, "loss": 0.5083, "step": 1536 }, { "epoch": 0.09221815563688726, "grad_norm": 1.2334827184677124, "learning_rate": 6.929305650301606e-06, "loss": 0.5041, "step": 1537 }, { "epoch": 0.09227815443691126, "grad_norm": 1.3807103633880615, "learning_rate": 6.9291695712670664e-06, "loss": 0.4831, "step": 1538 }, { "epoch": 0.09233815323693526, "grad_norm": 1.2612133026123047, "learning_rate": 6.929033362728414e-06, "loss": 0.4797, "step": 1539 }, { "epoch": 0.09239815203695927, "grad_norm": 1.3101292848587036, "learning_rate": 6.928897024690795e-06, "loss": 0.4888, "step": 1540 }, { "epoch": 0.09245815083698326, "grad_norm": 1.3851953744888306, "learning_rate": 6.928760557159357e-06, "loss": 0.4592, "step": 1541 }, { "epoch": 0.09251814963700726, "grad_norm": 1.3433085680007935, "learning_rate": 6.928623960139255e-06, "loss": 0.5029, "step": 1542 }, { "epoch": 0.09257814843703126, "grad_norm": 1.2333210706710815, "learning_rate": 6.928487233635646e-06, "loss": 0.554, "step": 1543 }, { "epoch": 0.09263814723705525, "grad_norm": 1.3889812231063843, "learning_rate": 6.928350377653695e-06, "loss": 0.518, "step": 1544 }, { "epoch": 0.09269814603707925, "grad_norm": 1.3432270288467407, "learning_rate": 6.9282133921985685e-06, "loss": 0.5205, "step": 1545 }, { "epoch": 0.09275814483710326, "grad_norm": 1.3023569583892822, "learning_rate": 6.928076277275442e-06, "loss": 0.4785, "step": 1546 }, { "epoch": 0.09281814363712726, "grad_norm": 1.3125932216644287, "learning_rate": 6.9279390328894925e-06, "loss": 0.4696, "step": 1547 }, { "epoch": 0.09287814243715126, "grad_norm": 1.2919162511825562, "learning_rate": 6.9278016590459034e-06, "loss": 0.4724, "step": 1548 }, { "epoch": 0.09293814123717525, "grad_norm": 1.1328033208847046, "learning_rate": 6.9276641557498625e-06, "loss": 0.4906, "step": 1549 }, { "epoch": 0.09299814003719925, "grad_norm": 1.3839470148086548, "learning_rate": 6.927526523006564e-06, "loss": 0.5658, "step": 1550 }, { "epoch": 0.09305813883722326, "grad_norm": 1.5386011600494385, "learning_rate": 6.927388760821203e-06, "loss": 0.4332, "step": 1551 }, { "epoch": 0.09311813763724726, "grad_norm": 1.3670015335083008, "learning_rate": 6.927250869198984e-06, "loss": 0.5224, "step": 1552 }, { "epoch": 0.09317813643727126, "grad_norm": 1.2907770872116089, "learning_rate": 6.927112848145115e-06, "loss": 0.5445, "step": 1553 }, { "epoch": 0.09323813523729525, "grad_norm": 1.2158203125, "learning_rate": 6.926974697664807e-06, "loss": 0.4471, "step": 1554 }, { "epoch": 0.09329813403731925, "grad_norm": 1.3221461772918701, "learning_rate": 6.926836417763277e-06, "loss": 0.459, "step": 1555 }, { "epoch": 0.09335813283734325, "grad_norm": 1.2304086685180664, "learning_rate": 6.926698008445751e-06, "loss": 0.5312, "step": 1556 }, { "epoch": 0.09341813163736726, "grad_norm": 1.4941346645355225, "learning_rate": 6.926559469717451e-06, "loss": 0.5243, "step": 1557 }, { "epoch": 0.09347813043739125, "grad_norm": 1.2896617650985718, "learning_rate": 6.926420801583611e-06, "loss": 0.5047, "step": 1558 }, { "epoch": 0.09353812923741525, "grad_norm": 1.3040289878845215, "learning_rate": 6.926282004049468e-06, "loss": 0.5319, "step": 1559 }, { "epoch": 0.09359812803743925, "grad_norm": 1.2505700588226318, "learning_rate": 6.926143077120265e-06, "loss": 0.5252, "step": 1560 }, { "epoch": 0.09365812683746325, "grad_norm": 1.17677640914917, "learning_rate": 6.926004020801247e-06, "loss": 0.4749, "step": 1561 }, { "epoch": 0.09371812563748726, "grad_norm": 1.279574990272522, "learning_rate": 6.925864835097665e-06, "loss": 0.5218, "step": 1562 }, { "epoch": 0.09377812443751125, "grad_norm": 1.4192434549331665, "learning_rate": 6.925725520014777e-06, "loss": 0.5458, "step": 1563 }, { "epoch": 0.09383812323753525, "grad_norm": 1.2687506675720215, "learning_rate": 6.925586075557845e-06, "loss": 0.4976, "step": 1564 }, { "epoch": 0.09389812203755925, "grad_norm": 1.3543413877487183, "learning_rate": 6.925446501732133e-06, "loss": 0.5014, "step": 1565 }, { "epoch": 0.09395812083758324, "grad_norm": 1.279927372932434, "learning_rate": 6.925306798542912e-06, "loss": 0.4528, "step": 1566 }, { "epoch": 0.09401811963760724, "grad_norm": 1.3483963012695312, "learning_rate": 6.92516696599546e-06, "loss": 0.5113, "step": 1567 }, { "epoch": 0.09407811843763125, "grad_norm": 1.2063157558441162, "learning_rate": 6.9250270040950565e-06, "loss": 0.4615, "step": 1568 }, { "epoch": 0.09413811723765525, "grad_norm": 1.284279704093933, "learning_rate": 6.924886912846987e-06, "loss": 0.4498, "step": 1569 }, { "epoch": 0.09419811603767925, "grad_norm": 1.2589317560195923, "learning_rate": 6.924746692256542e-06, "loss": 0.4798, "step": 1570 }, { "epoch": 0.09425811483770324, "grad_norm": 1.3513376712799072, "learning_rate": 6.924606342329019e-06, "loss": 0.5139, "step": 1571 }, { "epoch": 0.09431811363772724, "grad_norm": 1.490864872932434, "learning_rate": 6.924465863069716e-06, "loss": 0.5079, "step": 1572 }, { "epoch": 0.09437811243775124, "grad_norm": 1.2009292840957642, "learning_rate": 6.924325254483939e-06, "loss": 0.4774, "step": 1573 }, { "epoch": 0.09443811123777525, "grad_norm": 1.3739392757415771, "learning_rate": 6.9241845165769996e-06, "loss": 0.4953, "step": 1574 }, { "epoch": 0.09449811003779925, "grad_norm": 1.492958426475525, "learning_rate": 6.92404364935421e-06, "loss": 0.495, "step": 1575 }, { "epoch": 0.09455810883782324, "grad_norm": 1.2458382844924927, "learning_rate": 6.923902652820892e-06, "loss": 0.5051, "step": 1576 }, { "epoch": 0.09461810763784724, "grad_norm": 1.333823323249817, "learning_rate": 6.923761526982371e-06, "loss": 0.5186, "step": 1577 }, { "epoch": 0.09467810643787124, "grad_norm": 1.1895246505737305, "learning_rate": 6.923620271843974e-06, "loss": 0.484, "step": 1578 }, { "epoch": 0.09473810523789525, "grad_norm": 1.1273345947265625, "learning_rate": 6.923478887411039e-06, "loss": 0.5021, "step": 1579 }, { "epoch": 0.09479810403791925, "grad_norm": 1.2630513906478882, "learning_rate": 6.923337373688902e-06, "loss": 0.4431, "step": 1580 }, { "epoch": 0.09485810283794324, "grad_norm": 1.1675970554351807, "learning_rate": 6.923195730682911e-06, "loss": 0.4624, "step": 1581 }, { "epoch": 0.09491810163796724, "grad_norm": 1.1871446371078491, "learning_rate": 6.923053958398414e-06, "loss": 0.511, "step": 1582 }, { "epoch": 0.09497810043799124, "grad_norm": 1.2739300727844238, "learning_rate": 6.922912056840762e-06, "loss": 0.4895, "step": 1583 }, { "epoch": 0.09503809923801523, "grad_norm": 1.239865779876709, "learning_rate": 6.922770026015317e-06, "loss": 0.5006, "step": 1584 }, { "epoch": 0.09509809803803924, "grad_norm": 1.3065792322158813, "learning_rate": 6.922627865927443e-06, "loss": 0.4974, "step": 1585 }, { "epoch": 0.09515809683806324, "grad_norm": 1.4080677032470703, "learning_rate": 6.9224855765825075e-06, "loss": 0.5251, "step": 1586 }, { "epoch": 0.09521809563808724, "grad_norm": 1.33579683303833, "learning_rate": 6.922343157985885e-06, "loss": 0.4966, "step": 1587 }, { "epoch": 0.09527809443811124, "grad_norm": 1.3842450380325317, "learning_rate": 6.922200610142952e-06, "loss": 0.5204, "step": 1588 }, { "epoch": 0.09533809323813523, "grad_norm": 1.423022985458374, "learning_rate": 6.922057933059095e-06, "loss": 0.5091, "step": 1589 }, { "epoch": 0.09539809203815924, "grad_norm": 1.2297621965408325, "learning_rate": 6.9219151267397e-06, "loss": 0.5155, "step": 1590 }, { "epoch": 0.09545809083818324, "grad_norm": 1.2475935220718384, "learning_rate": 6.921772191190161e-06, "loss": 0.55, "step": 1591 }, { "epoch": 0.09551808963820724, "grad_norm": 1.3559850454330444, "learning_rate": 6.921629126415877e-06, "loss": 0.4836, "step": 1592 }, { "epoch": 0.09557808843823123, "grad_norm": 1.3365187644958496, "learning_rate": 6.921485932422248e-06, "loss": 0.5042, "step": 1593 }, { "epoch": 0.09563808723825523, "grad_norm": 1.3627134561538696, "learning_rate": 6.921342609214685e-06, "loss": 0.4977, "step": 1594 }, { "epoch": 0.09569808603827923, "grad_norm": 1.2404747009277344, "learning_rate": 6.921199156798599e-06, "loss": 0.5149, "step": 1595 }, { "epoch": 0.09575808483830324, "grad_norm": 1.316316843032837, "learning_rate": 6.921055575179407e-06, "loss": 0.4995, "step": 1596 }, { "epoch": 0.09581808363832724, "grad_norm": 1.342124581336975, "learning_rate": 6.920911864362533e-06, "loss": 0.5556, "step": 1597 }, { "epoch": 0.09587808243835123, "grad_norm": 1.1309682130813599, "learning_rate": 6.920768024353403e-06, "loss": 0.4695, "step": 1598 }, { "epoch": 0.09593808123837523, "grad_norm": 1.2808271646499634, "learning_rate": 6.920624055157451e-06, "loss": 0.4983, "step": 1599 }, { "epoch": 0.09599808003839923, "grad_norm": 1.2547012567520142, "learning_rate": 6.920479956780111e-06, "loss": 0.4712, "step": 1600 }, { "epoch": 0.09605807883842322, "grad_norm": 1.3546288013458252, "learning_rate": 6.920335729226828e-06, "loss": 0.4996, "step": 1601 }, { "epoch": 0.09611807763844724, "grad_norm": 1.2411235570907593, "learning_rate": 6.920191372503047e-06, "loss": 0.4871, "step": 1602 }, { "epoch": 0.09617807643847123, "grad_norm": 1.2373641729354858, "learning_rate": 6.920046886614219e-06, "loss": 0.4559, "step": 1603 }, { "epoch": 0.09623807523849523, "grad_norm": 1.326836109161377, "learning_rate": 6.9199022715658035e-06, "loss": 0.4776, "step": 1604 }, { "epoch": 0.09629807403851923, "grad_norm": 1.2252705097198486, "learning_rate": 6.919757527363259e-06, "loss": 0.4562, "step": 1605 }, { "epoch": 0.09635807283854322, "grad_norm": 1.476279377937317, "learning_rate": 6.919612654012054e-06, "loss": 0.4775, "step": 1606 }, { "epoch": 0.09641807163856723, "grad_norm": 1.3414818048477173, "learning_rate": 6.919467651517659e-06, "loss": 0.4974, "step": 1607 }, { "epoch": 0.09647807043859123, "grad_norm": 1.2893991470336914, "learning_rate": 6.919322519885549e-06, "loss": 0.5268, "step": 1608 }, { "epoch": 0.09653806923861523, "grad_norm": 1.379409909248352, "learning_rate": 6.9191772591212066e-06, "loss": 0.5557, "step": 1609 }, { "epoch": 0.09659806803863923, "grad_norm": 1.3282954692840576, "learning_rate": 6.9190318692301164e-06, "loss": 0.5307, "step": 1610 }, { "epoch": 0.09665806683866322, "grad_norm": 1.2569457292556763, "learning_rate": 6.91888635021777e-06, "loss": 0.5368, "step": 1611 }, { "epoch": 0.09671806563868722, "grad_norm": 1.3713964223861694, "learning_rate": 6.918740702089662e-06, "loss": 0.5104, "step": 1612 }, { "epoch": 0.09677806443871123, "grad_norm": 1.2652087211608887, "learning_rate": 6.9185949248512935e-06, "loss": 0.5164, "step": 1613 }, { "epoch": 0.09683806323873523, "grad_norm": 1.1373099088668823, "learning_rate": 6.918449018508169e-06, "loss": 0.3818, "step": 1614 }, { "epoch": 0.09689806203875923, "grad_norm": 1.3334484100341797, "learning_rate": 6.918302983065801e-06, "loss": 0.4795, "step": 1615 }, { "epoch": 0.09695806083878322, "grad_norm": 1.225228190422058, "learning_rate": 6.918156818529703e-06, "loss": 0.4932, "step": 1616 }, { "epoch": 0.09701805963880722, "grad_norm": 1.2658382654190063, "learning_rate": 6.918010524905394e-06, "loss": 0.5165, "step": 1617 }, { "epoch": 0.09707805843883123, "grad_norm": 1.2580201625823975, "learning_rate": 6.9178641021984e-06, "loss": 0.5138, "step": 1618 }, { "epoch": 0.09713805723885523, "grad_norm": 1.1706876754760742, "learning_rate": 6.917717550414252e-06, "loss": 0.4693, "step": 1619 }, { "epoch": 0.09719805603887922, "grad_norm": 1.2847048044204712, "learning_rate": 6.917570869558482e-06, "loss": 0.5576, "step": 1620 }, { "epoch": 0.09725805483890322, "grad_norm": 1.2437434196472168, "learning_rate": 6.917424059636631e-06, "loss": 0.4055, "step": 1621 }, { "epoch": 0.09731805363892722, "grad_norm": 1.182468295097351, "learning_rate": 6.917277120654243e-06, "loss": 0.4756, "step": 1622 }, { "epoch": 0.09737805243895122, "grad_norm": 1.2860982418060303, "learning_rate": 6.917130052616867e-06, "loss": 0.4932, "step": 1623 }, { "epoch": 0.09743805123897523, "grad_norm": 1.261956810951233, "learning_rate": 6.9169828555300565e-06, "loss": 0.4737, "step": 1624 }, { "epoch": 0.09749805003899922, "grad_norm": 1.3429723978042603, "learning_rate": 6.916835529399373e-06, "loss": 0.5131, "step": 1625 }, { "epoch": 0.09755804883902322, "grad_norm": 1.1637818813323975, "learning_rate": 6.916688074230378e-06, "loss": 0.4234, "step": 1626 }, { "epoch": 0.09761804763904722, "grad_norm": 1.3746085166931152, "learning_rate": 6.916540490028641e-06, "loss": 0.4567, "step": 1627 }, { "epoch": 0.09767804643907121, "grad_norm": 1.2507438659667969, "learning_rate": 6.916392776799734e-06, "loss": 0.4848, "step": 1628 }, { "epoch": 0.09773804523909521, "grad_norm": 1.4010099172592163, "learning_rate": 6.916244934549239e-06, "loss": 0.4917, "step": 1629 }, { "epoch": 0.09779804403911922, "grad_norm": 1.1723037958145142, "learning_rate": 6.916096963282735e-06, "loss": 0.4776, "step": 1630 }, { "epoch": 0.09785804283914322, "grad_norm": 1.457318663597107, "learning_rate": 6.915948863005814e-06, "loss": 0.562, "step": 1631 }, { "epoch": 0.09791804163916722, "grad_norm": 1.2573107481002808, "learning_rate": 6.9158006337240675e-06, "loss": 0.5019, "step": 1632 }, { "epoch": 0.09797804043919121, "grad_norm": 1.2563222646713257, "learning_rate": 6.9156522754430934e-06, "loss": 0.5013, "step": 1633 }, { "epoch": 0.09803803923921521, "grad_norm": 1.291675090789795, "learning_rate": 6.915503788168493e-06, "loss": 0.4983, "step": 1634 }, { "epoch": 0.09809803803923922, "grad_norm": 1.297382116317749, "learning_rate": 6.915355171905876e-06, "loss": 0.5219, "step": 1635 }, { "epoch": 0.09815803683926322, "grad_norm": 1.4838818311691284, "learning_rate": 6.9152064266608565e-06, "loss": 0.506, "step": 1636 }, { "epoch": 0.09821803563928722, "grad_norm": 1.306200623512268, "learning_rate": 6.915057552439048e-06, "loss": 0.5201, "step": 1637 }, { "epoch": 0.09827803443931121, "grad_norm": 1.3913273811340332, "learning_rate": 6.914908549246075e-06, "loss": 0.5193, "step": 1638 }, { "epoch": 0.09833803323933521, "grad_norm": 1.375288963317871, "learning_rate": 6.9147594170875645e-06, "loss": 0.5045, "step": 1639 }, { "epoch": 0.0983980320393592, "grad_norm": 1.2274906635284424, "learning_rate": 6.914610155969148e-06, "loss": 0.4891, "step": 1640 }, { "epoch": 0.09845803083938322, "grad_norm": 1.3646854162216187, "learning_rate": 6.914460765896464e-06, "loss": 0.4502, "step": 1641 }, { "epoch": 0.09851802963940721, "grad_norm": 1.3673419952392578, "learning_rate": 6.914311246875152e-06, "loss": 0.4837, "step": 1642 }, { "epoch": 0.09857802843943121, "grad_norm": 1.285658597946167, "learning_rate": 6.914161598910861e-06, "loss": 0.4883, "step": 1643 }, { "epoch": 0.09863802723945521, "grad_norm": 1.390169620513916, "learning_rate": 6.91401182200924e-06, "loss": 0.5142, "step": 1644 }, { "epoch": 0.0986980260394792, "grad_norm": 1.5216190814971924, "learning_rate": 6.913861916175947e-06, "loss": 0.566, "step": 1645 }, { "epoch": 0.09875802483950322, "grad_norm": 1.2052332162857056, "learning_rate": 6.913711881416643e-06, "loss": 0.4304, "step": 1646 }, { "epoch": 0.09881802363952721, "grad_norm": 1.344665765762329, "learning_rate": 6.913561717736994e-06, "loss": 0.4684, "step": 1647 }, { "epoch": 0.09887802243955121, "grad_norm": 1.2664995193481445, "learning_rate": 6.913411425142672e-06, "loss": 0.4923, "step": 1648 }, { "epoch": 0.09893802123957521, "grad_norm": 1.2520439624786377, "learning_rate": 6.913261003639351e-06, "loss": 0.4741, "step": 1649 }, { "epoch": 0.0989980200395992, "grad_norm": 1.194468379020691, "learning_rate": 6.913110453232713e-06, "loss": 0.5179, "step": 1650 }, { "epoch": 0.0990580188396232, "grad_norm": 1.3085541725158691, "learning_rate": 6.912959773928443e-06, "loss": 0.4808, "step": 1651 }, { "epoch": 0.09911801763964721, "grad_norm": 1.329337477684021, "learning_rate": 6.912808965732232e-06, "loss": 0.5033, "step": 1652 }, { "epoch": 0.09917801643967121, "grad_norm": 1.4189608097076416, "learning_rate": 6.912658028649775e-06, "loss": 0.4803, "step": 1653 }, { "epoch": 0.09923801523969521, "grad_norm": 1.3599406480789185, "learning_rate": 6.912506962686772e-06, "loss": 0.5252, "step": 1654 }, { "epoch": 0.0992980140397192, "grad_norm": 1.3082107305526733, "learning_rate": 6.912355767848929e-06, "loss": 0.4687, "step": 1655 }, { "epoch": 0.0993580128397432, "grad_norm": 1.2519147396087646, "learning_rate": 6.912204444141955e-06, "loss": 0.5434, "step": 1656 }, { "epoch": 0.0994180116397672, "grad_norm": 1.3540328741073608, "learning_rate": 6.912052991571565e-06, "loss": 0.4908, "step": 1657 }, { "epoch": 0.09947801043979121, "grad_norm": 7.520875930786133, "learning_rate": 6.9119014101434786e-06, "loss": 0.5551, "step": 1658 }, { "epoch": 0.0995380092398152, "grad_norm": 1.4448908567428589, "learning_rate": 6.911749699863421e-06, "loss": 0.447, "step": 1659 }, { "epoch": 0.0995980080398392, "grad_norm": 1.4078391790390015, "learning_rate": 6.9115978607371206e-06, "loss": 0.4971, "step": 1660 }, { "epoch": 0.0996580068398632, "grad_norm": 1.5008337497711182, "learning_rate": 6.911445892770312e-06, "loss": 0.4253, "step": 1661 }, { "epoch": 0.0997180056398872, "grad_norm": 1.333880066871643, "learning_rate": 6.911293795968735e-06, "loss": 0.4646, "step": 1662 }, { "epoch": 0.09977800443991121, "grad_norm": 1.4264949560165405, "learning_rate": 6.9111415703381335e-06, "loss": 0.5019, "step": 1663 }, { "epoch": 0.0998380032399352, "grad_norm": 1.1739461421966553, "learning_rate": 6.910989215884257e-06, "loss": 0.4794, "step": 1664 }, { "epoch": 0.0998980020399592, "grad_norm": 1.2650117874145508, "learning_rate": 6.910836732612856e-06, "loss": 0.5022, "step": 1665 }, { "epoch": 0.0999580008399832, "grad_norm": 1.3390183448791504, "learning_rate": 6.910684120529692e-06, "loss": 0.4948, "step": 1666 }, { "epoch": 0.1000179996400072, "grad_norm": 1.2563997507095337, "learning_rate": 6.910531379640529e-06, "loss": 0.5068, "step": 1667 }, { "epoch": 0.1000779984400312, "grad_norm": 1.2466388940811157, "learning_rate": 6.910378509951132e-06, "loss": 0.4474, "step": 1668 }, { "epoch": 0.1001379972400552, "grad_norm": 1.192063808441162, "learning_rate": 6.910225511467277e-06, "loss": 0.4245, "step": 1669 }, { "epoch": 0.1001979960400792, "grad_norm": 1.3614226579666138, "learning_rate": 6.910072384194742e-06, "loss": 0.5195, "step": 1670 }, { "epoch": 0.1002579948401032, "grad_norm": 1.1835999488830566, "learning_rate": 6.90991912813931e-06, "loss": 0.4699, "step": 1671 }, { "epoch": 0.1003179936401272, "grad_norm": 1.3878097534179688, "learning_rate": 6.909765743306767e-06, "loss": 0.5422, "step": 1672 }, { "epoch": 0.10037799244015119, "grad_norm": 1.3583905696868896, "learning_rate": 6.909612229702907e-06, "loss": 0.4851, "step": 1673 }, { "epoch": 0.1004379912401752, "grad_norm": 1.348077416419983, "learning_rate": 6.909458587333527e-06, "loss": 0.5726, "step": 1674 }, { "epoch": 0.1004979900401992, "grad_norm": 1.2978321313858032, "learning_rate": 6.9093048162044295e-06, "loss": 0.4722, "step": 1675 }, { "epoch": 0.1005579888402232, "grad_norm": 1.1906020641326904, "learning_rate": 6.909150916321423e-06, "loss": 0.4439, "step": 1676 }, { "epoch": 0.1006179876402472, "grad_norm": 1.2116206884384155, "learning_rate": 6.908996887690318e-06, "loss": 0.475, "step": 1677 }, { "epoch": 0.10067798644027119, "grad_norm": 1.1086351871490479, "learning_rate": 6.908842730316931e-06, "loss": 0.4931, "step": 1678 }, { "epoch": 0.10073798524029519, "grad_norm": 1.419748067855835, "learning_rate": 6.908688444207085e-06, "loss": 0.4496, "step": 1679 }, { "epoch": 0.1007979840403192, "grad_norm": 1.297559142112732, "learning_rate": 6.9085340293666065e-06, "loss": 0.462, "step": 1680 }, { "epoch": 0.1008579828403432, "grad_norm": 1.4282077550888062, "learning_rate": 6.908379485801328e-06, "loss": 0.5224, "step": 1681 }, { "epoch": 0.1009179816403672, "grad_norm": 1.1867951154708862, "learning_rate": 6.908224813517084e-06, "loss": 0.4672, "step": 1682 }, { "epoch": 0.10097798044039119, "grad_norm": 1.2906627655029297, "learning_rate": 6.908070012519717e-06, "loss": 0.5074, "step": 1683 }, { "epoch": 0.10103797924041519, "grad_norm": 1.2998472452163696, "learning_rate": 6.907915082815072e-06, "loss": 0.4869, "step": 1684 }, { "epoch": 0.10109797804043918, "grad_norm": 1.2205934524536133, "learning_rate": 6.907760024409001e-06, "loss": 0.4711, "step": 1685 }, { "epoch": 0.1011579768404632, "grad_norm": 1.3012760877609253, "learning_rate": 6.907604837307359e-06, "loss": 0.4771, "step": 1686 }, { "epoch": 0.10121797564048719, "grad_norm": 1.2625181674957275, "learning_rate": 6.907449521516009e-06, "loss": 0.5158, "step": 1687 }, { "epoch": 0.10127797444051119, "grad_norm": 1.3223953247070312, "learning_rate": 6.907294077040814e-06, "loss": 0.491, "step": 1688 }, { "epoch": 0.10133797324053519, "grad_norm": 1.3743678331375122, "learning_rate": 6.907138503887645e-06, "loss": 0.5403, "step": 1689 }, { "epoch": 0.10139797204055918, "grad_norm": 1.2091869115829468, "learning_rate": 6.906982802062377e-06, "loss": 0.4723, "step": 1690 }, { "epoch": 0.1014579708405832, "grad_norm": 1.303939938545227, "learning_rate": 6.906826971570892e-06, "loss": 0.4799, "step": 1691 }, { "epoch": 0.10151796964060719, "grad_norm": 1.1968134641647339, "learning_rate": 6.906671012419073e-06, "loss": 0.4653, "step": 1692 }, { "epoch": 0.10157796844063119, "grad_norm": 1.2464834451675415, "learning_rate": 6.906514924612812e-06, "loss": 0.5273, "step": 1693 }, { "epoch": 0.10163796724065519, "grad_norm": 1.3698383569717407, "learning_rate": 6.906358708158001e-06, "loss": 0.457, "step": 1694 }, { "epoch": 0.10169796604067918, "grad_norm": 1.1916301250457764, "learning_rate": 6.906202363060541e-06, "loss": 0.4677, "step": 1695 }, { "epoch": 0.10175796484070318, "grad_norm": 1.333652138710022, "learning_rate": 6.906045889326336e-06, "loss": 0.445, "step": 1696 }, { "epoch": 0.10181796364072719, "grad_norm": 1.36515474319458, "learning_rate": 6.9058892869612965e-06, "loss": 0.542, "step": 1697 }, { "epoch": 0.10187796244075119, "grad_norm": 1.281527042388916, "learning_rate": 6.905732555971335e-06, "loss": 0.4993, "step": 1698 }, { "epoch": 0.10193796124077519, "grad_norm": 1.2456449270248413, "learning_rate": 6.905575696362372e-06, "loss": 0.4639, "step": 1699 }, { "epoch": 0.10199796004079918, "grad_norm": 1.2723828554153442, "learning_rate": 6.90541870814033e-06, "loss": 0.4907, "step": 1700 }, { "epoch": 0.10205795884082318, "grad_norm": 1.2741669416427612, "learning_rate": 6.9052615913111405e-06, "loss": 0.5083, "step": 1701 }, { "epoch": 0.10211795764084718, "grad_norm": 1.1875972747802734, "learning_rate": 6.905104345880733e-06, "loss": 0.461, "step": 1702 }, { "epoch": 0.10217795644087119, "grad_norm": 1.2336740493774414, "learning_rate": 6.904946971855048e-06, "loss": 0.506, "step": 1703 }, { "epoch": 0.10223795524089518, "grad_norm": 1.2736164331436157, "learning_rate": 6.90478946924003e-06, "loss": 0.5009, "step": 1704 }, { "epoch": 0.10229795404091918, "grad_norm": 1.267605185508728, "learning_rate": 6.904631838041625e-06, "loss": 0.4941, "step": 1705 }, { "epoch": 0.10235795284094318, "grad_norm": 1.2378753423690796, "learning_rate": 6.904474078265787e-06, "loss": 0.5141, "step": 1706 }, { "epoch": 0.10241795164096718, "grad_norm": 1.2701505422592163, "learning_rate": 6.904316189918474e-06, "loss": 0.484, "step": 1707 }, { "epoch": 0.10247795044099119, "grad_norm": 1.238232135772705, "learning_rate": 6.9041581730056485e-06, "loss": 0.4784, "step": 1708 }, { "epoch": 0.10253794924101518, "grad_norm": 1.3611171245574951, "learning_rate": 6.904000027533278e-06, "loss": 0.5147, "step": 1709 }, { "epoch": 0.10259794804103918, "grad_norm": 1.3866478204727173, "learning_rate": 6.903841753507335e-06, "loss": 0.4602, "step": 1710 }, { "epoch": 0.10265794684106318, "grad_norm": 1.3615275621414185, "learning_rate": 6.9036833509337975e-06, "loss": 0.5268, "step": 1711 }, { "epoch": 0.10271794564108717, "grad_norm": 1.511892318725586, "learning_rate": 6.903524819818646e-06, "loss": 0.5692, "step": 1712 }, { "epoch": 0.10277794444111117, "grad_norm": 1.3264336585998535, "learning_rate": 6.903366160167869e-06, "loss": 0.5131, "step": 1713 }, { "epoch": 0.10283794324113518, "grad_norm": 1.189056396484375, "learning_rate": 6.903207371987458e-06, "loss": 0.4388, "step": 1714 }, { "epoch": 0.10289794204115918, "grad_norm": 1.27509605884552, "learning_rate": 6.903048455283409e-06, "loss": 0.4612, "step": 1715 }, { "epoch": 0.10295794084118318, "grad_norm": 1.2149338722229004, "learning_rate": 6.902889410061725e-06, "loss": 0.5286, "step": 1716 }, { "epoch": 0.10301793964120717, "grad_norm": 1.258662462234497, "learning_rate": 6.902730236328411e-06, "loss": 0.4724, "step": 1717 }, { "epoch": 0.10307793844123117, "grad_norm": 1.2635632753372192, "learning_rate": 6.902570934089479e-06, "loss": 0.4863, "step": 1718 }, { "epoch": 0.10313793724125518, "grad_norm": 1.280929684638977, "learning_rate": 6.9024115033509445e-06, "loss": 0.5244, "step": 1719 }, { "epoch": 0.10319793604127918, "grad_norm": 1.2096370458602905, "learning_rate": 6.902251944118829e-06, "loss": 0.4423, "step": 1720 }, { "epoch": 0.10325793484130318, "grad_norm": 1.2306963205337524, "learning_rate": 6.902092256399158e-06, "loss": 0.4604, "step": 1721 }, { "epoch": 0.10331793364132717, "grad_norm": 1.0869557857513428, "learning_rate": 6.9019324401979625e-06, "loss": 0.4577, "step": 1722 }, { "epoch": 0.10337793244135117, "grad_norm": 1.1172205209732056, "learning_rate": 6.901772495521278e-06, "loss": 0.4076, "step": 1723 }, { "epoch": 0.10343793124137517, "grad_norm": 1.2785282135009766, "learning_rate": 6.901612422375144e-06, "loss": 0.5325, "step": 1724 }, { "epoch": 0.10349793004139918, "grad_norm": 1.2831820249557495, "learning_rate": 6.9014522207656075e-06, "loss": 0.4638, "step": 1725 }, { "epoch": 0.10355792884142317, "grad_norm": 1.2074884176254272, "learning_rate": 6.901291890698718e-06, "loss": 0.5168, "step": 1726 }, { "epoch": 0.10361792764144717, "grad_norm": 1.2790154218673706, "learning_rate": 6.9011314321805285e-06, "loss": 0.5124, "step": 1727 }, { "epoch": 0.10367792644147117, "grad_norm": 1.2173938751220703, "learning_rate": 6.900970845217102e-06, "loss": 0.4611, "step": 1728 }, { "epoch": 0.10373792524149517, "grad_norm": 1.2508152723312378, "learning_rate": 6.900810129814501e-06, "loss": 0.4956, "step": 1729 }, { "epoch": 0.10379792404151916, "grad_norm": 1.3548344373703003, "learning_rate": 6.9006492859787944e-06, "loss": 0.4992, "step": 1730 }, { "epoch": 0.10385792284154317, "grad_norm": 1.1893945932388306, "learning_rate": 6.9004883137160585e-06, "loss": 0.4758, "step": 1731 }, { "epoch": 0.10391792164156717, "grad_norm": 1.4019091129302979, "learning_rate": 6.900327213032371e-06, "loss": 0.5198, "step": 1732 }, { "epoch": 0.10397792044159117, "grad_norm": 1.1777015924453735, "learning_rate": 6.900165983933817e-06, "loss": 0.4669, "step": 1733 }, { "epoch": 0.10403791924161516, "grad_norm": 1.4650604724884033, "learning_rate": 6.900004626426484e-06, "loss": 0.5293, "step": 1734 }, { "epoch": 0.10409791804163916, "grad_norm": 1.1616824865341187, "learning_rate": 6.899843140516468e-06, "loss": 0.4509, "step": 1735 }, { "epoch": 0.10415791684166317, "grad_norm": 1.2194422483444214, "learning_rate": 6.899681526209866e-06, "loss": 0.5043, "step": 1736 }, { "epoch": 0.10421791564168717, "grad_norm": 1.3144428730010986, "learning_rate": 6.899519783512781e-06, "loss": 0.482, "step": 1737 }, { "epoch": 0.10427791444171117, "grad_norm": 1.3736228942871094, "learning_rate": 6.899357912431323e-06, "loss": 0.5107, "step": 1738 }, { "epoch": 0.10433791324173516, "grad_norm": 1.2058343887329102, "learning_rate": 6.899195912971603e-06, "loss": 0.5035, "step": 1739 }, { "epoch": 0.10439791204175916, "grad_norm": 1.1819759607315063, "learning_rate": 6.8990337851397405e-06, "loss": 0.4951, "step": 1740 }, { "epoch": 0.10445791084178316, "grad_norm": 1.27242112159729, "learning_rate": 6.898871528941858e-06, "loss": 0.4332, "step": 1741 }, { "epoch": 0.10451790964180717, "grad_norm": 1.28763747215271, "learning_rate": 6.898709144384082e-06, "loss": 0.5264, "step": 1742 }, { "epoch": 0.10457790844183117, "grad_norm": 1.377210021018982, "learning_rate": 6.898546631472547e-06, "loss": 0.4979, "step": 1743 }, { "epoch": 0.10463790724185516, "grad_norm": 1.1163784265518188, "learning_rate": 6.89838399021339e-06, "loss": 0.4279, "step": 1744 }, { "epoch": 0.10469790604187916, "grad_norm": 1.3525199890136719, "learning_rate": 6.898221220612752e-06, "loss": 0.5204, "step": 1745 }, { "epoch": 0.10475790484190316, "grad_norm": 1.257727861404419, "learning_rate": 6.8980583226767805e-06, "loss": 0.4998, "step": 1746 }, { "epoch": 0.10481790364192717, "grad_norm": 1.4645801782608032, "learning_rate": 6.897895296411629e-06, "loss": 0.5331, "step": 1747 }, { "epoch": 0.10487790244195117, "grad_norm": 1.2178657054901123, "learning_rate": 6.897732141823452e-06, "loss": 0.4887, "step": 1748 }, { "epoch": 0.10493790124197516, "grad_norm": 1.3609590530395508, "learning_rate": 6.897568858918412e-06, "loss": 0.476, "step": 1749 }, { "epoch": 0.10499790004199916, "grad_norm": 1.262253999710083, "learning_rate": 6.897405447702675e-06, "loss": 0.4866, "step": 1750 }, { "epoch": 0.10505789884202316, "grad_norm": 1.4682128429412842, "learning_rate": 6.897241908182413e-06, "loss": 0.5009, "step": 1751 }, { "epoch": 0.10511789764204715, "grad_norm": 1.4079500436782837, "learning_rate": 6.897078240363803e-06, "loss": 0.5228, "step": 1752 }, { "epoch": 0.10517789644207116, "grad_norm": 1.1566390991210938, "learning_rate": 6.896914444253024e-06, "loss": 0.4692, "step": 1753 }, { "epoch": 0.10523789524209516, "grad_norm": 1.254787802696228, "learning_rate": 6.896750519856263e-06, "loss": 0.4996, "step": 1754 }, { "epoch": 0.10529789404211916, "grad_norm": 1.196568489074707, "learning_rate": 6.896586467179709e-06, "loss": 0.5063, "step": 1755 }, { "epoch": 0.10535789284214316, "grad_norm": 1.409050464630127, "learning_rate": 6.896422286229561e-06, "loss": 0.4818, "step": 1756 }, { "epoch": 0.10541789164216715, "grad_norm": 1.320106863975525, "learning_rate": 6.896257977012017e-06, "loss": 0.5257, "step": 1757 }, { "epoch": 0.10547789044219115, "grad_norm": 1.2520273923873901, "learning_rate": 6.896093539533281e-06, "loss": 0.5067, "step": 1758 }, { "epoch": 0.10553788924221516, "grad_norm": 1.2643972635269165, "learning_rate": 6.895928973799566e-06, "loss": 0.4771, "step": 1759 }, { "epoch": 0.10559788804223916, "grad_norm": 1.171598196029663, "learning_rate": 6.895764279817086e-06, "loss": 0.4434, "step": 1760 }, { "epoch": 0.10565788684226315, "grad_norm": 1.2728217840194702, "learning_rate": 6.89559945759206e-06, "loss": 0.5259, "step": 1761 }, { "epoch": 0.10571788564228715, "grad_norm": 1.3738783597946167, "learning_rate": 6.895434507130712e-06, "loss": 0.5198, "step": 1762 }, { "epoch": 0.10577788444231115, "grad_norm": 1.3299320936203003, "learning_rate": 6.895269428439272e-06, "loss": 0.4939, "step": 1763 }, { "epoch": 0.10583788324233516, "grad_norm": 1.304261326789856, "learning_rate": 6.895104221523975e-06, "loss": 0.4848, "step": 1764 }, { "epoch": 0.10589788204235916, "grad_norm": 1.1759990453720093, "learning_rate": 6.894938886391059e-06, "loss": 0.5447, "step": 1765 }, { "epoch": 0.10595788084238315, "grad_norm": 1.3493131399154663, "learning_rate": 6.894773423046768e-06, "loss": 0.4701, "step": 1766 }, { "epoch": 0.10601787964240715, "grad_norm": 1.2394435405731201, "learning_rate": 6.8946078314973525e-06, "loss": 0.483, "step": 1767 }, { "epoch": 0.10607787844243115, "grad_norm": 1.2691375017166138, "learning_rate": 6.894442111749066e-06, "loss": 0.4903, "step": 1768 }, { "epoch": 0.10613787724245514, "grad_norm": 1.3816791772842407, "learning_rate": 6.8942762638081645e-06, "loss": 0.4721, "step": 1769 }, { "epoch": 0.10619787604247916, "grad_norm": 1.3502453565597534, "learning_rate": 6.894110287680913e-06, "loss": 0.5456, "step": 1770 }, { "epoch": 0.10625787484250315, "grad_norm": 1.3076664209365845, "learning_rate": 6.89394418337358e-06, "loss": 0.4934, "step": 1771 }, { "epoch": 0.10631787364252715, "grad_norm": 1.1916248798370361, "learning_rate": 6.893777950892438e-06, "loss": 0.508, "step": 1772 }, { "epoch": 0.10637787244255115, "grad_norm": 1.2168611288070679, "learning_rate": 6.893611590243764e-06, "loss": 0.4237, "step": 1773 }, { "epoch": 0.10643787124257514, "grad_norm": 1.216245174407959, "learning_rate": 6.893445101433841e-06, "loss": 0.498, "step": 1774 }, { "epoch": 0.10649787004259915, "grad_norm": 1.326242208480835, "learning_rate": 6.893278484468959e-06, "loss": 0.4518, "step": 1775 }, { "epoch": 0.10655786884262315, "grad_norm": 1.2905046939849854, "learning_rate": 6.893111739355407e-06, "loss": 0.4777, "step": 1776 }, { "epoch": 0.10661786764264715, "grad_norm": 1.2602653503417969, "learning_rate": 6.892944866099484e-06, "loss": 0.4741, "step": 1777 }, { "epoch": 0.10667786644267115, "grad_norm": 1.1941994428634644, "learning_rate": 6.8927778647074915e-06, "loss": 0.471, "step": 1778 }, { "epoch": 0.10673786524269514, "grad_norm": 1.2418676614761353, "learning_rate": 6.892610735185737e-06, "loss": 0.498, "step": 1779 }, { "epoch": 0.10679786404271914, "grad_norm": 1.2459219694137573, "learning_rate": 6.892443477540531e-06, "loss": 0.5169, "step": 1780 }, { "epoch": 0.10685786284274315, "grad_norm": 1.2908133268356323, "learning_rate": 6.892276091778191e-06, "loss": 0.4486, "step": 1781 }, { "epoch": 0.10691786164276715, "grad_norm": 1.3518190383911133, "learning_rate": 6.892108577905038e-06, "loss": 0.4986, "step": 1782 }, { "epoch": 0.10697786044279115, "grad_norm": 1.2346111536026, "learning_rate": 6.8919409359273986e-06, "loss": 0.4971, "step": 1783 }, { "epoch": 0.10703785924281514, "grad_norm": 1.3003393411636353, "learning_rate": 6.891773165851603e-06, "loss": 0.4957, "step": 1784 }, { "epoch": 0.10709785804283914, "grad_norm": 1.2870843410491943, "learning_rate": 6.891605267683989e-06, "loss": 0.5099, "step": 1785 }, { "epoch": 0.10715785684286314, "grad_norm": 1.2066833972930908, "learning_rate": 6.891437241430896e-06, "loss": 0.4395, "step": 1786 }, { "epoch": 0.10721785564288715, "grad_norm": 1.2880136966705322, "learning_rate": 6.89126908709867e-06, "loss": 0.5109, "step": 1787 }, { "epoch": 0.10727785444291114, "grad_norm": 1.4050018787384033, "learning_rate": 6.891100804693661e-06, "loss": 0.5227, "step": 1788 }, { "epoch": 0.10733785324293514, "grad_norm": 1.3418283462524414, "learning_rate": 6.890932394222223e-06, "loss": 0.4878, "step": 1789 }, { "epoch": 0.10739785204295914, "grad_norm": 1.2279322147369385, "learning_rate": 6.890763855690719e-06, "loss": 0.5099, "step": 1790 }, { "epoch": 0.10745785084298314, "grad_norm": 1.1923794746398926, "learning_rate": 6.8905951891055115e-06, "loss": 0.4843, "step": 1791 }, { "epoch": 0.10751784964300715, "grad_norm": 1.4139236211776733, "learning_rate": 6.890426394472971e-06, "loss": 0.4503, "step": 1792 }, { "epoch": 0.10757784844303114, "grad_norm": 1.3099626302719116, "learning_rate": 6.890257471799474e-06, "loss": 0.4634, "step": 1793 }, { "epoch": 0.10763784724305514, "grad_norm": 1.2136013507843018, "learning_rate": 6.890088421091398e-06, "loss": 0.462, "step": 1794 }, { "epoch": 0.10769784604307914, "grad_norm": 1.1788885593414307, "learning_rate": 6.889919242355126e-06, "loss": 0.4679, "step": 1795 }, { "epoch": 0.10775784484310313, "grad_norm": 1.177225112915039, "learning_rate": 6.88974993559705e-06, "loss": 0.4209, "step": 1796 }, { "epoch": 0.10781784364312713, "grad_norm": 1.2973233461380005, "learning_rate": 6.889580500823562e-06, "loss": 0.4675, "step": 1797 }, { "epoch": 0.10787784244315114, "grad_norm": 1.3637120723724365, "learning_rate": 6.889410938041062e-06, "loss": 0.5026, "step": 1798 }, { "epoch": 0.10793784124317514, "grad_norm": 1.3100253343582153, "learning_rate": 6.889241247255952e-06, "loss": 0.5116, "step": 1799 }, { "epoch": 0.10799784004319914, "grad_norm": 1.2003610134124756, "learning_rate": 6.889071428474642e-06, "loss": 0.4603, "step": 1800 }, { "epoch": 0.10805783884322313, "grad_norm": 1.3390403985977173, "learning_rate": 6.888901481703544e-06, "loss": 0.4809, "step": 1801 }, { "epoch": 0.10811783764324713, "grad_norm": 1.2424954175949097, "learning_rate": 6.888731406949078e-06, "loss": 0.4343, "step": 1802 }, { "epoch": 0.10817783644327114, "grad_norm": 1.2746797800064087, "learning_rate": 6.8885612042176645e-06, "loss": 0.4268, "step": 1803 }, { "epoch": 0.10823783524329514, "grad_norm": 1.2460256814956665, "learning_rate": 6.888390873515733e-06, "loss": 0.462, "step": 1804 }, { "epoch": 0.10829783404331914, "grad_norm": 1.1694481372833252, "learning_rate": 6.8882204148497165e-06, "loss": 0.484, "step": 1805 }, { "epoch": 0.10835783284334313, "grad_norm": 1.2601165771484375, "learning_rate": 6.888049828226052e-06, "loss": 0.5085, "step": 1806 }, { "epoch": 0.10841783164336713, "grad_norm": 1.384376883506775, "learning_rate": 6.88787911365118e-06, "loss": 0.5137, "step": 1807 }, { "epoch": 0.10847783044339113, "grad_norm": 1.3190549612045288, "learning_rate": 6.8877082711315495e-06, "loss": 0.5573, "step": 1808 }, { "epoch": 0.10853782924341514, "grad_norm": 1.2651971578598022, "learning_rate": 6.887537300673612e-06, "loss": 0.4943, "step": 1809 }, { "epoch": 0.10859782804343913, "grad_norm": 1.2179796695709229, "learning_rate": 6.887366202283825e-06, "loss": 0.4836, "step": 1810 }, { "epoch": 0.10865782684346313, "grad_norm": 1.1881242990493774, "learning_rate": 6.887194975968648e-06, "loss": 0.4629, "step": 1811 }, { "epoch": 0.10871782564348713, "grad_norm": 1.4120113849639893, "learning_rate": 6.887023621734549e-06, "loss": 0.5349, "step": 1812 }, { "epoch": 0.10877782444351113, "grad_norm": 1.326563835144043, "learning_rate": 6.886852139588e-06, "loss": 0.4951, "step": 1813 }, { "epoch": 0.10883782324353512, "grad_norm": 1.1578088998794556, "learning_rate": 6.886680529535475e-06, "loss": 0.4604, "step": 1814 }, { "epoch": 0.10889782204355913, "grad_norm": 1.3106184005737305, "learning_rate": 6.886508791583457e-06, "loss": 0.4921, "step": 1815 }, { "epoch": 0.10895782084358313, "grad_norm": 1.521634578704834, "learning_rate": 6.886336925738431e-06, "loss": 0.555, "step": 1816 }, { "epoch": 0.10901781964360713, "grad_norm": 1.4337220191955566, "learning_rate": 6.886164932006886e-06, "loss": 0.4738, "step": 1817 }, { "epoch": 0.10907781844363112, "grad_norm": 1.4588439464569092, "learning_rate": 6.88599281039532e-06, "loss": 0.4927, "step": 1818 }, { "epoch": 0.10913781724365512, "grad_norm": 1.303240418434143, "learning_rate": 6.885820560910231e-06, "loss": 0.5102, "step": 1819 }, { "epoch": 0.10919781604367913, "grad_norm": 1.2764630317687988, "learning_rate": 6.885648183558127e-06, "loss": 0.4679, "step": 1820 }, { "epoch": 0.10925781484370313, "grad_norm": 1.1463592052459717, "learning_rate": 6.885475678345515e-06, "loss": 0.4441, "step": 1821 }, { "epoch": 0.10931781364372713, "grad_norm": 1.1964776515960693, "learning_rate": 6.885303045278911e-06, "loss": 0.4625, "step": 1822 }, { "epoch": 0.10937781244375112, "grad_norm": 1.2167267799377441, "learning_rate": 6.885130284364834e-06, "loss": 0.4694, "step": 1823 }, { "epoch": 0.10943781124377512, "grad_norm": 1.261090636253357, "learning_rate": 6.8849573956098085e-06, "loss": 0.4852, "step": 1824 }, { "epoch": 0.10949781004379912, "grad_norm": 1.2873035669326782, "learning_rate": 6.884784379020366e-06, "loss": 0.5798, "step": 1825 }, { "epoch": 0.10955780884382313, "grad_norm": 1.2700148820877075, "learning_rate": 6.884611234603036e-06, "loss": 0.4225, "step": 1826 }, { "epoch": 0.10961780764384713, "grad_norm": 1.3748990297317505, "learning_rate": 6.8844379623643605e-06, "loss": 0.5319, "step": 1827 }, { "epoch": 0.10967780644387112, "grad_norm": 1.4182312488555908, "learning_rate": 6.884264562310884e-06, "loss": 0.4537, "step": 1828 }, { "epoch": 0.10973780524389512, "grad_norm": 1.5851696729660034, "learning_rate": 6.884091034449152e-06, "loss": 0.581, "step": 1829 }, { "epoch": 0.10979780404391912, "grad_norm": 1.1904051303863525, "learning_rate": 6.883917378785722e-06, "loss": 0.4738, "step": 1830 }, { "epoch": 0.10985780284394311, "grad_norm": 1.3124338388442993, "learning_rate": 6.8837435953271475e-06, "loss": 0.519, "step": 1831 }, { "epoch": 0.10991780164396713, "grad_norm": 1.1062191724777222, "learning_rate": 6.883569684079994e-06, "loss": 0.4588, "step": 1832 }, { "epoch": 0.10997780044399112, "grad_norm": 1.448736548423767, "learning_rate": 6.88339564505083e-06, "loss": 0.5112, "step": 1833 }, { "epoch": 0.11003779924401512, "grad_norm": 1.3941315412521362, "learning_rate": 6.8832214782462255e-06, "loss": 0.4549, "step": 1834 }, { "epoch": 0.11009779804403912, "grad_norm": 1.2176249027252197, "learning_rate": 6.883047183672761e-06, "loss": 0.4616, "step": 1835 }, { "epoch": 0.11015779684406311, "grad_norm": 1.2637749910354614, "learning_rate": 6.882872761337016e-06, "loss": 0.4515, "step": 1836 }, { "epoch": 0.11021779564408712, "grad_norm": 1.3454397916793823, "learning_rate": 6.882698211245581e-06, "loss": 0.4761, "step": 1837 }, { "epoch": 0.11027779444411112, "grad_norm": 1.2335013151168823, "learning_rate": 6.8825235334050455e-06, "loss": 0.489, "step": 1838 }, { "epoch": 0.11033779324413512, "grad_norm": 1.2911388874053955, "learning_rate": 6.882348727822007e-06, "loss": 0.4573, "step": 1839 }, { "epoch": 0.11039779204415912, "grad_norm": 1.2410814762115479, "learning_rate": 6.882173794503067e-06, "loss": 0.4742, "step": 1840 }, { "epoch": 0.11045779084418311, "grad_norm": 1.209367275238037, "learning_rate": 6.881998733454831e-06, "loss": 0.4753, "step": 1841 }, { "epoch": 0.11051778964420711, "grad_norm": 1.1918784379959106, "learning_rate": 6.881823544683912e-06, "loss": 0.4368, "step": 1842 }, { "epoch": 0.11057778844423112, "grad_norm": 1.1992701292037964, "learning_rate": 6.8816482281969245e-06, "loss": 0.4572, "step": 1843 }, { "epoch": 0.11063778724425512, "grad_norm": 1.461879014968872, "learning_rate": 6.881472784000492e-06, "loss": 0.5518, "step": 1844 }, { "epoch": 0.11069778604427911, "grad_norm": 1.2330750226974487, "learning_rate": 6.881297212101237e-06, "loss": 0.4784, "step": 1845 }, { "epoch": 0.11075778484430311, "grad_norm": 1.439900517463684, "learning_rate": 6.881121512505791e-06, "loss": 0.4859, "step": 1846 }, { "epoch": 0.11081778364432711, "grad_norm": 1.3106999397277832, "learning_rate": 6.880945685220791e-06, "loss": 0.4622, "step": 1847 }, { "epoch": 0.11087778244435112, "grad_norm": 1.4199719429016113, "learning_rate": 6.880769730252875e-06, "loss": 0.5438, "step": 1848 }, { "epoch": 0.11093778124437512, "grad_norm": 1.3166592121124268, "learning_rate": 6.880593647608689e-06, "loss": 0.5506, "step": 1849 }, { "epoch": 0.11099778004439911, "grad_norm": 1.3819849491119385, "learning_rate": 6.880417437294883e-06, "loss": 0.4726, "step": 1850 }, { "epoch": 0.11105777884442311, "grad_norm": 1.2247880697250366, "learning_rate": 6.880241099318113e-06, "loss": 0.5093, "step": 1851 }, { "epoch": 0.11111777764444711, "grad_norm": 1.3785446882247925, "learning_rate": 6.8800646336850365e-06, "loss": 0.4753, "step": 1852 }, { "epoch": 0.1111777764444711, "grad_norm": 1.3584429025650024, "learning_rate": 6.8798880404023176e-06, "loss": 0.4895, "step": 1853 }, { "epoch": 0.11123777524449512, "grad_norm": 1.5423229932785034, "learning_rate": 6.879711319476625e-06, "loss": 0.4986, "step": 1854 }, { "epoch": 0.11129777404451911, "grad_norm": 1.2822065353393555, "learning_rate": 6.879534470914635e-06, "loss": 0.4809, "step": 1855 }, { "epoch": 0.11135777284454311, "grad_norm": 1.30703604221344, "learning_rate": 6.879357494723026e-06, "loss": 0.4401, "step": 1856 }, { "epoch": 0.1114177716445671, "grad_norm": 1.3602838516235352, "learning_rate": 6.8791803909084805e-06, "loss": 0.4659, "step": 1857 }, { "epoch": 0.1114777704445911, "grad_norm": 1.313852310180664, "learning_rate": 6.879003159477687e-06, "loss": 0.4642, "step": 1858 }, { "epoch": 0.1115377692446151, "grad_norm": 1.2966501712799072, "learning_rate": 6.878825800437338e-06, "loss": 0.4681, "step": 1859 }, { "epoch": 0.11159776804463911, "grad_norm": 1.3197375535964966, "learning_rate": 6.878648313794133e-06, "loss": 0.4781, "step": 1860 }, { "epoch": 0.11165776684466311, "grad_norm": 1.2826361656188965, "learning_rate": 6.878470699554774e-06, "loss": 0.4674, "step": 1861 }, { "epoch": 0.1117177656446871, "grad_norm": 1.27534818649292, "learning_rate": 6.878292957725969e-06, "loss": 0.4987, "step": 1862 }, { "epoch": 0.1117777644447111, "grad_norm": 1.3435323238372803, "learning_rate": 6.878115088314431e-06, "loss": 0.5326, "step": 1863 }, { "epoch": 0.1118377632447351, "grad_norm": 1.2770718336105347, "learning_rate": 6.877937091326876e-06, "loss": 0.5212, "step": 1864 }, { "epoch": 0.11189776204475911, "grad_norm": 1.401060700416565, "learning_rate": 6.8777589667700275e-06, "loss": 0.5235, "step": 1865 }, { "epoch": 0.11195776084478311, "grad_norm": 1.387635350227356, "learning_rate": 6.877580714650611e-06, "loss": 0.5488, "step": 1866 }, { "epoch": 0.1120177596448071, "grad_norm": 1.1599234342575073, "learning_rate": 6.877402334975361e-06, "loss": 0.4688, "step": 1867 }, { "epoch": 0.1120777584448311, "grad_norm": 1.35148024559021, "learning_rate": 6.87722382775101e-06, "loss": 0.5509, "step": 1868 }, { "epoch": 0.1121377572448551, "grad_norm": 1.360990285873413, "learning_rate": 6.877045192984303e-06, "loss": 0.4977, "step": 1869 }, { "epoch": 0.1121977560448791, "grad_norm": 1.1385407447814941, "learning_rate": 6.876866430681984e-06, "loss": 0.5106, "step": 1870 }, { "epoch": 0.11225775484490311, "grad_norm": 1.3902772665023804, "learning_rate": 6.876687540850806e-06, "loss": 0.5165, "step": 1871 }, { "epoch": 0.1123177536449271, "grad_norm": 1.3843672275543213, "learning_rate": 6.876508523497523e-06, "loss": 0.5347, "step": 1872 }, { "epoch": 0.1123777524449511, "grad_norm": 1.2943096160888672, "learning_rate": 6.876329378628896e-06, "loss": 0.4785, "step": 1873 }, { "epoch": 0.1124377512449751, "grad_norm": 1.3831065893173218, "learning_rate": 6.876150106251691e-06, "loss": 0.5207, "step": 1874 }, { "epoch": 0.1124977500449991, "grad_norm": 1.2849664688110352, "learning_rate": 6.8759707063726786e-06, "loss": 0.4701, "step": 1875 }, { "epoch": 0.1125577488450231, "grad_norm": 1.2491364479064941, "learning_rate": 6.875791178998634e-06, "loss": 0.465, "step": 1876 }, { "epoch": 0.1126177476450471, "grad_norm": 1.2969472408294678, "learning_rate": 6.875611524136336e-06, "loss": 0.5362, "step": 1877 }, { "epoch": 0.1126777464450711, "grad_norm": 1.2791163921356201, "learning_rate": 6.8754317417925705e-06, "loss": 0.4453, "step": 1878 }, { "epoch": 0.1127377452450951, "grad_norm": 1.2402045726776123, "learning_rate": 6.8752518319741255e-06, "loss": 0.4944, "step": 1879 }, { "epoch": 0.1127977440451191, "grad_norm": 1.2490015029907227, "learning_rate": 6.875071794687797e-06, "loss": 0.4603, "step": 1880 }, { "epoch": 0.11285774284514309, "grad_norm": 1.271740436553955, "learning_rate": 6.874891629940384e-06, "loss": 0.4411, "step": 1881 }, { "epoch": 0.1129177416451671, "grad_norm": 1.233751893043518, "learning_rate": 6.874711337738689e-06, "loss": 0.4613, "step": 1882 }, { "epoch": 0.1129777404451911, "grad_norm": 1.2457857131958008, "learning_rate": 6.8745309180895235e-06, "loss": 0.5004, "step": 1883 }, { "epoch": 0.1130377392452151, "grad_norm": 1.4886388778686523, "learning_rate": 6.874350370999699e-06, "loss": 0.5272, "step": 1884 }, { "epoch": 0.1130977380452391, "grad_norm": 1.2970069646835327, "learning_rate": 6.874169696476034e-06, "loss": 0.5117, "step": 1885 }, { "epoch": 0.11315773684526309, "grad_norm": 1.2194715738296509, "learning_rate": 6.873988894525352e-06, "loss": 0.4909, "step": 1886 }, { "epoch": 0.11321773564528709, "grad_norm": 1.2953470945358276, "learning_rate": 6.873807965154481e-06, "loss": 0.4329, "step": 1887 }, { "epoch": 0.1132777344453111, "grad_norm": 1.2096054553985596, "learning_rate": 6.873626908370255e-06, "loss": 0.489, "step": 1888 }, { "epoch": 0.1133377332453351, "grad_norm": 1.139928936958313, "learning_rate": 6.873445724179511e-06, "loss": 0.4589, "step": 1889 }, { "epoch": 0.11339773204535909, "grad_norm": 1.113566517829895, "learning_rate": 6.873264412589091e-06, "loss": 0.4675, "step": 1890 }, { "epoch": 0.11345773084538309, "grad_norm": 1.3220926523208618, "learning_rate": 6.873082973605844e-06, "loss": 0.5033, "step": 1891 }, { "epoch": 0.11351772964540709, "grad_norm": 1.2928071022033691, "learning_rate": 6.87290140723662e-06, "loss": 0.4822, "step": 1892 }, { "epoch": 0.1135777284454311, "grad_norm": 1.2978705167770386, "learning_rate": 6.872719713488276e-06, "loss": 0.5141, "step": 1893 }, { "epoch": 0.1136377272454551, "grad_norm": 1.2394545078277588, "learning_rate": 6.872537892367675e-06, "loss": 0.4332, "step": 1894 }, { "epoch": 0.11369772604547909, "grad_norm": 1.3528155088424683, "learning_rate": 6.872355943881682e-06, "loss": 0.5137, "step": 1895 }, { "epoch": 0.11375772484550309, "grad_norm": 1.2306400537490845, "learning_rate": 6.872173868037171e-06, "loss": 0.4698, "step": 1896 }, { "epoch": 0.11381772364552709, "grad_norm": 1.3003748655319214, "learning_rate": 6.8719916648410155e-06, "loss": 0.5387, "step": 1897 }, { "epoch": 0.11387772244555108, "grad_norm": 1.4087804555892944, "learning_rate": 6.871809334300098e-06, "loss": 0.5214, "step": 1898 }, { "epoch": 0.1139377212455751, "grad_norm": 1.3559386730194092, "learning_rate": 6.871626876421304e-06, "loss": 0.4753, "step": 1899 }, { "epoch": 0.11399772004559909, "grad_norm": 1.3352997303009033, "learning_rate": 6.871444291211524e-06, "loss": 0.4865, "step": 1900 }, { "epoch": 0.11405771884562309, "grad_norm": 1.340793251991272, "learning_rate": 6.871261578677653e-06, "loss": 0.508, "step": 1901 }, { "epoch": 0.11411771764564708, "grad_norm": 1.3992024660110474, "learning_rate": 6.871078738826592e-06, "loss": 0.4685, "step": 1902 }, { "epoch": 0.11417771644567108, "grad_norm": 1.3515491485595703, "learning_rate": 6.870895771665246e-06, "loss": 0.4761, "step": 1903 }, { "epoch": 0.11423771524569509, "grad_norm": 1.1278362274169922, "learning_rate": 6.8707126772005235e-06, "loss": 0.464, "step": 1904 }, { "epoch": 0.11429771404571909, "grad_norm": 1.3662946224212646, "learning_rate": 6.870529455439341e-06, "loss": 0.5089, "step": 1905 }, { "epoch": 0.11435771284574309, "grad_norm": 1.205378770828247, "learning_rate": 6.8703461063886165e-06, "loss": 0.4515, "step": 1906 }, { "epoch": 0.11441771164576708, "grad_norm": 1.1570812463760376, "learning_rate": 6.870162630055275e-06, "loss": 0.429, "step": 1907 }, { "epoch": 0.11447771044579108, "grad_norm": 1.2136656045913696, "learning_rate": 6.869979026446246e-06, "loss": 0.4761, "step": 1908 }, { "epoch": 0.11453770924581508, "grad_norm": 1.3092211484909058, "learning_rate": 6.869795295568462e-06, "loss": 0.4513, "step": 1909 }, { "epoch": 0.11459770804583909, "grad_norm": 1.3029937744140625, "learning_rate": 6.869611437428862e-06, "loss": 0.5386, "step": 1910 }, { "epoch": 0.11465770684586309, "grad_norm": 1.3315315246582031, "learning_rate": 6.86942745203439e-06, "loss": 0.504, "step": 1911 }, { "epoch": 0.11471770564588708, "grad_norm": 1.321027159690857, "learning_rate": 6.869243339391995e-06, "loss": 0.485, "step": 1912 }, { "epoch": 0.11477770444591108, "grad_norm": 1.3216592073440552, "learning_rate": 6.86905909950863e-06, "loss": 0.4874, "step": 1913 }, { "epoch": 0.11483770324593508, "grad_norm": 1.3058459758758545, "learning_rate": 6.868874732391251e-06, "loss": 0.4549, "step": 1914 }, { "epoch": 0.11489770204595907, "grad_norm": 1.1413111686706543, "learning_rate": 6.868690238046822e-06, "loss": 0.5055, "step": 1915 }, { "epoch": 0.11495770084598309, "grad_norm": 1.3342063426971436, "learning_rate": 6.86850561648231e-06, "loss": 0.5069, "step": 1916 }, { "epoch": 0.11501769964600708, "grad_norm": 1.4465882778167725, "learning_rate": 6.868320867704689e-06, "loss": 0.5218, "step": 1917 }, { "epoch": 0.11507769844603108, "grad_norm": 1.3467192649841309, "learning_rate": 6.868135991720934e-06, "loss": 0.5156, "step": 1918 }, { "epoch": 0.11513769724605508, "grad_norm": 1.200728416442871, "learning_rate": 6.867950988538028e-06, "loss": 0.503, "step": 1919 }, { "epoch": 0.11519769604607907, "grad_norm": 1.1500033140182495, "learning_rate": 6.867765858162957e-06, "loss": 0.4748, "step": 1920 }, { "epoch": 0.11525769484610308, "grad_norm": 1.2786346673965454, "learning_rate": 6.867580600602714e-06, "loss": 0.4941, "step": 1921 }, { "epoch": 0.11531769364612708, "grad_norm": 1.3135744333267212, "learning_rate": 6.8673952158642936e-06, "loss": 0.4926, "step": 1922 }, { "epoch": 0.11537769244615108, "grad_norm": 1.2489192485809326, "learning_rate": 6.867209703954698e-06, "loss": 0.4846, "step": 1923 }, { "epoch": 0.11543769124617508, "grad_norm": 1.3427804708480835, "learning_rate": 6.867024064880933e-06, "loss": 0.5453, "step": 1924 }, { "epoch": 0.11549769004619907, "grad_norm": 1.3180360794067383, "learning_rate": 6.866838298650009e-06, "loss": 0.5415, "step": 1925 }, { "epoch": 0.11555768884622307, "grad_norm": 1.4515876770019531, "learning_rate": 6.866652405268941e-06, "loss": 0.4804, "step": 1926 }, { "epoch": 0.11561768764624708, "grad_norm": 1.5398738384246826, "learning_rate": 6.8664663847447515e-06, "loss": 0.5189, "step": 1927 }, { "epoch": 0.11567768644627108, "grad_norm": 1.2407110929489136, "learning_rate": 6.866280237084464e-06, "loss": 0.5358, "step": 1928 }, { "epoch": 0.11573768524629507, "grad_norm": 1.3954874277114868, "learning_rate": 6.866093962295107e-06, "loss": 0.4719, "step": 1929 }, { "epoch": 0.11579768404631907, "grad_norm": 1.2236274480819702, "learning_rate": 6.865907560383718e-06, "loss": 0.4454, "step": 1930 }, { "epoch": 0.11585768284634307, "grad_norm": 1.3839889764785767, "learning_rate": 6.865721031357336e-06, "loss": 0.4987, "step": 1931 }, { "epoch": 0.11591768164636708, "grad_norm": 1.269003987312317, "learning_rate": 6.865534375223005e-06, "loss": 0.4872, "step": 1932 }, { "epoch": 0.11597768044639108, "grad_norm": 1.1415554285049438, "learning_rate": 6.865347591987774e-06, "loss": 0.4565, "step": 1933 }, { "epoch": 0.11603767924641507, "grad_norm": 1.2703737020492554, "learning_rate": 6.865160681658696e-06, "loss": 0.4645, "step": 1934 }, { "epoch": 0.11609767804643907, "grad_norm": 1.3647692203521729, "learning_rate": 6.86497364424283e-06, "loss": 0.5227, "step": 1935 }, { "epoch": 0.11615767684646307, "grad_norm": 1.1949213743209839, "learning_rate": 6.864786479747242e-06, "loss": 0.4769, "step": 1936 }, { "epoch": 0.11621767564648706, "grad_norm": 1.280218482017517, "learning_rate": 6.864599188178998e-06, "loss": 0.5385, "step": 1937 }, { "epoch": 0.11627767444651108, "grad_norm": 1.3312599658966064, "learning_rate": 6.864411769545171e-06, "loss": 0.511, "step": 1938 }, { "epoch": 0.11633767324653507, "grad_norm": 1.2531968355178833, "learning_rate": 6.864224223852842e-06, "loss": 0.4583, "step": 1939 }, { "epoch": 0.11639767204655907, "grad_norm": 1.3613864183425903, "learning_rate": 6.8640365511090885e-06, "loss": 0.5204, "step": 1940 }, { "epoch": 0.11645767084658307, "grad_norm": 1.414637804031372, "learning_rate": 6.863848751321002e-06, "loss": 0.5227, "step": 1941 }, { "epoch": 0.11651766964660706, "grad_norm": 1.3047443628311157, "learning_rate": 6.8636608244956754e-06, "loss": 0.4991, "step": 1942 }, { "epoch": 0.11657766844663106, "grad_norm": 1.2727962732315063, "learning_rate": 6.863472770640202e-06, "loss": 0.5137, "step": 1943 }, { "epoch": 0.11663766724665507, "grad_norm": 1.1749255657196045, "learning_rate": 6.8632845897616875e-06, "loss": 0.4223, "step": 1944 }, { "epoch": 0.11669766604667907, "grad_norm": 1.2122489213943481, "learning_rate": 6.863096281867236e-06, "loss": 0.4869, "step": 1945 }, { "epoch": 0.11675766484670307, "grad_norm": 1.3640626668930054, "learning_rate": 6.862907846963961e-06, "loss": 0.4591, "step": 1946 }, { "epoch": 0.11681766364672706, "grad_norm": 1.4041801691055298, "learning_rate": 6.862719285058977e-06, "loss": 0.4845, "step": 1947 }, { "epoch": 0.11687766244675106, "grad_norm": 1.3287978172302246, "learning_rate": 6.862530596159408e-06, "loss": 0.4583, "step": 1948 }, { "epoch": 0.11693766124677507, "grad_norm": 1.495463490486145, "learning_rate": 6.862341780272377e-06, "loss": 0.4702, "step": 1949 }, { "epoch": 0.11699766004679907, "grad_norm": 1.2334671020507812, "learning_rate": 6.862152837405015e-06, "loss": 0.5026, "step": 1950 }, { "epoch": 0.11705765884682306, "grad_norm": 1.2726926803588867, "learning_rate": 6.861963767564458e-06, "loss": 0.5162, "step": 1951 }, { "epoch": 0.11711765764684706, "grad_norm": 1.2496124505996704, "learning_rate": 6.861774570757849e-06, "loss": 0.4731, "step": 1952 }, { "epoch": 0.11717765644687106, "grad_norm": 1.4183591604232788, "learning_rate": 6.8615852469923285e-06, "loss": 0.5139, "step": 1953 }, { "epoch": 0.11723765524689506, "grad_norm": 1.1878175735473633, "learning_rate": 6.861395796275049e-06, "loss": 0.4906, "step": 1954 }, { "epoch": 0.11729765404691907, "grad_norm": 1.1774170398712158, "learning_rate": 6.8612062186131646e-06, "loss": 0.4819, "step": 1955 }, { "epoch": 0.11735765284694306, "grad_norm": 1.210198998451233, "learning_rate": 6.861016514013836e-06, "loss": 0.4874, "step": 1956 }, { "epoch": 0.11741765164696706, "grad_norm": 1.3661112785339355, "learning_rate": 6.8608266824842255e-06, "loss": 0.5812, "step": 1957 }, { "epoch": 0.11747765044699106, "grad_norm": 1.2675740718841553, "learning_rate": 6.8606367240315035e-06, "loss": 0.4732, "step": 1958 }, { "epoch": 0.11753764924701506, "grad_norm": 1.12142014503479, "learning_rate": 6.8604466386628435e-06, "loss": 0.49, "step": 1959 }, { "epoch": 0.11759764804703907, "grad_norm": 1.3483383655548096, "learning_rate": 6.860256426385423e-06, "loss": 0.4895, "step": 1960 }, { "epoch": 0.11765764684706306, "grad_norm": 1.2722817659378052, "learning_rate": 6.860066087206428e-06, "loss": 0.4555, "step": 1961 }, { "epoch": 0.11771764564708706, "grad_norm": 1.1657676696777344, "learning_rate": 6.859875621133045e-06, "loss": 0.4419, "step": 1962 }, { "epoch": 0.11777764444711106, "grad_norm": 1.241991639137268, "learning_rate": 6.859685028172466e-06, "loss": 0.5095, "step": 1963 }, { "epoch": 0.11783764324713505, "grad_norm": 1.3784176111221313, "learning_rate": 6.8594943083318915e-06, "loss": 0.5294, "step": 1964 }, { "epoch": 0.11789764204715905, "grad_norm": 1.3687143325805664, "learning_rate": 6.859303461618522e-06, "loss": 0.5002, "step": 1965 }, { "epoch": 0.11795764084718306, "grad_norm": 1.3154792785644531, "learning_rate": 6.859112488039567e-06, "loss": 0.464, "step": 1966 }, { "epoch": 0.11801763964720706, "grad_norm": 1.1910359859466553, "learning_rate": 6.858921387602237e-06, "loss": 0.4111, "step": 1967 }, { "epoch": 0.11807763844723106, "grad_norm": 1.3037512302398682, "learning_rate": 6.858730160313749e-06, "loss": 0.4856, "step": 1968 }, { "epoch": 0.11813763724725505, "grad_norm": 1.2939356565475464, "learning_rate": 6.858538806181325e-06, "loss": 0.486, "step": 1969 }, { "epoch": 0.11819763604727905, "grad_norm": 1.308592677116394, "learning_rate": 6.858347325212192e-06, "loss": 0.5361, "step": 1970 }, { "epoch": 0.11825763484730305, "grad_norm": 1.3640203475952148, "learning_rate": 6.858155717413582e-06, "loss": 0.4495, "step": 1971 }, { "epoch": 0.11831763364732706, "grad_norm": 1.2292999029159546, "learning_rate": 6.857963982792729e-06, "loss": 0.5403, "step": 1972 }, { "epoch": 0.11837763244735106, "grad_norm": 1.2950948476791382, "learning_rate": 6.857772121356874e-06, "loss": 0.5177, "step": 1973 }, { "epoch": 0.11843763124737505, "grad_norm": 1.4784692525863647, "learning_rate": 6.857580133113266e-06, "loss": 0.5051, "step": 1974 }, { "epoch": 0.11849763004739905, "grad_norm": 1.266022801399231, "learning_rate": 6.857388018069154e-06, "loss": 0.4867, "step": 1975 }, { "epoch": 0.11855762884742305, "grad_norm": 1.2057688236236572, "learning_rate": 6.8571957762317915e-06, "loss": 0.4658, "step": 1976 }, { "epoch": 0.11861762764744706, "grad_norm": 1.1722633838653564, "learning_rate": 6.85700340760844e-06, "loss": 0.4556, "step": 1977 }, { "epoch": 0.11867762644747105, "grad_norm": 1.230035662651062, "learning_rate": 6.856810912206364e-06, "loss": 0.5531, "step": 1978 }, { "epoch": 0.11873762524749505, "grad_norm": 1.2829535007476807, "learning_rate": 6.856618290032834e-06, "loss": 0.473, "step": 1979 }, { "epoch": 0.11879762404751905, "grad_norm": 1.2459161281585693, "learning_rate": 6.856425541095125e-06, "loss": 0.4672, "step": 1980 }, { "epoch": 0.11885762284754305, "grad_norm": 1.2079401016235352, "learning_rate": 6.8562326654005135e-06, "loss": 0.4623, "step": 1981 }, { "epoch": 0.11891762164756704, "grad_norm": 1.1857306957244873, "learning_rate": 6.856039662956285e-06, "loss": 0.4759, "step": 1982 }, { "epoch": 0.11897762044759105, "grad_norm": 1.1895149946212769, "learning_rate": 6.85584653376973e-06, "loss": 0.4504, "step": 1983 }, { "epoch": 0.11903761924761505, "grad_norm": 1.3213059902191162, "learning_rate": 6.855653277848139e-06, "loss": 0.496, "step": 1984 }, { "epoch": 0.11909761804763905, "grad_norm": 1.401031732559204, "learning_rate": 6.855459895198813e-06, "loss": 0.4728, "step": 1985 }, { "epoch": 0.11915761684766304, "grad_norm": 1.2992371320724487, "learning_rate": 6.855266385829054e-06, "loss": 0.5014, "step": 1986 }, { "epoch": 0.11921761564768704, "grad_norm": 1.1829617023468018, "learning_rate": 6.85507274974617e-06, "loss": 0.524, "step": 1987 }, { "epoch": 0.11927761444771104, "grad_norm": 1.250239372253418, "learning_rate": 6.854878986957474e-06, "loss": 0.4431, "step": 1988 }, { "epoch": 0.11933761324773505, "grad_norm": 1.4183706045150757, "learning_rate": 6.854685097470284e-06, "loss": 0.5229, "step": 1989 }, { "epoch": 0.11939761204775905, "grad_norm": 1.353860855102539, "learning_rate": 6.854491081291921e-06, "loss": 0.4512, "step": 1990 }, { "epoch": 0.11945761084778304, "grad_norm": 1.4265549182891846, "learning_rate": 6.854296938429714e-06, "loss": 0.4809, "step": 1991 }, { "epoch": 0.11951760964780704, "grad_norm": 1.3861918449401855, "learning_rate": 6.8541026688909915e-06, "loss": 0.4538, "step": 1992 }, { "epoch": 0.11957760844783104, "grad_norm": 1.1049264669418335, "learning_rate": 6.853908272683094e-06, "loss": 0.4404, "step": 1993 }, { "epoch": 0.11963760724785505, "grad_norm": 1.2252874374389648, "learning_rate": 6.853713749813362e-06, "loss": 0.4434, "step": 1994 }, { "epoch": 0.11969760604787905, "grad_norm": 1.3484628200531006, "learning_rate": 6.8535191002891406e-06, "loss": 0.4344, "step": 1995 }, { "epoch": 0.11975760484790304, "grad_norm": 1.3130589723587036, "learning_rate": 6.8533243241177814e-06, "loss": 0.4981, "step": 1996 }, { "epoch": 0.11981760364792704, "grad_norm": 1.308420181274414, "learning_rate": 6.853129421306639e-06, "loss": 0.4772, "step": 1997 }, { "epoch": 0.11987760244795104, "grad_norm": 1.2796061038970947, "learning_rate": 6.852934391863077e-06, "loss": 0.4602, "step": 1998 }, { "epoch": 0.11993760124797503, "grad_norm": 1.4596247673034668, "learning_rate": 6.852739235794458e-06, "loss": 0.5131, "step": 1999 }, { "epoch": 0.11999760004799905, "grad_norm": 1.2852579355239868, "learning_rate": 6.852543953108153e-06, "loss": 0.497, "step": 2000 }, { "epoch": 0.12005759884802304, "grad_norm": 1.412743330001831, "learning_rate": 6.852348543811537e-06, "loss": 0.4914, "step": 2001 }, { "epoch": 0.12011759764804704, "grad_norm": 1.3492681980133057, "learning_rate": 6.852153007911991e-06, "loss": 0.5089, "step": 2002 }, { "epoch": 0.12017759644807104, "grad_norm": 1.418546438217163, "learning_rate": 6.8519573454168964e-06, "loss": 0.5432, "step": 2003 }, { "epoch": 0.12023759524809503, "grad_norm": 1.5683972835540771, "learning_rate": 6.8517615563336455e-06, "loss": 0.556, "step": 2004 }, { "epoch": 0.12029759404811904, "grad_norm": 1.3040026426315308, "learning_rate": 6.851565640669631e-06, "loss": 0.4803, "step": 2005 }, { "epoch": 0.12035759284814304, "grad_norm": 1.2101496458053589, "learning_rate": 6.851369598432251e-06, "loss": 0.4876, "step": 2006 }, { "epoch": 0.12041759164816704, "grad_norm": 1.3537734746932983, "learning_rate": 6.8511734296289124e-06, "loss": 0.4989, "step": 2007 }, { "epoch": 0.12047759044819104, "grad_norm": 1.1890122890472412, "learning_rate": 6.850977134267018e-06, "loss": 0.4819, "step": 2008 }, { "epoch": 0.12053758924821503, "grad_norm": 1.247153401374817, "learning_rate": 6.850780712353987e-06, "loss": 0.5014, "step": 2009 }, { "epoch": 0.12059758804823903, "grad_norm": 1.4142118692398071, "learning_rate": 6.8505841638972336e-06, "loss": 0.5726, "step": 2010 }, { "epoch": 0.12065758684826304, "grad_norm": 1.2879760265350342, "learning_rate": 6.850387488904182e-06, "loss": 0.54, "step": 2011 }, { "epoch": 0.12071758564828704, "grad_norm": 1.3655809164047241, "learning_rate": 6.850190687382258e-06, "loss": 0.5397, "step": 2012 }, { "epoch": 0.12077758444831103, "grad_norm": 1.1244968175888062, "learning_rate": 6.8499937593388974e-06, "loss": 0.4684, "step": 2013 }, { "epoch": 0.12083758324833503, "grad_norm": 1.4119621515274048, "learning_rate": 6.8497967047815335e-06, "loss": 0.4949, "step": 2014 }, { "epoch": 0.12089758204835903, "grad_norm": 1.2128669023513794, "learning_rate": 6.84959952371761e-06, "loss": 0.5027, "step": 2015 }, { "epoch": 0.12095758084838303, "grad_norm": 1.2672367095947266, "learning_rate": 6.849402216154573e-06, "loss": 0.4452, "step": 2016 }, { "epoch": 0.12101757964840704, "grad_norm": 1.2093570232391357, "learning_rate": 6.849204782099875e-06, "loss": 0.4159, "step": 2017 }, { "epoch": 0.12107757844843103, "grad_norm": 1.3110272884368896, "learning_rate": 6.84900722156097e-06, "loss": 0.4755, "step": 2018 }, { "epoch": 0.12113757724845503, "grad_norm": 1.336670994758606, "learning_rate": 6.848809534545321e-06, "loss": 0.4637, "step": 2019 }, { "epoch": 0.12119757604847903, "grad_norm": 1.2575030326843262, "learning_rate": 6.848611721060393e-06, "loss": 0.5134, "step": 2020 }, { "epoch": 0.12125757484850302, "grad_norm": 1.2685039043426514, "learning_rate": 6.848413781113656e-06, "loss": 0.5084, "step": 2021 }, { "epoch": 0.12131757364852704, "grad_norm": 1.3315794467926025, "learning_rate": 6.848215714712587e-06, "loss": 0.4662, "step": 2022 }, { "epoch": 0.12137757244855103, "grad_norm": 1.1995428800582886, "learning_rate": 6.848017521864664e-06, "loss": 0.4714, "step": 2023 }, { "epoch": 0.12143757124857503, "grad_norm": 1.3049232959747314, "learning_rate": 6.847819202577372e-06, "loss": 0.4433, "step": 2024 }, { "epoch": 0.12149757004859903, "grad_norm": 1.3559024333953857, "learning_rate": 6.8476207568582026e-06, "loss": 0.4746, "step": 2025 }, { "epoch": 0.12155756884862302, "grad_norm": 1.4006441831588745, "learning_rate": 6.847422184714648e-06, "loss": 0.4755, "step": 2026 }, { "epoch": 0.12161756764864702, "grad_norm": 1.1802393198013306, "learning_rate": 6.84722348615421e-06, "loss": 0.516, "step": 2027 }, { "epoch": 0.12167756644867103, "grad_norm": 1.208917260169983, "learning_rate": 6.847024661184389e-06, "loss": 0.508, "step": 2028 }, { "epoch": 0.12173756524869503, "grad_norm": 1.3032050132751465, "learning_rate": 6.846825709812697e-06, "loss": 0.4827, "step": 2029 }, { "epoch": 0.12179756404871903, "grad_norm": 1.218095302581787, "learning_rate": 6.846626632046645e-06, "loss": 0.4454, "step": 2030 }, { "epoch": 0.12185756284874302, "grad_norm": 1.0899614095687866, "learning_rate": 6.846427427893753e-06, "loss": 0.4682, "step": 2031 }, { "epoch": 0.12191756164876702, "grad_norm": 1.2898870706558228, "learning_rate": 6.846228097361543e-06, "loss": 0.4984, "step": 2032 }, { "epoch": 0.12197756044879103, "grad_norm": 1.4321284294128418, "learning_rate": 6.846028640457543e-06, "loss": 0.4577, "step": 2033 }, { "epoch": 0.12203755924881503, "grad_norm": 1.3668897151947021, "learning_rate": 6.845829057189285e-06, "loss": 0.4895, "step": 2034 }, { "epoch": 0.12209755804883902, "grad_norm": 1.2844845056533813, "learning_rate": 6.845629347564309e-06, "loss": 0.4961, "step": 2035 }, { "epoch": 0.12215755684886302, "grad_norm": 1.2737603187561035, "learning_rate": 6.845429511590154e-06, "loss": 0.4835, "step": 2036 }, { "epoch": 0.12221755564888702, "grad_norm": 1.3915719985961914, "learning_rate": 6.845229549274368e-06, "loss": 0.5211, "step": 2037 }, { "epoch": 0.12227755444891102, "grad_norm": 1.183667778968811, "learning_rate": 6.845029460624502e-06, "loss": 0.469, "step": 2038 }, { "epoch": 0.12233755324893503, "grad_norm": 1.2748215198516846, "learning_rate": 6.844829245648115e-06, "loss": 0.4567, "step": 2039 }, { "epoch": 0.12239755204895902, "grad_norm": 1.3007292747497559, "learning_rate": 6.844628904352765e-06, "loss": 0.4819, "step": 2040 }, { "epoch": 0.12245755084898302, "grad_norm": 1.2039848566055298, "learning_rate": 6.84442843674602e-06, "loss": 0.4196, "step": 2041 }, { "epoch": 0.12251754964900702, "grad_norm": 1.099257469177246, "learning_rate": 6.84422784283545e-06, "loss": 0.4474, "step": 2042 }, { "epoch": 0.12257754844903102, "grad_norm": 1.1819918155670166, "learning_rate": 6.844027122628631e-06, "loss": 0.4343, "step": 2043 }, { "epoch": 0.12263754724905501, "grad_norm": 1.2542628049850464, "learning_rate": 6.843826276133142e-06, "loss": 0.5134, "step": 2044 }, { "epoch": 0.12269754604907902, "grad_norm": 1.132980227470398, "learning_rate": 6.8436253033565695e-06, "loss": 0.4752, "step": 2045 }, { "epoch": 0.12275754484910302, "grad_norm": 1.4120193719863892, "learning_rate": 6.843424204306504e-06, "loss": 0.5262, "step": 2046 }, { "epoch": 0.12281754364912702, "grad_norm": 1.4409252405166626, "learning_rate": 6.8432229789905365e-06, "loss": 0.5087, "step": 2047 }, { "epoch": 0.12287754244915101, "grad_norm": 1.2061508893966675, "learning_rate": 6.84302162741627e-06, "loss": 0.483, "step": 2048 }, { "epoch": 0.12293754124917501, "grad_norm": 1.226117491722107, "learning_rate": 6.842820149591308e-06, "loss": 0.4903, "step": 2049 }, { "epoch": 0.12299754004919902, "grad_norm": 1.1866823434829712, "learning_rate": 6.842618545523258e-06, "loss": 0.492, "step": 2050 }, { "epoch": 0.12305753884922302, "grad_norm": 1.2992136478424072, "learning_rate": 6.842416815219735e-06, "loss": 0.4716, "step": 2051 }, { "epoch": 0.12311753764924702, "grad_norm": 1.3570588827133179, "learning_rate": 6.842214958688356e-06, "loss": 0.4831, "step": 2052 }, { "epoch": 0.12317753644927101, "grad_norm": 1.304990530014038, "learning_rate": 6.842012975936746e-06, "loss": 0.5115, "step": 2053 }, { "epoch": 0.12323753524929501, "grad_norm": 1.2836920022964478, "learning_rate": 6.8418108669725316e-06, "loss": 0.4941, "step": 2054 }, { "epoch": 0.12329753404931901, "grad_norm": 1.4245654344558716, "learning_rate": 6.841608631803347e-06, "loss": 0.5338, "step": 2055 }, { "epoch": 0.12335753284934302, "grad_norm": 1.302734613418579, "learning_rate": 6.841406270436827e-06, "loss": 0.5, "step": 2056 }, { "epoch": 0.12341753164936702, "grad_norm": 1.1822752952575684, "learning_rate": 6.841203782880617e-06, "loss": 0.4578, "step": 2057 }, { "epoch": 0.12347753044939101, "grad_norm": 1.1254372596740723, "learning_rate": 6.841001169142362e-06, "loss": 0.5036, "step": 2058 }, { "epoch": 0.12353752924941501, "grad_norm": 1.249692678451538, "learning_rate": 6.840798429229714e-06, "loss": 0.456, "step": 2059 }, { "epoch": 0.123597528049439, "grad_norm": 1.2023522853851318, "learning_rate": 6.840595563150332e-06, "loss": 0.5024, "step": 2060 }, { "epoch": 0.12365752684946302, "grad_norm": 1.3320549726486206, "learning_rate": 6.840392570911873e-06, "loss": 0.5045, "step": 2061 }, { "epoch": 0.12371752564948701, "grad_norm": 1.3113385438919067, "learning_rate": 6.8401894525220065e-06, "loss": 0.5043, "step": 2062 }, { "epoch": 0.12377752444951101, "grad_norm": 1.2684954404830933, "learning_rate": 6.839986207988401e-06, "loss": 0.5333, "step": 2063 }, { "epoch": 0.12383752324953501, "grad_norm": 1.2379189729690552, "learning_rate": 6.839782837318734e-06, "loss": 0.5126, "step": 2064 }, { "epoch": 0.123897522049559, "grad_norm": 1.2519723176956177, "learning_rate": 6.839579340520686e-06, "loss": 0.4564, "step": 2065 }, { "epoch": 0.123957520849583, "grad_norm": 1.2457777261734009, "learning_rate": 6.8393757176019414e-06, "loss": 0.4268, "step": 2066 }, { "epoch": 0.12401751964960701, "grad_norm": 1.1538740396499634, "learning_rate": 6.839171968570188e-06, "loss": 0.4413, "step": 2067 }, { "epoch": 0.12407751844963101, "grad_norm": 1.2116055488586426, "learning_rate": 6.8389680934331255e-06, "loss": 0.4768, "step": 2068 }, { "epoch": 0.12413751724965501, "grad_norm": 1.3630921840667725, "learning_rate": 6.83876409219845e-06, "loss": 0.5206, "step": 2069 }, { "epoch": 0.124197516049679, "grad_norm": 1.1799092292785645, "learning_rate": 6.8385599648738645e-06, "loss": 0.4809, "step": 2070 }, { "epoch": 0.124257514849703, "grad_norm": 1.4633110761642456, "learning_rate": 6.83835571146708e-06, "loss": 0.5484, "step": 2071 }, { "epoch": 0.124317513649727, "grad_norm": 1.1307735443115234, "learning_rate": 6.83815133198581e-06, "loss": 0.4759, "step": 2072 }, { "epoch": 0.12437751244975101, "grad_norm": 1.348552942276001, "learning_rate": 6.8379468264377725e-06, "loss": 0.5817, "step": 2073 }, { "epoch": 0.124437511249775, "grad_norm": 1.3637266159057617, "learning_rate": 6.837742194830692e-06, "loss": 0.5289, "step": 2074 }, { "epoch": 0.124497510049799, "grad_norm": 1.2826824188232422, "learning_rate": 6.837537437172295e-06, "loss": 0.52, "step": 2075 }, { "epoch": 0.124557508849823, "grad_norm": 1.2861310243606567, "learning_rate": 6.837332553470314e-06, "loss": 0.4255, "step": 2076 }, { "epoch": 0.124617507649847, "grad_norm": 1.2781156301498413, "learning_rate": 6.837127543732488e-06, "loss": 0.487, "step": 2077 }, { "epoch": 0.12467750644987101, "grad_norm": 1.1735912561416626, "learning_rate": 6.836922407966559e-06, "loss": 0.474, "step": 2078 }, { "epoch": 0.124737505249895, "grad_norm": 1.1555414199829102, "learning_rate": 6.8367171461802734e-06, "loss": 0.4491, "step": 2079 }, { "epoch": 0.124797504049919, "grad_norm": 1.4212563037872314, "learning_rate": 6.836511758381383e-06, "loss": 0.4891, "step": 2080 }, { "epoch": 0.124857502849943, "grad_norm": 1.2807443141937256, "learning_rate": 6.836306244577644e-06, "loss": 0.5191, "step": 2081 }, { "epoch": 0.124917501649967, "grad_norm": 1.389324426651001, "learning_rate": 6.836100604776819e-06, "loss": 0.531, "step": 2082 }, { "epoch": 0.124977500449991, "grad_norm": 1.327422022819519, "learning_rate": 6.835894838986674e-06, "loss": 0.4736, "step": 2083 }, { "epoch": 0.125037499250015, "grad_norm": 1.339190125465393, "learning_rate": 6.835688947214977e-06, "loss": 0.5649, "step": 2084 }, { "epoch": 0.125097498050039, "grad_norm": 1.3205721378326416, "learning_rate": 6.835482929469508e-06, "loss": 0.4794, "step": 2085 }, { "epoch": 0.125157496850063, "grad_norm": 1.2462815046310425, "learning_rate": 6.835276785758045e-06, "loss": 0.5015, "step": 2086 }, { "epoch": 0.125217495650087, "grad_norm": 1.1301602125167847, "learning_rate": 6.835070516088373e-06, "loss": 0.4018, "step": 2087 }, { "epoch": 0.125277494450111, "grad_norm": 1.3346716165542603, "learning_rate": 6.834864120468281e-06, "loss": 0.5187, "step": 2088 }, { "epoch": 0.125337493250135, "grad_norm": 1.4236974716186523, "learning_rate": 6.834657598905567e-06, "loss": 0.4742, "step": 2089 }, { "epoch": 0.125397492050159, "grad_norm": 1.1983453035354614, "learning_rate": 6.834450951408026e-06, "loss": 0.4653, "step": 2090 }, { "epoch": 0.12545749085018298, "grad_norm": 1.320758581161499, "learning_rate": 6.834244177983466e-06, "loss": 0.4885, "step": 2091 }, { "epoch": 0.125517489650207, "grad_norm": 1.3087236881256104, "learning_rate": 6.834037278639693e-06, "loss": 0.4691, "step": 2092 }, { "epoch": 0.125577488450231, "grad_norm": 1.1603506803512573, "learning_rate": 6.833830253384522e-06, "loss": 0.4423, "step": 2093 }, { "epoch": 0.125637487250255, "grad_norm": 1.3063021898269653, "learning_rate": 6.833623102225772e-06, "loss": 0.5455, "step": 2094 }, { "epoch": 0.125697486050279, "grad_norm": 1.4302363395690918, "learning_rate": 6.833415825171264e-06, "loss": 0.4974, "step": 2095 }, { "epoch": 0.125757484850303, "grad_norm": 1.3442498445510864, "learning_rate": 6.833208422228828e-06, "loss": 0.5112, "step": 2096 }, { "epoch": 0.125817483650327, "grad_norm": 1.319989800453186, "learning_rate": 6.8330008934062965e-06, "loss": 0.4543, "step": 2097 }, { "epoch": 0.125877482450351, "grad_norm": 1.2482025623321533, "learning_rate": 6.832793238711505e-06, "loss": 0.4543, "step": 2098 }, { "epoch": 0.125937481250375, "grad_norm": 1.2502838373184204, "learning_rate": 6.832585458152299e-06, "loss": 0.4669, "step": 2099 }, { "epoch": 0.12599748005039899, "grad_norm": 1.1957480907440186, "learning_rate": 6.832377551736523e-06, "loss": 0.4606, "step": 2100 }, { "epoch": 0.12605747885042298, "grad_norm": 1.3165723085403442, "learning_rate": 6.832169519472027e-06, "loss": 0.4914, "step": 2101 }, { "epoch": 0.12611747765044698, "grad_norm": 1.2518608570098877, "learning_rate": 6.831961361366672e-06, "loss": 0.5241, "step": 2102 }, { "epoch": 0.12617747645047098, "grad_norm": 1.279815673828125, "learning_rate": 6.831753077428316e-06, "loss": 0.5183, "step": 2103 }, { "epoch": 0.126237475250495, "grad_norm": 1.2337993383407593, "learning_rate": 6.831544667664826e-06, "loss": 0.4682, "step": 2104 }, { "epoch": 0.126297474050519, "grad_norm": 1.3598682880401611, "learning_rate": 6.831336132084072e-06, "loss": 0.5368, "step": 2105 }, { "epoch": 0.126357472850543, "grad_norm": 1.132279396057129, "learning_rate": 6.831127470693929e-06, "loss": 0.447, "step": 2106 }, { "epoch": 0.126417471650567, "grad_norm": 1.214678406715393, "learning_rate": 6.830918683502279e-06, "loss": 0.5227, "step": 2107 }, { "epoch": 0.126477470450591, "grad_norm": 1.339887261390686, "learning_rate": 6.830709770517006e-06, "loss": 0.5089, "step": 2108 }, { "epoch": 0.126537469250615, "grad_norm": 1.5819952487945557, "learning_rate": 6.830500731745999e-06, "loss": 0.5682, "step": 2109 }, { "epoch": 0.12659746805063898, "grad_norm": 1.1733677387237549, "learning_rate": 6.830291567197153e-06, "loss": 0.4705, "step": 2110 }, { "epoch": 0.12665746685066298, "grad_norm": 1.3487669229507446, "learning_rate": 6.830082276878368e-06, "loss": 0.5, "step": 2111 }, { "epoch": 0.12671746565068698, "grad_norm": 1.2468057870864868, "learning_rate": 6.8298728607975474e-06, "loss": 0.4233, "step": 2112 }, { "epoch": 0.12677746445071097, "grad_norm": 1.4108163118362427, "learning_rate": 6.8296633189626e-06, "loss": 0.5243, "step": 2113 }, { "epoch": 0.12683746325073497, "grad_norm": 1.2726610898971558, "learning_rate": 6.829453651381438e-06, "loss": 0.5002, "step": 2114 }, { "epoch": 0.126897462050759, "grad_norm": 1.2418192625045776, "learning_rate": 6.82924385806198e-06, "loss": 0.4423, "step": 2115 }, { "epoch": 0.126957460850783, "grad_norm": 1.2852939367294312, "learning_rate": 6.82903393901215e-06, "loss": 0.4767, "step": 2116 }, { "epoch": 0.127017459650807, "grad_norm": 1.2767575979232788, "learning_rate": 6.828823894239876e-06, "loss": 0.4466, "step": 2117 }, { "epoch": 0.127077458450831, "grad_norm": 1.3843927383422852, "learning_rate": 6.8286137237530894e-06, "loss": 0.497, "step": 2118 }, { "epoch": 0.12713745725085498, "grad_norm": 1.1993917226791382, "learning_rate": 6.8284034275597265e-06, "loss": 0.4809, "step": 2119 }, { "epoch": 0.12719745605087898, "grad_norm": 1.255922555923462, "learning_rate": 6.828193005667731e-06, "loss": 0.4373, "step": 2120 }, { "epoch": 0.12725745485090298, "grad_norm": 1.1709500551223755, "learning_rate": 6.82798245808505e-06, "loss": 0.4645, "step": 2121 }, { "epoch": 0.12731745365092698, "grad_norm": 1.396477460861206, "learning_rate": 6.827771784819632e-06, "loss": 0.471, "step": 2122 }, { "epoch": 0.12737745245095097, "grad_norm": 1.447417140007019, "learning_rate": 6.827560985879435e-06, "loss": 0.5144, "step": 2123 }, { "epoch": 0.12743745125097497, "grad_norm": 1.2793077230453491, "learning_rate": 6.827350061272421e-06, "loss": 0.4506, "step": 2124 }, { "epoch": 0.12749745005099897, "grad_norm": 1.2831406593322754, "learning_rate": 6.827139011006554e-06, "loss": 0.5095, "step": 2125 }, { "epoch": 0.127557448851023, "grad_norm": 1.1600781679153442, "learning_rate": 6.826927835089806e-06, "loss": 0.4661, "step": 2126 }, { "epoch": 0.127617447651047, "grad_norm": 1.2132002115249634, "learning_rate": 6.826716533530149e-06, "loss": 0.4741, "step": 2127 }, { "epoch": 0.12767744645107099, "grad_norm": 1.519992709159851, "learning_rate": 6.826505106335567e-06, "loss": 0.5087, "step": 2128 }, { "epoch": 0.12773744525109498, "grad_norm": 1.2075014114379883, "learning_rate": 6.826293553514042e-06, "loss": 0.4464, "step": 2129 }, { "epoch": 0.12779744405111898, "grad_norm": 1.3394508361816406, "learning_rate": 6.826081875073564e-06, "loss": 0.5075, "step": 2130 }, { "epoch": 0.12785744285114298, "grad_norm": 1.2233912944793701, "learning_rate": 6.825870071022128e-06, "loss": 0.4106, "step": 2131 }, { "epoch": 0.12791744165116697, "grad_norm": 1.4124109745025635, "learning_rate": 6.82565814136773e-06, "loss": 0.5227, "step": 2132 }, { "epoch": 0.12797744045119097, "grad_norm": 1.3660907745361328, "learning_rate": 6.825446086118377e-06, "loss": 0.4716, "step": 2133 }, { "epoch": 0.12803743925121497, "grad_norm": 1.209736704826355, "learning_rate": 6.825233905282076e-06, "loss": 0.4499, "step": 2134 }, { "epoch": 0.12809743805123897, "grad_norm": 1.2144049406051636, "learning_rate": 6.82502159886684e-06, "loss": 0.4404, "step": 2135 }, { "epoch": 0.12815743685126296, "grad_norm": 1.288614273071289, "learning_rate": 6.824809166880687e-06, "loss": 0.5134, "step": 2136 }, { "epoch": 0.128217435651287, "grad_norm": 1.120805263519287, "learning_rate": 6.824596609331639e-06, "loss": 0.4431, "step": 2137 }, { "epoch": 0.12827743445131098, "grad_norm": 1.2858620882034302, "learning_rate": 6.8243839262277246e-06, "loss": 0.4684, "step": 2138 }, { "epoch": 0.12833743325133498, "grad_norm": 1.166275978088379, "learning_rate": 6.824171117576975e-06, "loss": 0.4571, "step": 2139 }, { "epoch": 0.12839743205135898, "grad_norm": 1.2067310810089111, "learning_rate": 6.823958183387427e-06, "loss": 0.4559, "step": 2140 }, { "epoch": 0.12845743085138298, "grad_norm": 1.324031114578247, "learning_rate": 6.823745123667122e-06, "loss": 0.4486, "step": 2141 }, { "epoch": 0.12851742965140697, "grad_norm": 1.3366315364837646, "learning_rate": 6.823531938424107e-06, "loss": 0.5141, "step": 2142 }, { "epoch": 0.12857742845143097, "grad_norm": 1.2435293197631836, "learning_rate": 6.823318627666433e-06, "loss": 0.5632, "step": 2143 }, { "epoch": 0.12863742725145497, "grad_norm": 1.4159685373306274, "learning_rate": 6.823105191402156e-06, "loss": 0.4332, "step": 2144 }, { "epoch": 0.12869742605147896, "grad_norm": 1.3070603609085083, "learning_rate": 6.822891629639334e-06, "loss": 0.4372, "step": 2145 }, { "epoch": 0.12875742485150296, "grad_norm": 1.3480280637741089, "learning_rate": 6.822677942386036e-06, "loss": 0.499, "step": 2146 }, { "epoch": 0.12881742365152696, "grad_norm": 1.2684297561645508, "learning_rate": 6.822464129650329e-06, "loss": 0.4911, "step": 2147 }, { "epoch": 0.12887742245155098, "grad_norm": 1.6236913204193115, "learning_rate": 6.8222501914402896e-06, "loss": 0.4945, "step": 2148 }, { "epoch": 0.12893742125157498, "grad_norm": 1.374553918838501, "learning_rate": 6.822036127763996e-06, "loss": 0.455, "step": 2149 }, { "epoch": 0.12899742005159898, "grad_norm": 1.288693904876709, "learning_rate": 6.821821938629535e-06, "loss": 0.4514, "step": 2150 }, { "epoch": 0.12905741885162297, "grad_norm": 1.302626132965088, "learning_rate": 6.821607624044992e-06, "loss": 0.4967, "step": 2151 }, { "epoch": 0.12911741765164697, "grad_norm": 1.2811309099197388, "learning_rate": 6.821393184018461e-06, "loss": 0.5025, "step": 2152 }, { "epoch": 0.12917741645167097, "grad_norm": 1.3305903673171997, "learning_rate": 6.821178618558043e-06, "loss": 0.4738, "step": 2153 }, { "epoch": 0.12923741525169496, "grad_norm": 1.2114609479904175, "learning_rate": 6.82096392767184e-06, "loss": 0.5162, "step": 2154 }, { "epoch": 0.12929741405171896, "grad_norm": 1.220075249671936, "learning_rate": 6.820749111367959e-06, "loss": 0.4838, "step": 2155 }, { "epoch": 0.12935741285174296, "grad_norm": 1.1229292154312134, "learning_rate": 6.820534169654514e-06, "loss": 0.4065, "step": 2156 }, { "epoch": 0.12941741165176696, "grad_norm": 1.2005561590194702, "learning_rate": 6.820319102539622e-06, "loss": 0.5133, "step": 2157 }, { "epoch": 0.12947741045179095, "grad_norm": 1.1733778715133667, "learning_rate": 6.820103910031405e-06, "loss": 0.4887, "step": 2158 }, { "epoch": 0.12953740925181495, "grad_norm": 1.193386197090149, "learning_rate": 6.819888592137989e-06, "loss": 0.4629, "step": 2159 }, { "epoch": 0.12959740805183897, "grad_norm": 1.3280836343765259, "learning_rate": 6.819673148867506e-06, "loss": 0.4739, "step": 2160 }, { "epoch": 0.12965740685186297, "grad_norm": 1.232338309288025, "learning_rate": 6.819457580228094e-06, "loss": 0.4872, "step": 2161 }, { "epoch": 0.12971740565188697, "grad_norm": 1.3354893922805786, "learning_rate": 6.81924188622789e-06, "loss": 0.4912, "step": 2162 }, { "epoch": 0.12977740445191097, "grad_norm": 1.1662170886993408, "learning_rate": 6.819026066875045e-06, "loss": 0.4707, "step": 2163 }, { "epoch": 0.12983740325193496, "grad_norm": 1.2345778942108154, "learning_rate": 6.818810122177706e-06, "loss": 0.4754, "step": 2164 }, { "epoch": 0.12989740205195896, "grad_norm": 1.2259823083877563, "learning_rate": 6.818594052144028e-06, "loss": 0.4436, "step": 2165 }, { "epoch": 0.12995740085198296, "grad_norm": 1.2961827516555786, "learning_rate": 6.818377856782173e-06, "loss": 0.4599, "step": 2166 }, { "epoch": 0.13001739965200695, "grad_norm": 1.35842764377594, "learning_rate": 6.818161536100304e-06, "loss": 0.4845, "step": 2167 }, { "epoch": 0.13007739845203095, "grad_norm": 1.4370596408843994, "learning_rate": 6.8179450901065934e-06, "loss": 0.4845, "step": 2168 }, { "epoch": 0.13013739725205495, "grad_norm": 1.392432689666748, "learning_rate": 6.8177285188092115e-06, "loss": 0.4736, "step": 2169 }, { "epoch": 0.13019739605207895, "grad_norm": 1.2897998094558716, "learning_rate": 6.81751182221634e-06, "loss": 0.4381, "step": 2170 }, { "epoch": 0.13025739485210297, "grad_norm": 1.2702624797821045, "learning_rate": 6.8172950003361615e-06, "loss": 0.4783, "step": 2171 }, { "epoch": 0.13031739365212697, "grad_norm": 1.3700361251831055, "learning_rate": 6.817078053176864e-06, "loss": 0.5197, "step": 2172 }, { "epoch": 0.13037739245215096, "grad_norm": 1.2630562782287598, "learning_rate": 6.81686098074664e-06, "loss": 0.5047, "step": 2173 }, { "epoch": 0.13043739125217496, "grad_norm": 1.2749732732772827, "learning_rate": 6.816643783053689e-06, "loss": 0.4928, "step": 2174 }, { "epoch": 0.13049739005219896, "grad_norm": 1.1917898654937744, "learning_rate": 6.816426460106214e-06, "loss": 0.449, "step": 2175 }, { "epoch": 0.13055738885222296, "grad_norm": 1.3038051128387451, "learning_rate": 6.81620901191242e-06, "loss": 0.4582, "step": 2176 }, { "epoch": 0.13061738765224695, "grad_norm": 1.2730677127838135, "learning_rate": 6.815991438480521e-06, "loss": 0.4584, "step": 2177 }, { "epoch": 0.13067738645227095, "grad_norm": 1.3505237102508545, "learning_rate": 6.815773739818733e-06, "loss": 0.5157, "step": 2178 }, { "epoch": 0.13073738525229495, "grad_norm": 1.314908504486084, "learning_rate": 6.815555915935277e-06, "loss": 0.5188, "step": 2179 }, { "epoch": 0.13079738405231894, "grad_norm": 1.205268144607544, "learning_rate": 6.81533796683838e-06, "loss": 0.4273, "step": 2180 }, { "epoch": 0.13085738285234294, "grad_norm": 1.2555419206619263, "learning_rate": 6.815119892536273e-06, "loss": 0.4892, "step": 2181 }, { "epoch": 0.13091738165236697, "grad_norm": 1.2749930620193481, "learning_rate": 6.8149016930371905e-06, "loss": 0.4314, "step": 2182 }, { "epoch": 0.13097738045239096, "grad_norm": 1.1208325624465942, "learning_rate": 6.814683368349374e-06, "loss": 0.4733, "step": 2183 }, { "epoch": 0.13103737925241496, "grad_norm": 1.3502947092056274, "learning_rate": 6.814464918481068e-06, "loss": 0.4642, "step": 2184 }, { "epoch": 0.13109737805243896, "grad_norm": 1.4395029544830322, "learning_rate": 6.814246343440522e-06, "loss": 0.497, "step": 2185 }, { "epoch": 0.13115737685246295, "grad_norm": 1.3030122518539429, "learning_rate": 6.814027643235993e-06, "loss": 0.4924, "step": 2186 }, { "epoch": 0.13121737565248695, "grad_norm": 1.3465484380722046, "learning_rate": 6.813808817875738e-06, "loss": 0.5068, "step": 2187 }, { "epoch": 0.13127737445251095, "grad_norm": 1.1889740228652954, "learning_rate": 6.813589867368021e-06, "loss": 0.4607, "step": 2188 }, { "epoch": 0.13133737325253494, "grad_norm": 1.2985169887542725, "learning_rate": 6.813370791721112e-06, "loss": 0.4659, "step": 2189 }, { "epoch": 0.13139737205255894, "grad_norm": 1.2045751810073853, "learning_rate": 6.813151590943284e-06, "loss": 0.4735, "step": 2190 }, { "epoch": 0.13145737085258294, "grad_norm": 1.2974480390548706, "learning_rate": 6.812932265042814e-06, "loss": 0.4732, "step": 2191 }, { "epoch": 0.13151736965260694, "grad_norm": 1.381632685661316, "learning_rate": 6.812712814027988e-06, "loss": 0.5245, "step": 2192 }, { "epoch": 0.13157736845263096, "grad_norm": 1.2445100545883179, "learning_rate": 6.8124932379070905e-06, "loss": 0.468, "step": 2193 }, { "epoch": 0.13163736725265496, "grad_norm": 1.3966692686080933, "learning_rate": 6.812273536688415e-06, "loss": 0.5387, "step": 2194 }, { "epoch": 0.13169736605267895, "grad_norm": 1.292970061302185, "learning_rate": 6.812053710380258e-06, "loss": 0.4752, "step": 2195 }, { "epoch": 0.13175736485270295, "grad_norm": 1.2839001417160034, "learning_rate": 6.811833758990923e-06, "loss": 0.4853, "step": 2196 }, { "epoch": 0.13181736365272695, "grad_norm": 1.1843221187591553, "learning_rate": 6.811613682528716e-06, "loss": 0.4332, "step": 2197 }, { "epoch": 0.13187736245275095, "grad_norm": 1.37429940700531, "learning_rate": 6.811393481001947e-06, "loss": 0.5172, "step": 2198 }, { "epoch": 0.13193736125277494, "grad_norm": 1.387078881263733, "learning_rate": 6.811173154418934e-06, "loss": 0.5284, "step": 2199 }, { "epoch": 0.13199736005279894, "grad_norm": 1.165852665901184, "learning_rate": 6.810952702787996e-06, "loss": 0.4095, "step": 2200 }, { "epoch": 0.13205735885282294, "grad_norm": 1.4797619581222534, "learning_rate": 6.8107321261174595e-06, "loss": 0.5144, "step": 2201 }, { "epoch": 0.13211735765284693, "grad_norm": 1.3841251134872437, "learning_rate": 6.810511424415653e-06, "loss": 0.5282, "step": 2202 }, { "epoch": 0.13217735645287093, "grad_norm": 1.1952422857284546, "learning_rate": 6.810290597690913e-06, "loss": 0.4771, "step": 2203 }, { "epoch": 0.13223735525289496, "grad_norm": 1.4543087482452393, "learning_rate": 6.81006964595158e-06, "loss": 0.4922, "step": 2204 }, { "epoch": 0.13229735405291895, "grad_norm": 1.3491460084915161, "learning_rate": 6.809848569205996e-06, "loss": 0.4815, "step": 2205 }, { "epoch": 0.13235735285294295, "grad_norm": 1.2871240377426147, "learning_rate": 6.809627367462511e-06, "loss": 0.4602, "step": 2206 }, { "epoch": 0.13241735165296695, "grad_norm": 1.3232747316360474, "learning_rate": 6.8094060407294795e-06, "loss": 0.4864, "step": 2207 }, { "epoch": 0.13247735045299094, "grad_norm": 1.2401574850082397, "learning_rate": 6.809184589015259e-06, "loss": 0.5298, "step": 2208 }, { "epoch": 0.13253734925301494, "grad_norm": 1.2774105072021484, "learning_rate": 6.808963012328214e-06, "loss": 0.4928, "step": 2209 }, { "epoch": 0.13259734805303894, "grad_norm": 1.2094730138778687, "learning_rate": 6.808741310676711e-06, "loss": 0.4569, "step": 2210 }, { "epoch": 0.13265734685306294, "grad_norm": 1.276282548904419, "learning_rate": 6.808519484069123e-06, "loss": 0.4941, "step": 2211 }, { "epoch": 0.13271734565308693, "grad_norm": 1.328934669494629, "learning_rate": 6.808297532513829e-06, "loss": 0.4717, "step": 2212 }, { "epoch": 0.13277734445311093, "grad_norm": 1.285863995552063, "learning_rate": 6.808075456019208e-06, "loss": 0.5034, "step": 2213 }, { "epoch": 0.13283734325313493, "grad_norm": 1.5111850500106812, "learning_rate": 6.80785325459365e-06, "loss": 0.5602, "step": 2214 }, { "epoch": 0.13289734205315892, "grad_norm": 1.2033427953720093, "learning_rate": 6.807630928245544e-06, "loss": 0.4562, "step": 2215 }, { "epoch": 0.13295734085318295, "grad_norm": 1.3217095136642456, "learning_rate": 6.807408476983289e-06, "loss": 0.5148, "step": 2216 }, { "epoch": 0.13301733965320695, "grad_norm": 1.4572299718856812, "learning_rate": 6.807185900815283e-06, "loss": 0.5036, "step": 2217 }, { "epoch": 0.13307733845323094, "grad_norm": 1.3406113386154175, "learning_rate": 6.8069631997499336e-06, "loss": 0.5302, "step": 2218 }, { "epoch": 0.13313733725325494, "grad_norm": 1.2375906705856323, "learning_rate": 6.80674037379565e-06, "loss": 0.5094, "step": 2219 }, { "epoch": 0.13319733605327894, "grad_norm": 1.1670457124710083, "learning_rate": 6.806517422960848e-06, "loss": 0.4593, "step": 2220 }, { "epoch": 0.13325733485330293, "grad_norm": 1.0959935188293457, "learning_rate": 6.8062943472539476e-06, "loss": 0.4984, "step": 2221 }, { "epoch": 0.13331733365332693, "grad_norm": 1.1766858100891113, "learning_rate": 6.806071146683373e-06, "loss": 0.4933, "step": 2222 }, { "epoch": 0.13337733245335093, "grad_norm": 1.2504854202270508, "learning_rate": 6.805847821257555e-06, "loss": 0.4797, "step": 2223 }, { "epoch": 0.13343733125337492, "grad_norm": 1.2834054231643677, "learning_rate": 6.805624370984924e-06, "loss": 0.4711, "step": 2224 }, { "epoch": 0.13349733005339892, "grad_norm": 1.3151201009750366, "learning_rate": 6.805400795873922e-06, "loss": 0.4674, "step": 2225 }, { "epoch": 0.13355732885342292, "grad_norm": 1.21792471408844, "learning_rate": 6.805177095932992e-06, "loss": 0.4417, "step": 2226 }, { "epoch": 0.13361732765344694, "grad_norm": 1.3375986814498901, "learning_rate": 6.80495327117058e-06, "loss": 0.4368, "step": 2227 }, { "epoch": 0.13367732645347094, "grad_norm": 1.1921321153640747, "learning_rate": 6.804729321595142e-06, "loss": 0.4916, "step": 2228 }, { "epoch": 0.13373732525349494, "grad_norm": 1.2217611074447632, "learning_rate": 6.804505247215133e-06, "loss": 0.4213, "step": 2229 }, { "epoch": 0.13379732405351893, "grad_norm": 1.1957597732543945, "learning_rate": 6.804281048039015e-06, "loss": 0.4524, "step": 2230 }, { "epoch": 0.13385732285354293, "grad_norm": 1.2654370069503784, "learning_rate": 6.804056724075257e-06, "loss": 0.4661, "step": 2231 }, { "epoch": 0.13391732165356693, "grad_norm": 1.331771969795227, "learning_rate": 6.803832275332329e-06, "loss": 0.4064, "step": 2232 }, { "epoch": 0.13397732045359093, "grad_norm": 1.1797120571136475, "learning_rate": 6.803607701818709e-06, "loss": 0.4633, "step": 2233 }, { "epoch": 0.13403731925361492, "grad_norm": 1.2239595651626587, "learning_rate": 6.803383003542878e-06, "loss": 0.4588, "step": 2234 }, { "epoch": 0.13409731805363892, "grad_norm": 1.3241978883743286, "learning_rate": 6.80315818051332e-06, "loss": 0.5025, "step": 2235 }, { "epoch": 0.13415731685366292, "grad_norm": 1.2713582515716553, "learning_rate": 6.802933232738527e-06, "loss": 0.4948, "step": 2236 }, { "epoch": 0.1342173156536869, "grad_norm": 1.2258491516113281, "learning_rate": 6.802708160226994e-06, "loss": 0.4762, "step": 2237 }, { "epoch": 0.13427731445371094, "grad_norm": 1.329001545906067, "learning_rate": 6.8024829629872215e-06, "loss": 0.4892, "step": 2238 }, { "epoch": 0.13433731325373494, "grad_norm": 1.213997721672058, "learning_rate": 6.8022576410277124e-06, "loss": 0.4662, "step": 2239 }, { "epoch": 0.13439731205375893, "grad_norm": 1.1889432668685913, "learning_rate": 6.802032194356978e-06, "loss": 0.491, "step": 2240 }, { "epoch": 0.13445731085378293, "grad_norm": 1.195996642112732, "learning_rate": 6.801806622983531e-06, "loss": 0.426, "step": 2241 }, { "epoch": 0.13451730965380693, "grad_norm": 1.3488749265670776, "learning_rate": 6.801580926915892e-06, "loss": 0.4787, "step": 2242 }, { "epoch": 0.13457730845383092, "grad_norm": 1.4764662981033325, "learning_rate": 6.801355106162582e-06, "loss": 0.5028, "step": 2243 }, { "epoch": 0.13463730725385492, "grad_norm": 1.3406821489334106, "learning_rate": 6.801129160732132e-06, "loss": 0.4881, "step": 2244 }, { "epoch": 0.13469730605387892, "grad_norm": 1.306262493133545, "learning_rate": 6.800903090633073e-06, "loss": 0.4722, "step": 2245 }, { "epoch": 0.13475730485390292, "grad_norm": 1.1546237468719482, "learning_rate": 6.800676895873942e-06, "loss": 0.4432, "step": 2246 }, { "epoch": 0.1348173036539269, "grad_norm": 1.3256975412368774, "learning_rate": 6.800450576463284e-06, "loss": 0.5228, "step": 2247 }, { "epoch": 0.1348773024539509, "grad_norm": 1.1979049444198608, "learning_rate": 6.800224132409643e-06, "loss": 0.4341, "step": 2248 }, { "epoch": 0.13493730125397493, "grad_norm": 1.3237626552581787, "learning_rate": 6.799997563721574e-06, "loss": 0.4951, "step": 2249 }, { "epoch": 0.13499730005399893, "grad_norm": 1.2082871198654175, "learning_rate": 6.7997708704076305e-06, "loss": 0.5069, "step": 2250 }, { "epoch": 0.13505729885402293, "grad_norm": 1.3075917959213257, "learning_rate": 6.7995440524763756e-06, "loss": 0.5349, "step": 2251 }, { "epoch": 0.13511729765404693, "grad_norm": 1.4257959127426147, "learning_rate": 6.799317109936374e-06, "loss": 0.4856, "step": 2252 }, { "epoch": 0.13517729645407092, "grad_norm": 1.390946388244629, "learning_rate": 6.799090042796197e-06, "loss": 0.5041, "step": 2253 }, { "epoch": 0.13523729525409492, "grad_norm": 1.3354483842849731, "learning_rate": 6.79886285106442e-06, "loss": 0.4445, "step": 2254 }, { "epoch": 0.13529729405411892, "grad_norm": 1.1149972677230835, "learning_rate": 6.798635534749622e-06, "loss": 0.4684, "step": 2255 }, { "epoch": 0.1353572928541429, "grad_norm": 1.2705522775650024, "learning_rate": 6.7984080938603886e-06, "loss": 0.5138, "step": 2256 }, { "epoch": 0.1354172916541669, "grad_norm": 1.2732441425323486, "learning_rate": 6.7981805284053096e-06, "loss": 0.4845, "step": 2257 }, { "epoch": 0.1354772904541909, "grad_norm": 1.2144843339920044, "learning_rate": 6.797952838392979e-06, "loss": 0.4556, "step": 2258 }, { "epoch": 0.1355372892542149, "grad_norm": 1.3575539588928223, "learning_rate": 6.797725023831994e-06, "loss": 0.4507, "step": 2259 }, { "epoch": 0.1355972880542389, "grad_norm": 1.2898714542388916, "learning_rate": 6.7974970847309595e-06, "loss": 0.53, "step": 2260 }, { "epoch": 0.13565728685426293, "grad_norm": 1.2352436780929565, "learning_rate": 6.797269021098483e-06, "loss": 0.4755, "step": 2261 }, { "epoch": 0.13571728565428692, "grad_norm": 1.3274407386779785, "learning_rate": 6.797040832943179e-06, "loss": 0.4907, "step": 2262 }, { "epoch": 0.13577728445431092, "grad_norm": 1.3123916387557983, "learning_rate": 6.796812520273664e-06, "loss": 0.4863, "step": 2263 }, { "epoch": 0.13583728325433492, "grad_norm": 1.254185676574707, "learning_rate": 6.79658408309856e-06, "loss": 0.5048, "step": 2264 }, { "epoch": 0.13589728205435891, "grad_norm": 1.2808332443237305, "learning_rate": 6.796355521426494e-06, "loss": 0.445, "step": 2265 }, { "epoch": 0.1359572808543829, "grad_norm": 1.2463971376419067, "learning_rate": 6.796126835266098e-06, "loss": 0.4562, "step": 2266 }, { "epoch": 0.1360172796544069, "grad_norm": 1.2409214973449707, "learning_rate": 6.795898024626009e-06, "loss": 0.4099, "step": 2267 }, { "epoch": 0.1360772784544309, "grad_norm": 1.435150384902954, "learning_rate": 6.795669089514868e-06, "loss": 0.5051, "step": 2268 }, { "epoch": 0.1361372772544549, "grad_norm": 1.2960002422332764, "learning_rate": 6.79544002994132e-06, "loss": 0.489, "step": 2269 }, { "epoch": 0.1361972760544789, "grad_norm": 1.1993317604064941, "learning_rate": 6.795210845914016e-06, "loss": 0.4777, "step": 2270 }, { "epoch": 0.1362572748545029, "grad_norm": 1.3535196781158447, "learning_rate": 6.794981537441612e-06, "loss": 0.4766, "step": 2271 }, { "epoch": 0.13631727365452692, "grad_norm": 1.272809624671936, "learning_rate": 6.794752104532766e-06, "loss": 0.4914, "step": 2272 }, { "epoch": 0.13637727245455092, "grad_norm": 1.3798116445541382, "learning_rate": 6.794522547196145e-06, "loss": 0.4686, "step": 2273 }, { "epoch": 0.13643727125457492, "grad_norm": 1.3919965028762817, "learning_rate": 6.794292865440417e-06, "loss": 0.5122, "step": 2274 }, { "epoch": 0.1364972700545989, "grad_norm": 1.1273586750030518, "learning_rate": 6.7940630592742555e-06, "loss": 0.507, "step": 2275 }, { "epoch": 0.1365572688546229, "grad_norm": 1.429455041885376, "learning_rate": 6.793833128706341e-06, "loss": 0.4889, "step": 2276 }, { "epoch": 0.1366172676546469, "grad_norm": 1.3365212678909302, "learning_rate": 6.793603073745355e-06, "loss": 0.5348, "step": 2277 }, { "epoch": 0.1366772664546709, "grad_norm": 1.2898225784301758, "learning_rate": 6.793372894399987e-06, "loss": 0.484, "step": 2278 }, { "epoch": 0.1367372652546949, "grad_norm": 1.3966251611709595, "learning_rate": 6.793142590678929e-06, "loss": 0.5353, "step": 2279 }, { "epoch": 0.1367972640547189, "grad_norm": 1.2977330684661865, "learning_rate": 6.79291216259088e-06, "loss": 0.483, "step": 2280 }, { "epoch": 0.1368572628547429, "grad_norm": 1.246843695640564, "learning_rate": 6.79268161014454e-06, "loss": 0.4943, "step": 2281 }, { "epoch": 0.1369172616547669, "grad_norm": 1.310448408126831, "learning_rate": 6.792450933348616e-06, "loss": 0.5227, "step": 2282 }, { "epoch": 0.13697726045479092, "grad_norm": 1.2183756828308105, "learning_rate": 6.792220132211822e-06, "loss": 0.4726, "step": 2283 }, { "epoch": 0.1370372592548149, "grad_norm": 1.389392614364624, "learning_rate": 6.791989206742873e-06, "loss": 0.4901, "step": 2284 }, { "epoch": 0.1370972580548389, "grad_norm": 1.3144348859786987, "learning_rate": 6.79175815695049e-06, "loss": 0.4528, "step": 2285 }, { "epoch": 0.1371572568548629, "grad_norm": 1.1948572397232056, "learning_rate": 6.7915269828433974e-06, "loss": 0.4531, "step": 2286 }, { "epoch": 0.1372172556548869, "grad_norm": 1.1894198656082153, "learning_rate": 6.791295684430328e-06, "loss": 0.4809, "step": 2287 }, { "epoch": 0.1372772544549109, "grad_norm": 1.2873988151550293, "learning_rate": 6.791064261720016e-06, "loss": 0.5048, "step": 2288 }, { "epoch": 0.1373372532549349, "grad_norm": 1.1742891073226929, "learning_rate": 6.7908327147211996e-06, "loss": 0.4627, "step": 2289 }, { "epoch": 0.1373972520549589, "grad_norm": 1.2450587749481201, "learning_rate": 6.790601043442626e-06, "loss": 0.5193, "step": 2290 }, { "epoch": 0.1374572508549829, "grad_norm": 1.4193060398101807, "learning_rate": 6.790369247893041e-06, "loss": 0.4731, "step": 2291 }, { "epoch": 0.1375172496550069, "grad_norm": 1.2501758337020874, "learning_rate": 6.790137328081203e-06, "loss": 0.5076, "step": 2292 }, { "epoch": 0.1375772484550309, "grad_norm": 1.30014169216156, "learning_rate": 6.789905284015866e-06, "loss": 0.4777, "step": 2293 }, { "epoch": 0.1376372472550549, "grad_norm": 1.3083677291870117, "learning_rate": 6.789673115705795e-06, "loss": 0.447, "step": 2294 }, { "epoch": 0.1376972460550789, "grad_norm": 1.168752908706665, "learning_rate": 6.789440823159759e-06, "loss": 0.4428, "step": 2295 }, { "epoch": 0.1377572448551029, "grad_norm": 1.4145820140838623, "learning_rate": 6.78920840638653e-06, "loss": 0.5459, "step": 2296 }, { "epoch": 0.1378172436551269, "grad_norm": 1.3423339128494263, "learning_rate": 6.788975865394884e-06, "loss": 0.5063, "step": 2297 }, { "epoch": 0.1378772424551509, "grad_norm": 1.316146969795227, "learning_rate": 6.788743200193605e-06, "loss": 0.4876, "step": 2298 }, { "epoch": 0.1379372412551749, "grad_norm": 1.3297784328460693, "learning_rate": 6.788510410791479e-06, "loss": 0.4961, "step": 2299 }, { "epoch": 0.1379972400551989, "grad_norm": 1.4111977815628052, "learning_rate": 6.788277497197297e-06, "loss": 0.4898, "step": 2300 }, { "epoch": 0.1380572388552229, "grad_norm": 1.3203986883163452, "learning_rate": 6.788044459419855e-06, "loss": 0.5096, "step": 2301 }, { "epoch": 0.1381172376552469, "grad_norm": 1.104288101196289, "learning_rate": 6.787811297467955e-06, "loss": 0.4646, "step": 2302 }, { "epoch": 0.13817723645527089, "grad_norm": 1.2744510173797607, "learning_rate": 6.787578011350399e-06, "loss": 0.4617, "step": 2303 }, { "epoch": 0.13823723525529488, "grad_norm": 1.278188705444336, "learning_rate": 6.787344601076002e-06, "loss": 0.4672, "step": 2304 }, { "epoch": 0.1382972340553189, "grad_norm": 1.285744547843933, "learning_rate": 6.7871110666535765e-06, "loss": 0.4629, "step": 2305 }, { "epoch": 0.1383572328553429, "grad_norm": 1.292157530784607, "learning_rate": 6.786877408091941e-06, "loss": 0.5096, "step": 2306 }, { "epoch": 0.1384172316553669, "grad_norm": 1.5050137042999268, "learning_rate": 6.786643625399921e-06, "loss": 0.5073, "step": 2307 }, { "epoch": 0.1384772304553909, "grad_norm": 1.24142587184906, "learning_rate": 6.7864097185863465e-06, "loss": 0.4906, "step": 2308 }, { "epoch": 0.1385372292554149, "grad_norm": 1.2417407035827637, "learning_rate": 6.786175687660049e-06, "loss": 0.5071, "step": 2309 }, { "epoch": 0.1385972280554389, "grad_norm": 1.3491582870483398, "learning_rate": 6.7859415326298656e-06, "loss": 0.4567, "step": 2310 }, { "epoch": 0.1386572268554629, "grad_norm": 1.38102388381958, "learning_rate": 6.785707253504644e-06, "loss": 0.4521, "step": 2311 }, { "epoch": 0.1387172256554869, "grad_norm": 1.2999098300933838, "learning_rate": 6.785472850293228e-06, "loss": 0.4956, "step": 2312 }, { "epoch": 0.13877722445551088, "grad_norm": 1.3455727100372314, "learning_rate": 6.785238323004471e-06, "loss": 0.5535, "step": 2313 }, { "epoch": 0.13883722325553488, "grad_norm": 1.322257399559021, "learning_rate": 6.7850036716472294e-06, "loss": 0.5162, "step": 2314 }, { "epoch": 0.13889722205555888, "grad_norm": 1.3267862796783447, "learning_rate": 6.7847688962303655e-06, "loss": 0.4832, "step": 2315 }, { "epoch": 0.13895722085558287, "grad_norm": 1.2861086130142212, "learning_rate": 6.7845339967627465e-06, "loss": 0.424, "step": 2316 }, { "epoch": 0.1390172196556069, "grad_norm": 1.1722512245178223, "learning_rate": 6.784298973253243e-06, "loss": 0.4581, "step": 2317 }, { "epoch": 0.1390772184556309, "grad_norm": 1.1177397966384888, "learning_rate": 6.7840638257107305e-06, "loss": 0.4645, "step": 2318 }, { "epoch": 0.1391372172556549, "grad_norm": 1.131883978843689, "learning_rate": 6.783828554144089e-06, "loss": 0.514, "step": 2319 }, { "epoch": 0.1391972160556789, "grad_norm": 1.2268248796463013, "learning_rate": 6.783593158562204e-06, "loss": 0.479, "step": 2320 }, { "epoch": 0.1392572148557029, "grad_norm": 1.2247681617736816, "learning_rate": 6.783357638973966e-06, "loss": 0.4569, "step": 2321 }, { "epoch": 0.13931721365572688, "grad_norm": 1.2939789295196533, "learning_rate": 6.783121995388269e-06, "loss": 0.4486, "step": 2322 }, { "epoch": 0.13937721245575088, "grad_norm": 1.1465083360671997, "learning_rate": 6.782886227814011e-06, "loss": 0.5059, "step": 2323 }, { "epoch": 0.13943721125577488, "grad_norm": 1.4309645891189575, "learning_rate": 6.782650336260098e-06, "loss": 0.5168, "step": 2324 }, { "epoch": 0.13949721005579888, "grad_norm": 1.3025716543197632, "learning_rate": 6.782414320735436e-06, "loss": 0.4312, "step": 2325 }, { "epoch": 0.13955720885582287, "grad_norm": 1.2984106540679932, "learning_rate": 6.782178181248942e-06, "loss": 0.4636, "step": 2326 }, { "epoch": 0.13961720765584687, "grad_norm": 1.372394323348999, "learning_rate": 6.78194191780953e-06, "loss": 0.4578, "step": 2327 }, { "epoch": 0.1396772064558709, "grad_norm": 1.5276970863342285, "learning_rate": 6.7817055304261255e-06, "loss": 0.5238, "step": 2328 }, { "epoch": 0.1397372052558949, "grad_norm": 1.3034703731536865, "learning_rate": 6.781469019107653e-06, "loss": 0.4653, "step": 2329 }, { "epoch": 0.1397972040559189, "grad_norm": 1.2956057786941528, "learning_rate": 6.781232383863048e-06, "loss": 0.4429, "step": 2330 }, { "epoch": 0.13985720285594289, "grad_norm": 1.389641523361206, "learning_rate": 6.7809956247012435e-06, "loss": 0.4057, "step": 2331 }, { "epoch": 0.13991720165596688, "grad_norm": 1.1767756938934326, "learning_rate": 6.780758741631182e-06, "loss": 0.4414, "step": 2332 }, { "epoch": 0.13997720045599088, "grad_norm": 1.1887675523757935, "learning_rate": 6.780521734661811e-06, "loss": 0.4984, "step": 2333 }, { "epoch": 0.14003719925601488, "grad_norm": 1.2633558511734009, "learning_rate": 6.78028460380208e-06, "loss": 0.4279, "step": 2334 }, { "epoch": 0.14009719805603887, "grad_norm": 1.2688804864883423, "learning_rate": 6.780047349060945e-06, "loss": 0.4934, "step": 2335 }, { "epoch": 0.14015719685606287, "grad_norm": 1.3102734088897705, "learning_rate": 6.779809970447365e-06, "loss": 0.4921, "step": 2336 }, { "epoch": 0.14021719565608687, "grad_norm": 1.1247191429138184, "learning_rate": 6.779572467970304e-06, "loss": 0.3749, "step": 2337 }, { "epoch": 0.14027719445611087, "grad_norm": 1.3316904306411743, "learning_rate": 6.779334841638734e-06, "loss": 0.4385, "step": 2338 }, { "epoch": 0.1403371932561349, "grad_norm": 1.2946759462356567, "learning_rate": 6.779097091461628e-06, "loss": 0.4686, "step": 2339 }, { "epoch": 0.1403971920561589, "grad_norm": 1.2958322763442993, "learning_rate": 6.778859217447963e-06, "loss": 0.5323, "step": 2340 }, { "epoch": 0.14045719085618288, "grad_norm": 1.2084507942199707, "learning_rate": 6.778621219606724e-06, "loss": 0.4705, "step": 2341 }, { "epoch": 0.14051718965620688, "grad_norm": 1.4541683197021484, "learning_rate": 6.7783830979469e-06, "loss": 0.5102, "step": 2342 }, { "epoch": 0.14057718845623088, "grad_norm": 1.2527927160263062, "learning_rate": 6.778144852477482e-06, "loss": 0.4716, "step": 2343 }, { "epoch": 0.14063718725625488, "grad_norm": 1.2502063512802124, "learning_rate": 6.777906483207468e-06, "loss": 0.5217, "step": 2344 }, { "epoch": 0.14069718605627887, "grad_norm": 1.26762056350708, "learning_rate": 6.777667990145861e-06, "loss": 0.4362, "step": 2345 }, { "epoch": 0.14075718485630287, "grad_norm": 1.3202799558639526, "learning_rate": 6.777429373301666e-06, "loss": 0.5098, "step": 2346 }, { "epoch": 0.14081718365632687, "grad_norm": 1.230130910873413, "learning_rate": 6.777190632683896e-06, "loss": 0.4909, "step": 2347 }, { "epoch": 0.14087718245635086, "grad_norm": 1.1836503744125366, "learning_rate": 6.776951768301567e-06, "loss": 0.4857, "step": 2348 }, { "epoch": 0.14093718125637486, "grad_norm": 1.2045543193817139, "learning_rate": 6.7767127801636985e-06, "loss": 0.4641, "step": 2349 }, { "epoch": 0.14099718005639889, "grad_norm": 1.358977198600769, "learning_rate": 6.776473668279318e-06, "loss": 0.437, "step": 2350 }, { "epoch": 0.14105717885642288, "grad_norm": 1.2257308959960938, "learning_rate": 6.776234432657453e-06, "loss": 0.4535, "step": 2351 }, { "epoch": 0.14111717765644688, "grad_norm": 1.422265887260437, "learning_rate": 6.775995073307141e-06, "loss": 0.4854, "step": 2352 }, { "epoch": 0.14117717645647088, "grad_norm": 1.3817174434661865, "learning_rate": 6.775755590237422e-06, "loss": 0.4966, "step": 2353 }, { "epoch": 0.14123717525649487, "grad_norm": 1.1721512079238892, "learning_rate": 6.775515983457337e-06, "loss": 0.4768, "step": 2354 }, { "epoch": 0.14129717405651887, "grad_norm": 1.2591441869735718, "learning_rate": 6.775276252975936e-06, "loss": 0.5027, "step": 2355 }, { "epoch": 0.14135717285654287, "grad_norm": 1.2973566055297852, "learning_rate": 6.775036398802274e-06, "loss": 0.4828, "step": 2356 }, { "epoch": 0.14141717165656686, "grad_norm": 1.3767536878585815, "learning_rate": 6.774796420945408e-06, "loss": 0.4553, "step": 2357 }, { "epoch": 0.14147717045659086, "grad_norm": 1.2240090370178223, "learning_rate": 6.7745563194144015e-06, "loss": 0.4802, "step": 2358 }, { "epoch": 0.14153716925661486, "grad_norm": 1.2652759552001953, "learning_rate": 6.774316094218322e-06, "loss": 0.4285, "step": 2359 }, { "epoch": 0.14159716805663886, "grad_norm": 1.159904956817627, "learning_rate": 6.77407574536624e-06, "loss": 0.4687, "step": 2360 }, { "epoch": 0.14165716685666288, "grad_norm": 1.2730112075805664, "learning_rate": 6.773835272867234e-06, "loss": 0.4967, "step": 2361 }, { "epoch": 0.14171716565668688, "grad_norm": 1.2832528352737427, "learning_rate": 6.773594676730386e-06, "loss": 0.441, "step": 2362 }, { "epoch": 0.14177716445671087, "grad_norm": 1.1519672870635986, "learning_rate": 6.773353956964781e-06, "loss": 0.462, "step": 2363 }, { "epoch": 0.14183716325673487, "grad_norm": 1.231584906578064, "learning_rate": 6.77311311357951e-06, "loss": 0.4604, "step": 2364 }, { "epoch": 0.14189716205675887, "grad_norm": 1.3617916107177734, "learning_rate": 6.7728721465836705e-06, "loss": 0.4893, "step": 2365 }, { "epoch": 0.14195716085678287, "grad_norm": 1.235746145248413, "learning_rate": 6.77263105598636e-06, "loss": 0.5135, "step": 2366 }, { "epoch": 0.14201715965680686, "grad_norm": 1.2839436531066895, "learning_rate": 6.772389841796684e-06, "loss": 0.4447, "step": 2367 }, { "epoch": 0.14207715845683086, "grad_norm": 1.267945647239685, "learning_rate": 6.772148504023755e-06, "loss": 0.4964, "step": 2368 }, { "epoch": 0.14213715725685486, "grad_norm": 1.2749428749084473, "learning_rate": 6.771907042676682e-06, "loss": 0.4605, "step": 2369 }, { "epoch": 0.14219715605687885, "grad_norm": 1.1741158962249756, "learning_rate": 6.771665457764589e-06, "loss": 0.5131, "step": 2370 }, { "epoch": 0.14225715485690285, "grad_norm": 1.1650123596191406, "learning_rate": 6.771423749296597e-06, "loss": 0.4492, "step": 2371 }, { "epoch": 0.14231715365692685, "grad_norm": 1.1451085805892944, "learning_rate": 6.771181917281833e-06, "loss": 0.4676, "step": 2372 }, { "epoch": 0.14237715245695087, "grad_norm": 1.2171987295150757, "learning_rate": 6.770939961729433e-06, "loss": 0.5024, "step": 2373 }, { "epoch": 0.14243715125697487, "grad_norm": 1.2749338150024414, "learning_rate": 6.770697882648533e-06, "loss": 0.4988, "step": 2374 }, { "epoch": 0.14249715005699887, "grad_norm": 1.2011444568634033, "learning_rate": 6.770455680048274e-06, "loss": 0.5377, "step": 2375 }, { "epoch": 0.14255714885702286, "grad_norm": 1.1841567754745483, "learning_rate": 6.770213353937805e-06, "loss": 0.4921, "step": 2376 }, { "epoch": 0.14261714765704686, "grad_norm": 1.4167847633361816, "learning_rate": 6.769970904326277e-06, "loss": 0.5163, "step": 2377 }, { "epoch": 0.14267714645707086, "grad_norm": 1.4114190340042114, "learning_rate": 6.769728331222846e-06, "loss": 0.4562, "step": 2378 }, { "epoch": 0.14273714525709486, "grad_norm": 1.3399293422698975, "learning_rate": 6.769485634636671e-06, "loss": 0.4799, "step": 2379 }, { "epoch": 0.14279714405711885, "grad_norm": 1.3050223588943481, "learning_rate": 6.769242814576922e-06, "loss": 0.5211, "step": 2380 }, { "epoch": 0.14285714285714285, "grad_norm": 1.2196787595748901, "learning_rate": 6.768999871052764e-06, "loss": 0.4836, "step": 2381 }, { "epoch": 0.14291714165716685, "grad_norm": 1.2598249912261963, "learning_rate": 6.768756804073376e-06, "loss": 0.4617, "step": 2382 }, { "epoch": 0.14297714045719084, "grad_norm": 1.3862180709838867, "learning_rate": 6.768513613647935e-06, "loss": 0.431, "step": 2383 }, { "epoch": 0.14303713925721487, "grad_norm": 1.4058712720870972, "learning_rate": 6.7682702997856264e-06, "loss": 0.5423, "step": 2384 }, { "epoch": 0.14309713805723887, "grad_norm": 1.340469479560852, "learning_rate": 6.768026862495639e-06, "loss": 0.4559, "step": 2385 }, { "epoch": 0.14315713685726286, "grad_norm": 1.2179629802703857, "learning_rate": 6.7677833017871665e-06, "loss": 0.4525, "step": 2386 }, { "epoch": 0.14321713565728686, "grad_norm": 1.300115942955017, "learning_rate": 6.767539617669406e-06, "loss": 0.4419, "step": 2387 }, { "epoch": 0.14327713445731086, "grad_norm": 1.156455397605896, "learning_rate": 6.7672958101515615e-06, "loss": 0.4502, "step": 2388 }, { "epoch": 0.14333713325733485, "grad_norm": 1.235737681388855, "learning_rate": 6.7670518792428405e-06, "loss": 0.4658, "step": 2389 }, { "epoch": 0.14339713205735885, "grad_norm": 1.274819254875183, "learning_rate": 6.766807824952453e-06, "loss": 0.3769, "step": 2390 }, { "epoch": 0.14345713085738285, "grad_norm": 1.2314033508300781, "learning_rate": 6.766563647289619e-06, "loss": 0.4183, "step": 2391 }, { "epoch": 0.14351712965740684, "grad_norm": 1.2418289184570312, "learning_rate": 6.766319346263557e-06, "loss": 0.4544, "step": 2392 }, { "epoch": 0.14357712845743084, "grad_norm": 1.216623306274414, "learning_rate": 6.7660749218834955e-06, "loss": 0.4777, "step": 2393 }, { "epoch": 0.14363712725745484, "grad_norm": 1.3152804374694824, "learning_rate": 6.7658303741586645e-06, "loss": 0.4866, "step": 2394 }, { "epoch": 0.14369712605747886, "grad_norm": 1.2933473587036133, "learning_rate": 6.7655857030982994e-06, "loss": 0.4853, "step": 2395 }, { "epoch": 0.14375712485750286, "grad_norm": 1.3141307830810547, "learning_rate": 6.76534090871164e-06, "loss": 0.4685, "step": 2396 }, { "epoch": 0.14381712365752686, "grad_norm": 1.2037845849990845, "learning_rate": 6.765095991007931e-06, "loss": 0.5083, "step": 2397 }, { "epoch": 0.14387712245755085, "grad_norm": 1.3311537504196167, "learning_rate": 6.7648509499964225e-06, "loss": 0.4652, "step": 2398 }, { "epoch": 0.14393712125757485, "grad_norm": 1.1937209367752075, "learning_rate": 6.764605785686369e-06, "loss": 0.4598, "step": 2399 }, { "epoch": 0.14399712005759885, "grad_norm": 1.281579613685608, "learning_rate": 6.764360498087028e-06, "loss": 0.4712, "step": 2400 }, { "epoch": 0.14405711885762285, "grad_norm": 1.3755062818527222, "learning_rate": 6.764115087207663e-06, "loss": 0.5194, "step": 2401 }, { "epoch": 0.14411711765764684, "grad_norm": 1.2487940788269043, "learning_rate": 6.763869553057543e-06, "loss": 0.4362, "step": 2402 }, { "epoch": 0.14417711645767084, "grad_norm": 1.3211697340011597, "learning_rate": 6.763623895645939e-06, "loss": 0.494, "step": 2403 }, { "epoch": 0.14423711525769484, "grad_norm": 1.2606818675994873, "learning_rate": 6.76337811498213e-06, "loss": 0.488, "step": 2404 }, { "epoch": 0.14429711405771883, "grad_norm": 1.3494455814361572, "learning_rate": 6.763132211075398e-06, "loss": 0.4715, "step": 2405 }, { "epoch": 0.14435711285774286, "grad_norm": 1.2088911533355713, "learning_rate": 6.762886183935029e-06, "loss": 0.4399, "step": 2406 }, { "epoch": 0.14441711165776686, "grad_norm": 1.175696849822998, "learning_rate": 6.762640033570314e-06, "loss": 0.468, "step": 2407 }, { "epoch": 0.14447711045779085, "grad_norm": 1.308242917060852, "learning_rate": 6.7623937599905506e-06, "loss": 0.4659, "step": 2408 }, { "epoch": 0.14453710925781485, "grad_norm": 1.3980594873428345, "learning_rate": 6.762147363205038e-06, "loss": 0.4418, "step": 2409 }, { "epoch": 0.14459710805783885, "grad_norm": 1.3113980293273926, "learning_rate": 6.761900843223082e-06, "loss": 0.5121, "step": 2410 }, { "epoch": 0.14465710685786284, "grad_norm": 1.321953296661377, "learning_rate": 6.761654200053991e-06, "loss": 0.4781, "step": 2411 }, { "epoch": 0.14471710565788684, "grad_norm": 1.406347632408142, "learning_rate": 6.7614074337070814e-06, "loss": 0.5057, "step": 2412 }, { "epoch": 0.14477710445791084, "grad_norm": 1.2612709999084473, "learning_rate": 6.761160544191672e-06, "loss": 0.4924, "step": 2413 }, { "epoch": 0.14483710325793483, "grad_norm": 1.3577136993408203, "learning_rate": 6.760913531517087e-06, "loss": 0.484, "step": 2414 }, { "epoch": 0.14489710205795883, "grad_norm": 1.2971863746643066, "learning_rate": 6.760666395692655e-06, "loss": 0.4654, "step": 2415 }, { "epoch": 0.14495710085798283, "grad_norm": 1.235779047012329, "learning_rate": 6.7604191367277085e-06, "loss": 0.4378, "step": 2416 }, { "epoch": 0.14501709965800683, "grad_norm": 1.2629917860031128, "learning_rate": 6.760171754631585e-06, "loss": 0.4762, "step": 2417 }, { "epoch": 0.14507709845803085, "grad_norm": 1.2381418943405151, "learning_rate": 6.759924249413628e-06, "loss": 0.4414, "step": 2418 }, { "epoch": 0.14513709725805485, "grad_norm": 1.3656134605407715, "learning_rate": 6.759676621083185e-06, "loss": 0.5791, "step": 2419 }, { "epoch": 0.14519709605807885, "grad_norm": 1.4273091554641724, "learning_rate": 6.759428869649605e-06, "loss": 0.5076, "step": 2420 }, { "epoch": 0.14525709485810284, "grad_norm": 1.2699471712112427, "learning_rate": 6.759180995122249e-06, "loss": 0.457, "step": 2421 }, { "epoch": 0.14531709365812684, "grad_norm": 1.4064890146255493, "learning_rate": 6.7589329975104745e-06, "loss": 0.5033, "step": 2422 }, { "epoch": 0.14537709245815084, "grad_norm": 1.2054433822631836, "learning_rate": 6.758684876823649e-06, "loss": 0.4174, "step": 2423 }, { "epoch": 0.14543709125817483, "grad_norm": 1.3057045936584473, "learning_rate": 6.758436633071141e-06, "loss": 0.4389, "step": 2424 }, { "epoch": 0.14549709005819883, "grad_norm": 1.3091634511947632, "learning_rate": 6.758188266262327e-06, "loss": 0.5118, "step": 2425 }, { "epoch": 0.14555708885822283, "grad_norm": 1.3653074502944946, "learning_rate": 6.757939776406587e-06, "loss": 0.5079, "step": 2426 }, { "epoch": 0.14561708765824682, "grad_norm": 1.1982117891311646, "learning_rate": 6.757691163513305e-06, "loss": 0.4658, "step": 2427 }, { "epoch": 0.14567708645827082, "grad_norm": 1.2792720794677734, "learning_rate": 6.75744242759187e-06, "loss": 0.4511, "step": 2428 }, { "epoch": 0.14573708525829485, "grad_norm": 1.1839330196380615, "learning_rate": 6.757193568651674e-06, "loss": 0.504, "step": 2429 }, { "epoch": 0.14579708405831884, "grad_norm": 1.2360323667526245, "learning_rate": 6.756944586702117e-06, "loss": 0.471, "step": 2430 }, { "epoch": 0.14585708285834284, "grad_norm": 1.2824243307113647, "learning_rate": 6.756695481752602e-06, "loss": 0.4694, "step": 2431 }, { "epoch": 0.14591708165836684, "grad_norm": 1.2490404844284058, "learning_rate": 6.756446253812536e-06, "loss": 0.4783, "step": 2432 }, { "epoch": 0.14597708045839083, "grad_norm": 1.1699165105819702, "learning_rate": 6.7561969028913325e-06, "loss": 0.4489, "step": 2433 }, { "epoch": 0.14603707925841483, "grad_norm": 1.3622627258300781, "learning_rate": 6.755947428998406e-06, "loss": 0.4639, "step": 2434 }, { "epoch": 0.14609707805843883, "grad_norm": 1.2980031967163086, "learning_rate": 6.755697832143178e-06, "loss": 0.4637, "step": 2435 }, { "epoch": 0.14615707685846283, "grad_norm": 1.3772169351577759, "learning_rate": 6.7554481123350775e-06, "loss": 0.4747, "step": 2436 }, { "epoch": 0.14621707565848682, "grad_norm": 1.2943211793899536, "learning_rate": 6.755198269583533e-06, "loss": 0.4614, "step": 2437 }, { "epoch": 0.14627707445851082, "grad_norm": 1.1450845003128052, "learning_rate": 6.754948303897981e-06, "loss": 0.4528, "step": 2438 }, { "epoch": 0.14633707325853482, "grad_norm": 1.1980056762695312, "learning_rate": 6.7546982152878615e-06, "loss": 0.4614, "step": 2439 }, { "epoch": 0.14639707205855884, "grad_norm": 1.3982170820236206, "learning_rate": 6.7544480037626175e-06, "loss": 0.4645, "step": 2440 }, { "epoch": 0.14645707085858284, "grad_norm": 1.317228078842163, "learning_rate": 6.754197669331699e-06, "loss": 0.4824, "step": 2441 }, { "epoch": 0.14651706965860684, "grad_norm": 1.3998774290084839, "learning_rate": 6.753947212004563e-06, "loss": 0.4752, "step": 2442 }, { "epoch": 0.14657706845863083, "grad_norm": 1.3048748970031738, "learning_rate": 6.753696631790665e-06, "loss": 0.4496, "step": 2443 }, { "epoch": 0.14663706725865483, "grad_norm": 1.1439871788024902, "learning_rate": 6.753445928699468e-06, "loss": 0.4529, "step": 2444 }, { "epoch": 0.14669706605867883, "grad_norm": 1.1813071966171265, "learning_rate": 6.753195102740442e-06, "loss": 0.4547, "step": 2445 }, { "epoch": 0.14675706485870282, "grad_norm": 1.3880858421325684, "learning_rate": 6.752944153923058e-06, "loss": 0.4982, "step": 2446 }, { "epoch": 0.14681706365872682, "grad_norm": 1.317657470703125, "learning_rate": 6.752693082256793e-06, "loss": 0.5049, "step": 2447 }, { "epoch": 0.14687706245875082, "grad_norm": 1.3009709119796753, "learning_rate": 6.752441887751131e-06, "loss": 0.5175, "step": 2448 }, { "epoch": 0.14693706125877481, "grad_norm": 1.1766082048416138, "learning_rate": 6.752190570415555e-06, "loss": 0.5369, "step": 2449 }, { "epoch": 0.1469970600587988, "grad_norm": 1.4000508785247803, "learning_rate": 6.75193913025956e-06, "loss": 0.4671, "step": 2450 }, { "epoch": 0.14705705885882284, "grad_norm": 1.326926589012146, "learning_rate": 6.7516875672926384e-06, "loss": 0.4923, "step": 2451 }, { "epoch": 0.14711705765884683, "grad_norm": 1.291048288345337, "learning_rate": 6.751435881524293e-06, "loss": 0.4108, "step": 2452 }, { "epoch": 0.14717705645887083, "grad_norm": 1.2053247690200806, "learning_rate": 6.751184072964027e-06, "loss": 0.4638, "step": 2453 }, { "epoch": 0.14723705525889483, "grad_norm": 1.2539904117584229, "learning_rate": 6.750932141621352e-06, "loss": 0.4654, "step": 2454 }, { "epoch": 0.14729705405891882, "grad_norm": 1.2520766258239746, "learning_rate": 6.750680087505781e-06, "loss": 0.4763, "step": 2455 }, { "epoch": 0.14735705285894282, "grad_norm": 1.4322959184646606, "learning_rate": 6.750427910626833e-06, "loss": 0.5194, "step": 2456 }, { "epoch": 0.14741705165896682, "grad_norm": 1.2128586769104004, "learning_rate": 6.750175610994032e-06, "loss": 0.4868, "step": 2457 }, { "epoch": 0.14747705045899082, "grad_norm": 1.2810540199279785, "learning_rate": 6.749923188616905e-06, "loss": 0.4805, "step": 2458 }, { "epoch": 0.1475370492590148, "grad_norm": 1.334574580192566, "learning_rate": 6.749670643504987e-06, "loss": 0.4734, "step": 2459 }, { "epoch": 0.1475970480590388, "grad_norm": 1.3192462921142578, "learning_rate": 6.749417975667813e-06, "loss": 0.4827, "step": 2460 }, { "epoch": 0.1476570468590628, "grad_norm": 1.3355613946914673, "learning_rate": 6.749165185114927e-06, "loss": 0.4562, "step": 2461 }, { "epoch": 0.14771704565908683, "grad_norm": 1.4150909185409546, "learning_rate": 6.748912271855874e-06, "loss": 0.5093, "step": 2462 }, { "epoch": 0.14777704445911083, "grad_norm": 1.449459195137024, "learning_rate": 6.748659235900209e-06, "loss": 0.4769, "step": 2463 }, { "epoch": 0.14783704325913483, "grad_norm": 1.379839301109314, "learning_rate": 6.748406077257483e-06, "loss": 0.4945, "step": 2464 }, { "epoch": 0.14789704205915882, "grad_norm": 1.4220960140228271, "learning_rate": 6.7481527959372614e-06, "loss": 0.4966, "step": 2465 }, { "epoch": 0.14795704085918282, "grad_norm": 1.339298963546753, "learning_rate": 6.747899391949106e-06, "loss": 0.4851, "step": 2466 }, { "epoch": 0.14801703965920682, "grad_norm": 1.2288554906845093, "learning_rate": 6.7476458653025875e-06, "loss": 0.5145, "step": 2467 }, { "epoch": 0.14807703845923081, "grad_norm": 1.3070858716964722, "learning_rate": 6.747392216007282e-06, "loss": 0.4164, "step": 2468 }, { "epoch": 0.1481370372592548, "grad_norm": 1.319023847579956, "learning_rate": 6.747138444072766e-06, "loss": 0.484, "step": 2469 }, { "epoch": 0.1481970360592788, "grad_norm": 1.2393970489501953, "learning_rate": 6.746884549508627e-06, "loss": 0.5048, "step": 2470 }, { "epoch": 0.1482570348593028, "grad_norm": 1.2671476602554321, "learning_rate": 6.74663053232445e-06, "loss": 0.4623, "step": 2471 }, { "epoch": 0.1483170336593268, "grad_norm": 1.2055315971374512, "learning_rate": 6.74637639252983e-06, "loss": 0.4586, "step": 2472 }, { "epoch": 0.1483770324593508, "grad_norm": 1.144666075706482, "learning_rate": 6.746122130134364e-06, "loss": 0.4456, "step": 2473 }, { "epoch": 0.14843703125937482, "grad_norm": 1.3231751918792725, "learning_rate": 6.745867745147654e-06, "loss": 0.4733, "step": 2474 }, { "epoch": 0.14849703005939882, "grad_norm": 1.3578418493270874, "learning_rate": 6.745613237579308e-06, "loss": 0.4905, "step": 2475 }, { "epoch": 0.14855702885942282, "grad_norm": 1.4560973644256592, "learning_rate": 6.745358607438936e-06, "loss": 0.5048, "step": 2476 }, { "epoch": 0.14861702765944682, "grad_norm": 1.2300034761428833, "learning_rate": 6.745103854736156e-06, "loss": 0.4839, "step": 2477 }, { "epoch": 0.1486770264594708, "grad_norm": 1.3434134721755981, "learning_rate": 6.744848979480588e-06, "loss": 0.5101, "step": 2478 }, { "epoch": 0.1487370252594948, "grad_norm": 1.250072717666626, "learning_rate": 6.744593981681857e-06, "loss": 0.4623, "step": 2479 }, { "epoch": 0.1487970240595188, "grad_norm": 1.3471450805664062, "learning_rate": 6.744338861349594e-06, "loss": 0.5164, "step": 2480 }, { "epoch": 0.1488570228595428, "grad_norm": 1.1906962394714355, "learning_rate": 6.744083618493434e-06, "loss": 0.4583, "step": 2481 }, { "epoch": 0.1489170216595668, "grad_norm": 1.2900059223175049, "learning_rate": 6.743828253123016e-06, "loss": 0.4476, "step": 2482 }, { "epoch": 0.1489770204595908, "grad_norm": 1.1422755718231201, "learning_rate": 6.743572765247982e-06, "loss": 0.4568, "step": 2483 }, { "epoch": 0.1490370192596148, "grad_norm": 1.2902016639709473, "learning_rate": 6.743317154877983e-06, "loss": 0.4802, "step": 2484 }, { "epoch": 0.14909701805963882, "grad_norm": 1.3079655170440674, "learning_rate": 6.7430614220226724e-06, "loss": 0.4775, "step": 2485 }, { "epoch": 0.14915701685966282, "grad_norm": 1.3422281742095947, "learning_rate": 6.7428055666917076e-06, "loss": 0.4902, "step": 2486 }, { "epoch": 0.1492170156596868, "grad_norm": 1.3743641376495361, "learning_rate": 6.742549588894749e-06, "loss": 0.5125, "step": 2487 }, { "epoch": 0.1492770144597108, "grad_norm": 1.697386622428894, "learning_rate": 6.742293488641466e-06, "loss": 0.4551, "step": 2488 }, { "epoch": 0.1493370132597348, "grad_norm": 1.1823745965957642, "learning_rate": 6.7420372659415304e-06, "loss": 0.5063, "step": 2489 }, { "epoch": 0.1493970120597588, "grad_norm": 1.4260109663009644, "learning_rate": 6.7417809208046176e-06, "loss": 0.5175, "step": 2490 }, { "epoch": 0.1494570108597828, "grad_norm": 1.2567509412765503, "learning_rate": 6.7415244532404085e-06, "loss": 0.5093, "step": 2491 }, { "epoch": 0.1495170096598068, "grad_norm": 1.3377482891082764, "learning_rate": 6.74126786325859e-06, "loss": 0.4677, "step": 2492 }, { "epoch": 0.1495770084598308, "grad_norm": 1.2843855619430542, "learning_rate": 6.741011150868852e-06, "loss": 0.4734, "step": 2493 }, { "epoch": 0.1496370072598548, "grad_norm": 1.2465791702270508, "learning_rate": 6.740754316080887e-06, "loss": 0.4915, "step": 2494 }, { "epoch": 0.1496970060598788, "grad_norm": 1.197087049484253, "learning_rate": 6.7404973589043974e-06, "loss": 0.4639, "step": 2495 }, { "epoch": 0.14975700485990281, "grad_norm": 1.3564153909683228, "learning_rate": 6.740240279349086e-06, "loss": 0.5108, "step": 2496 }, { "epoch": 0.1498170036599268, "grad_norm": 1.348756194114685, "learning_rate": 6.739983077424662e-06, "loss": 0.4356, "step": 2497 }, { "epoch": 0.1498770024599508, "grad_norm": 1.275692343711853, "learning_rate": 6.7397257531408385e-06, "loss": 0.4802, "step": 2498 }, { "epoch": 0.1499370012599748, "grad_norm": 1.2859611511230469, "learning_rate": 6.7394683065073325e-06, "loss": 0.4703, "step": 2499 }, { "epoch": 0.1499970000599988, "grad_norm": 1.1828829050064087, "learning_rate": 6.739210737533869e-06, "loss": 0.476, "step": 2500 }, { "epoch": 0.1500569988600228, "grad_norm": 1.2662302255630493, "learning_rate": 6.738953046230172e-06, "loss": 0.4761, "step": 2501 }, { "epoch": 0.1501169976600468, "grad_norm": 1.1673524379730225, "learning_rate": 6.738695232605977e-06, "loss": 0.3838, "step": 2502 }, { "epoch": 0.1501769964600708, "grad_norm": 1.3363580703735352, "learning_rate": 6.738437296671018e-06, "loss": 0.4399, "step": 2503 }, { "epoch": 0.1502369952600948, "grad_norm": 1.279104232788086, "learning_rate": 6.738179238435036e-06, "loss": 0.4694, "step": 2504 }, { "epoch": 0.1502969940601188, "grad_norm": 1.216689944267273, "learning_rate": 6.737921057907778e-06, "loss": 0.4698, "step": 2505 }, { "epoch": 0.15035699286014279, "grad_norm": 1.3937078714370728, "learning_rate": 6.737662755098993e-06, "loss": 0.4802, "step": 2506 }, { "epoch": 0.1504169916601668, "grad_norm": 1.4819178581237793, "learning_rate": 6.737404330018436e-06, "loss": 0.5058, "step": 2507 }, { "epoch": 0.1504769904601908, "grad_norm": 1.3538838624954224, "learning_rate": 6.7371457826758684e-06, "loss": 0.505, "step": 2508 }, { "epoch": 0.1505369892602148, "grad_norm": 1.4117237329483032, "learning_rate": 6.736887113081052e-06, "loss": 0.4648, "step": 2509 }, { "epoch": 0.1505969880602388, "grad_norm": 1.3899264335632324, "learning_rate": 6.736628321243756e-06, "loss": 0.502, "step": 2510 }, { "epoch": 0.1506569868602628, "grad_norm": 1.2811787128448486, "learning_rate": 6.736369407173755e-06, "loss": 0.5345, "step": 2511 }, { "epoch": 0.1507169856602868, "grad_norm": 1.5427745580673218, "learning_rate": 6.736110370880827e-06, "loss": 0.4525, "step": 2512 }, { "epoch": 0.1507769844603108, "grad_norm": 1.3909776210784912, "learning_rate": 6.7358512123747534e-06, "loss": 0.5334, "step": 2513 }, { "epoch": 0.1508369832603348, "grad_norm": 1.479507565498352, "learning_rate": 6.735591931665322e-06, "loss": 0.4682, "step": 2514 }, { "epoch": 0.1508969820603588, "grad_norm": 1.3312880992889404, "learning_rate": 6.7353325287623245e-06, "loss": 0.5171, "step": 2515 }, { "epoch": 0.15095698086038278, "grad_norm": 1.4188075065612793, "learning_rate": 6.7350730036755584e-06, "loss": 0.5153, "step": 2516 }, { "epoch": 0.15101697966040678, "grad_norm": 1.3783354759216309, "learning_rate": 6.734813356414823e-06, "loss": 0.4903, "step": 2517 }, { "epoch": 0.1510769784604308, "grad_norm": 1.2674610614776611, "learning_rate": 6.7345535869899256e-06, "loss": 0.5099, "step": 2518 }, { "epoch": 0.1511369772604548, "grad_norm": 1.5029252767562866, "learning_rate": 6.734293695410675e-06, "loss": 0.5304, "step": 2519 }, { "epoch": 0.1511969760604788, "grad_norm": 1.3357197046279907, "learning_rate": 6.734033681686888e-06, "loss": 0.5464, "step": 2520 }, { "epoch": 0.1512569748605028, "grad_norm": 1.151301622390747, "learning_rate": 6.733773545828382e-06, "loss": 0.4481, "step": 2521 }, { "epoch": 0.1513169736605268, "grad_norm": 1.2605069875717163, "learning_rate": 6.733513287844983e-06, "loss": 0.4679, "step": 2522 }, { "epoch": 0.1513769724605508, "grad_norm": 1.2068631649017334, "learning_rate": 6.733252907746518e-06, "loss": 0.4644, "step": 2523 }, { "epoch": 0.1514369712605748, "grad_norm": 1.2139062881469727, "learning_rate": 6.732992405542822e-06, "loss": 0.4375, "step": 2524 }, { "epoch": 0.15149697006059878, "grad_norm": 1.1668659448623657, "learning_rate": 6.732731781243732e-06, "loss": 0.4657, "step": 2525 }, { "epoch": 0.15155696886062278, "grad_norm": 1.3105812072753906, "learning_rate": 6.732471034859091e-06, "loss": 0.499, "step": 2526 }, { "epoch": 0.15161696766064678, "grad_norm": 1.3207112550735474, "learning_rate": 6.732210166398746e-06, "loss": 0.5028, "step": 2527 }, { "epoch": 0.15167696646067078, "grad_norm": 1.3923377990722656, "learning_rate": 6.73194917587255e-06, "loss": 0.4672, "step": 2528 }, { "epoch": 0.15173696526069477, "grad_norm": 1.2770612239837646, "learning_rate": 6.731688063290357e-06, "loss": 0.442, "step": 2529 }, { "epoch": 0.1517969640607188, "grad_norm": 1.1910820007324219, "learning_rate": 6.73142682866203e-06, "loss": 0.4204, "step": 2530 }, { "epoch": 0.1518569628607428, "grad_norm": 1.3826185464859009, "learning_rate": 6.731165471997434e-06, "loss": 0.4309, "step": 2531 }, { "epoch": 0.1519169616607668, "grad_norm": 1.285003900527954, "learning_rate": 6.7309039933064385e-06, "loss": 0.4648, "step": 2532 }, { "epoch": 0.1519769604607908, "grad_norm": 1.3086844682693481, "learning_rate": 6.730642392598921e-06, "loss": 0.4553, "step": 2533 }, { "epoch": 0.15203695926081479, "grad_norm": 1.2751740217208862, "learning_rate": 6.730380669884757e-06, "loss": 0.4732, "step": 2534 }, { "epoch": 0.15209695806083878, "grad_norm": 1.2280744314193726, "learning_rate": 6.730118825173834e-06, "loss": 0.4573, "step": 2535 }, { "epoch": 0.15215695686086278, "grad_norm": 1.2422335147857666, "learning_rate": 6.729856858476039e-06, "loss": 0.5372, "step": 2536 }, { "epoch": 0.15221695566088678, "grad_norm": 1.193542718887329, "learning_rate": 6.729594769801265e-06, "loss": 0.4288, "step": 2537 }, { "epoch": 0.15227695446091077, "grad_norm": 1.4165312051773071, "learning_rate": 6.729332559159412e-06, "loss": 0.4907, "step": 2538 }, { "epoch": 0.15233695326093477, "grad_norm": 1.2462717294692993, "learning_rate": 6.72907022656038e-06, "loss": 0.526, "step": 2539 }, { "epoch": 0.15239695206095877, "grad_norm": 1.296150803565979, "learning_rate": 6.728807772014077e-06, "loss": 0.5066, "step": 2540 }, { "epoch": 0.1524569508609828, "grad_norm": 1.2447367906570435, "learning_rate": 6.728545195530416e-06, "loss": 0.4473, "step": 2541 }, { "epoch": 0.1525169496610068, "grad_norm": 1.2171859741210938, "learning_rate": 6.728282497119311e-06, "loss": 0.4627, "step": 2542 }, { "epoch": 0.1525769484610308, "grad_norm": 1.2276356220245361, "learning_rate": 6.728019676790684e-06, "loss": 0.5128, "step": 2543 }, { "epoch": 0.15263694726105478, "grad_norm": 1.4676313400268555, "learning_rate": 6.727756734554462e-06, "loss": 0.5161, "step": 2544 }, { "epoch": 0.15269694606107878, "grad_norm": 1.3174664974212646, "learning_rate": 6.727493670420573e-06, "loss": 0.4954, "step": 2545 }, { "epoch": 0.15275694486110278, "grad_norm": 1.3024951219558716, "learning_rate": 6.727230484398952e-06, "loss": 0.5128, "step": 2546 }, { "epoch": 0.15281694366112678, "grad_norm": 1.1675447225570679, "learning_rate": 6.72696717649954e-06, "loss": 0.4761, "step": 2547 }, { "epoch": 0.15287694246115077, "grad_norm": 1.248515248298645, "learning_rate": 6.726703746732278e-06, "loss": 0.4879, "step": 2548 }, { "epoch": 0.15293694126117477, "grad_norm": 1.2990086078643799, "learning_rate": 6.726440195107118e-06, "loss": 0.4726, "step": 2549 }, { "epoch": 0.15299694006119877, "grad_norm": 1.2627488374710083, "learning_rate": 6.726176521634011e-06, "loss": 0.4552, "step": 2550 }, { "epoch": 0.15305693886122276, "grad_norm": 1.2547318935394287, "learning_rate": 6.725912726322915e-06, "loss": 0.4645, "step": 2551 }, { "epoch": 0.1531169376612468, "grad_norm": 1.3246852159500122, "learning_rate": 6.725648809183792e-06, "loss": 0.5002, "step": 2552 }, { "epoch": 0.15317693646127079, "grad_norm": 1.2972630262374878, "learning_rate": 6.7253847702266106e-06, "loss": 0.5238, "step": 2553 }, { "epoch": 0.15323693526129478, "grad_norm": 1.483551263809204, "learning_rate": 6.72512060946134e-06, "loss": 0.4756, "step": 2554 }, { "epoch": 0.15329693406131878, "grad_norm": 1.243711233139038, "learning_rate": 6.724856326897958e-06, "loss": 0.4409, "step": 2555 }, { "epoch": 0.15335693286134278, "grad_norm": 1.3438336849212646, "learning_rate": 6.724591922546445e-06, "loss": 0.509, "step": 2556 }, { "epoch": 0.15341693166136677, "grad_norm": 1.5614774227142334, "learning_rate": 6.724327396416787e-06, "loss": 0.4971, "step": 2557 }, { "epoch": 0.15347693046139077, "grad_norm": 1.4199057817459106, "learning_rate": 6.724062748518973e-06, "loss": 0.474, "step": 2558 }, { "epoch": 0.15353692926141477, "grad_norm": 1.3027839660644531, "learning_rate": 6.723797978862998e-06, "loss": 0.4713, "step": 2559 }, { "epoch": 0.15359692806143876, "grad_norm": 1.2458839416503906, "learning_rate": 6.723533087458859e-06, "loss": 0.5374, "step": 2560 }, { "epoch": 0.15365692686146276, "grad_norm": 1.3494373559951782, "learning_rate": 6.723268074316564e-06, "loss": 0.47, "step": 2561 }, { "epoch": 0.15371692566148676, "grad_norm": 1.3220059871673584, "learning_rate": 6.723002939446118e-06, "loss": 0.4421, "step": 2562 }, { "epoch": 0.15377692446151078, "grad_norm": 1.255314588546753, "learning_rate": 6.722737682857534e-06, "loss": 0.4978, "step": 2563 }, { "epoch": 0.15383692326153478, "grad_norm": 1.2622120380401611, "learning_rate": 6.722472304560832e-06, "loss": 0.4889, "step": 2564 }, { "epoch": 0.15389692206155878, "grad_norm": 1.328932523727417, "learning_rate": 6.722206804566033e-06, "loss": 0.4344, "step": 2565 }, { "epoch": 0.15395692086158277, "grad_norm": 1.3250073194503784, "learning_rate": 6.721941182883162e-06, "loss": 0.4551, "step": 2566 }, { "epoch": 0.15401691966160677, "grad_norm": 1.082113265991211, "learning_rate": 6.721675439522252e-06, "loss": 0.4241, "step": 2567 }, { "epoch": 0.15407691846163077, "grad_norm": 1.195054292678833, "learning_rate": 6.721409574493339e-06, "loss": 0.3997, "step": 2568 }, { "epoch": 0.15413691726165477, "grad_norm": 1.4742146730422974, "learning_rate": 6.721143587806462e-06, "loss": 0.5529, "step": 2569 }, { "epoch": 0.15419691606167876, "grad_norm": 1.1827112436294556, "learning_rate": 6.720877479471668e-06, "loss": 0.4328, "step": 2570 }, { "epoch": 0.15425691486170276, "grad_norm": 1.2798669338226318, "learning_rate": 6.720611249499007e-06, "loss": 0.5271, "step": 2571 }, { "epoch": 0.15431691366172676, "grad_norm": 1.3630164861679077, "learning_rate": 6.72034489789853e-06, "loss": 0.4698, "step": 2572 }, { "epoch": 0.15437691246175075, "grad_norm": 1.2689363956451416, "learning_rate": 6.7200784246802995e-06, "loss": 0.4399, "step": 2573 }, { "epoch": 0.15443691126177475, "grad_norm": 1.3047075271606445, "learning_rate": 6.719811829854377e-06, "loss": 0.4716, "step": 2574 }, { "epoch": 0.15449691006179878, "grad_norm": 1.4104325771331787, "learning_rate": 6.719545113430831e-06, "loss": 0.5021, "step": 2575 }, { "epoch": 0.15455690886182277, "grad_norm": 1.2321200370788574, "learning_rate": 6.719278275419735e-06, "loss": 0.5013, "step": 2576 }, { "epoch": 0.15461690766184677, "grad_norm": 1.3682904243469238, "learning_rate": 6.719011315831165e-06, "loss": 0.515, "step": 2577 }, { "epoch": 0.15467690646187077, "grad_norm": 1.193112850189209, "learning_rate": 6.718744234675203e-06, "loss": 0.4349, "step": 2578 }, { "epoch": 0.15473690526189476, "grad_norm": 1.0896046161651611, "learning_rate": 6.718477031961936e-06, "loss": 0.4439, "step": 2579 }, { "epoch": 0.15479690406191876, "grad_norm": 1.2579058408737183, "learning_rate": 6.718209707701455e-06, "loss": 0.4816, "step": 2580 }, { "epoch": 0.15485690286194276, "grad_norm": 1.3638014793395996, "learning_rate": 6.717942261903856e-06, "loss": 0.4573, "step": 2581 }, { "epoch": 0.15491690166196675, "grad_norm": 1.2641139030456543, "learning_rate": 6.7176746945792375e-06, "loss": 0.4341, "step": 2582 }, { "epoch": 0.15497690046199075, "grad_norm": 1.2936557531356812, "learning_rate": 6.717407005737706e-06, "loss": 0.4725, "step": 2583 }, { "epoch": 0.15503689926201475, "grad_norm": 1.3332597017288208, "learning_rate": 6.717139195389371e-06, "loss": 0.4472, "step": 2584 }, { "epoch": 0.15509689806203875, "grad_norm": 1.2717902660369873, "learning_rate": 6.716871263544346e-06, "loss": 0.4571, "step": 2585 }, { "epoch": 0.15515689686206277, "grad_norm": 1.220988392829895, "learning_rate": 6.716603210212748e-06, "loss": 0.4415, "step": 2586 }, { "epoch": 0.15521689566208677, "grad_norm": 1.2243545055389404, "learning_rate": 6.716335035404702e-06, "loss": 0.4666, "step": 2587 }, { "epoch": 0.15527689446211076, "grad_norm": 1.4178142547607422, "learning_rate": 6.716066739130336e-06, "loss": 0.5143, "step": 2588 }, { "epoch": 0.15533689326213476, "grad_norm": 1.2515665292739868, "learning_rate": 6.715798321399782e-06, "loss": 0.5117, "step": 2589 }, { "epoch": 0.15539689206215876, "grad_norm": 1.2562675476074219, "learning_rate": 6.715529782223175e-06, "loss": 0.4652, "step": 2590 }, { "epoch": 0.15545689086218276, "grad_norm": 1.3327277898788452, "learning_rate": 6.715261121610659e-06, "loss": 0.4717, "step": 2591 }, { "epoch": 0.15551688966220675, "grad_norm": 1.3436157703399658, "learning_rate": 6.7149923395723795e-06, "loss": 0.4872, "step": 2592 }, { "epoch": 0.15557688846223075, "grad_norm": 1.3609155416488647, "learning_rate": 6.7147234361184865e-06, "loss": 0.4609, "step": 2593 }, { "epoch": 0.15563688726225475, "grad_norm": 1.2094757556915283, "learning_rate": 6.714454411259134e-06, "loss": 0.4773, "step": 2594 }, { "epoch": 0.15569688606227874, "grad_norm": 1.2557328939437866, "learning_rate": 6.714185265004486e-06, "loss": 0.5205, "step": 2595 }, { "epoch": 0.15575688486230274, "grad_norm": 1.3862097263336182, "learning_rate": 6.7139159973647024e-06, "loss": 0.4395, "step": 2596 }, { "epoch": 0.15581688366232677, "grad_norm": 1.2118057012557983, "learning_rate": 6.713646608349955e-06, "loss": 0.4244, "step": 2597 }, { "epoch": 0.15587688246235076, "grad_norm": 1.200995922088623, "learning_rate": 6.713377097970416e-06, "loss": 0.4304, "step": 2598 }, { "epoch": 0.15593688126237476, "grad_norm": 1.4180747270584106, "learning_rate": 6.713107466236265e-06, "loss": 0.5131, "step": 2599 }, { "epoch": 0.15599688006239876, "grad_norm": 1.2112644910812378, "learning_rate": 6.712837713157683e-06, "loss": 0.4728, "step": 2600 }, { "epoch": 0.15605687886242275, "grad_norm": 1.2087585926055908, "learning_rate": 6.712567838744859e-06, "loss": 0.4659, "step": 2601 }, { "epoch": 0.15611687766244675, "grad_norm": 1.4042686223983765, "learning_rate": 6.712297843007984e-06, "loss": 0.5105, "step": 2602 }, { "epoch": 0.15617687646247075, "grad_norm": 1.291408658027649, "learning_rate": 6.712027725957255e-06, "loss": 0.4997, "step": 2603 }, { "epoch": 0.15623687526249475, "grad_norm": 1.2262336015701294, "learning_rate": 6.711757487602872e-06, "loss": 0.4759, "step": 2604 }, { "epoch": 0.15629687406251874, "grad_norm": 1.3061243295669556, "learning_rate": 6.711487127955041e-06, "loss": 0.4837, "step": 2605 }, { "epoch": 0.15635687286254274, "grad_norm": 1.3012982606887817, "learning_rate": 6.711216647023973e-06, "loss": 0.5336, "step": 2606 }, { "epoch": 0.15641687166256674, "grad_norm": 1.3506696224212646, "learning_rate": 6.710946044819883e-06, "loss": 0.5028, "step": 2607 }, { "epoch": 0.15647687046259076, "grad_norm": 1.3475632667541504, "learning_rate": 6.71067532135299e-06, "loss": 0.5324, "step": 2608 }, { "epoch": 0.15653686926261476, "grad_norm": 1.3653674125671387, "learning_rate": 6.710404476633517e-06, "loss": 0.4287, "step": 2609 }, { "epoch": 0.15659686806263876, "grad_norm": 1.1654090881347656, "learning_rate": 6.710133510671694e-06, "loss": 0.4466, "step": 2610 }, { "epoch": 0.15665686686266275, "grad_norm": 1.3777625560760498, "learning_rate": 6.709862423477754e-06, "loss": 0.4637, "step": 2611 }, { "epoch": 0.15671686566268675, "grad_norm": 1.3915379047393799, "learning_rate": 6.709591215061934e-06, "loss": 0.4898, "step": 2612 }, { "epoch": 0.15677686446271075, "grad_norm": 1.266775369644165, "learning_rate": 6.709319885434477e-06, "loss": 0.4322, "step": 2613 }, { "epoch": 0.15683686326273474, "grad_norm": 1.3241655826568604, "learning_rate": 6.709048434605629e-06, "loss": 0.502, "step": 2614 }, { "epoch": 0.15689686206275874, "grad_norm": 1.1053537130355835, "learning_rate": 6.708776862585643e-06, "loss": 0.4327, "step": 2615 }, { "epoch": 0.15695686086278274, "grad_norm": 1.2501860857009888, "learning_rate": 6.708505169384773e-06, "loss": 0.5252, "step": 2616 }, { "epoch": 0.15701685966280673, "grad_norm": 1.2382566928863525, "learning_rate": 6.708233355013281e-06, "loss": 0.5069, "step": 2617 }, { "epoch": 0.15707685846283073, "grad_norm": 1.1385735273361206, "learning_rate": 6.707961419481431e-06, "loss": 0.4055, "step": 2618 }, { "epoch": 0.15713685726285476, "grad_norm": 1.3790112733840942, "learning_rate": 6.707689362799496e-06, "loss": 0.4872, "step": 2619 }, { "epoch": 0.15719685606287875, "grad_norm": 1.233611822128296, "learning_rate": 6.707417184977746e-06, "loss": 0.4348, "step": 2620 }, { "epoch": 0.15725685486290275, "grad_norm": 1.1662031412124634, "learning_rate": 6.707144886026462e-06, "loss": 0.4468, "step": 2621 }, { "epoch": 0.15731685366292675, "grad_norm": 1.2943345308303833, "learning_rate": 6.706872465955928e-06, "loss": 0.4813, "step": 2622 }, { "epoch": 0.15737685246295074, "grad_norm": 1.085341453552246, "learning_rate": 6.706599924776432e-06, "loss": 0.4906, "step": 2623 }, { "epoch": 0.15743685126297474, "grad_norm": 1.3937216997146606, "learning_rate": 6.706327262498265e-06, "loss": 0.4897, "step": 2624 }, { "epoch": 0.15749685006299874, "grad_norm": 1.123072862625122, "learning_rate": 6.706054479131726e-06, "loss": 0.4298, "step": 2625 }, { "epoch": 0.15755684886302274, "grad_norm": 1.2406185865402222, "learning_rate": 6.705781574687115e-06, "loss": 0.4377, "step": 2626 }, { "epoch": 0.15761684766304673, "grad_norm": 1.2839809656143188, "learning_rate": 6.70550854917474e-06, "loss": 0.4995, "step": 2627 }, { "epoch": 0.15767684646307073, "grad_norm": 1.2367465496063232, "learning_rate": 6.705235402604912e-06, "loss": 0.4768, "step": 2628 }, { "epoch": 0.15773684526309473, "grad_norm": 1.2865490913391113, "learning_rate": 6.704962134987945e-06, "loss": 0.4999, "step": 2629 }, { "epoch": 0.15779684406311872, "grad_norm": 1.2884068489074707, "learning_rate": 6.70468874633416e-06, "loss": 0.5027, "step": 2630 }, { "epoch": 0.15785684286314275, "grad_norm": 1.2954151630401611, "learning_rate": 6.704415236653882e-06, "loss": 0.4508, "step": 2631 }, { "epoch": 0.15791684166316675, "grad_norm": 1.2277498245239258, "learning_rate": 6.704141605957439e-06, "loss": 0.4849, "step": 2632 }, { "epoch": 0.15797684046319074, "grad_norm": 1.2846097946166992, "learning_rate": 6.703867854255166e-06, "loss": 0.4228, "step": 2633 }, { "epoch": 0.15803683926321474, "grad_norm": 1.3069665431976318, "learning_rate": 6.703593981557402e-06, "loss": 0.4924, "step": 2634 }, { "epoch": 0.15809683806323874, "grad_norm": 1.3727149963378906, "learning_rate": 6.7033199878744874e-06, "loss": 0.5244, "step": 2635 }, { "epoch": 0.15815683686326273, "grad_norm": 1.161131501197815, "learning_rate": 6.703045873216772e-06, "loss": 0.46, "step": 2636 }, { "epoch": 0.15821683566328673, "grad_norm": 1.3982959985733032, "learning_rate": 6.702771637594606e-06, "loss": 0.5531, "step": 2637 }, { "epoch": 0.15827683446331073, "grad_norm": 1.2398053407669067, "learning_rate": 6.702497281018348e-06, "loss": 0.4199, "step": 2638 }, { "epoch": 0.15833683326333473, "grad_norm": 1.3747121095657349, "learning_rate": 6.702222803498358e-06, "loss": 0.4662, "step": 2639 }, { "epoch": 0.15839683206335872, "grad_norm": 1.2629132270812988, "learning_rate": 6.701948205045002e-06, "loss": 0.4878, "step": 2640 }, { "epoch": 0.15845683086338272, "grad_norm": 1.326762318611145, "learning_rate": 6.70167348566865e-06, "loss": 0.4626, "step": 2641 }, { "epoch": 0.15851682966340674, "grad_norm": 1.2311174869537354, "learning_rate": 6.701398645379678e-06, "loss": 0.5052, "step": 2642 }, { "epoch": 0.15857682846343074, "grad_norm": 1.2109941244125366, "learning_rate": 6.701123684188464e-06, "loss": 0.4939, "step": 2643 }, { "epoch": 0.15863682726345474, "grad_norm": 1.198530673980713, "learning_rate": 6.700848602105393e-06, "loss": 0.4801, "step": 2644 }, { "epoch": 0.15869682606347874, "grad_norm": 1.2212436199188232, "learning_rate": 6.700573399140853e-06, "loss": 0.4835, "step": 2645 }, { "epoch": 0.15875682486350273, "grad_norm": 1.3675392866134644, "learning_rate": 6.700298075305238e-06, "loss": 0.4539, "step": 2646 }, { "epoch": 0.15881682366352673, "grad_norm": 1.2574738264083862, "learning_rate": 6.700022630608945e-06, "loss": 0.4577, "step": 2647 }, { "epoch": 0.15887682246355073, "grad_norm": 1.318768858909607, "learning_rate": 6.699747065062377e-06, "loss": 0.4984, "step": 2648 }, { "epoch": 0.15893682126357472, "grad_norm": 1.3045086860656738, "learning_rate": 6.699471378675939e-06, "loss": 0.5206, "step": 2649 }, { "epoch": 0.15899682006359872, "grad_norm": 1.3035390377044678, "learning_rate": 6.699195571460045e-06, "loss": 0.4207, "step": 2650 }, { "epoch": 0.15905681886362272, "grad_norm": 1.372961401939392, "learning_rate": 6.6989196434251095e-06, "loss": 0.4729, "step": 2651 }, { "epoch": 0.15911681766364671, "grad_norm": 1.3675981760025024, "learning_rate": 6.698643594581554e-06, "loss": 0.496, "step": 2652 }, { "epoch": 0.15917681646367074, "grad_norm": 1.4280165433883667, "learning_rate": 6.698367424939801e-06, "loss": 0.4331, "step": 2653 }, { "epoch": 0.15923681526369474, "grad_norm": 1.3000519275665283, "learning_rate": 6.698091134510284e-06, "loss": 0.4446, "step": 2654 }, { "epoch": 0.15929681406371873, "grad_norm": 1.331955909729004, "learning_rate": 6.697814723303434e-06, "loss": 0.4932, "step": 2655 }, { "epoch": 0.15935681286374273, "grad_norm": 1.2997373342514038, "learning_rate": 6.697538191329692e-06, "loss": 0.4609, "step": 2656 }, { "epoch": 0.15941681166376673, "grad_norm": 1.2662711143493652, "learning_rate": 6.6972615385995005e-06, "loss": 0.4828, "step": 2657 }, { "epoch": 0.15947681046379072, "grad_norm": 1.2524347305297852, "learning_rate": 6.696984765123306e-06, "loss": 0.4545, "step": 2658 }, { "epoch": 0.15953680926381472, "grad_norm": 1.2402321100234985, "learning_rate": 6.696707870911564e-06, "loss": 0.4668, "step": 2659 }, { "epoch": 0.15959680806383872, "grad_norm": 1.3295438289642334, "learning_rate": 6.696430855974729e-06, "loss": 0.4961, "step": 2660 }, { "epoch": 0.15965680686386272, "grad_norm": 1.4454978704452515, "learning_rate": 6.696153720323264e-06, "loss": 0.5295, "step": 2661 }, { "epoch": 0.1597168056638867, "grad_norm": 1.2736318111419678, "learning_rate": 6.695876463967635e-06, "loss": 0.4426, "step": 2662 }, { "epoch": 0.1597768044639107, "grad_norm": 1.3556886911392212, "learning_rate": 6.695599086918311e-06, "loss": 0.4557, "step": 2663 }, { "epoch": 0.15983680326393473, "grad_norm": 1.336438536643982, "learning_rate": 6.69532158918577e-06, "loss": 0.467, "step": 2664 }, { "epoch": 0.15989680206395873, "grad_norm": 1.1318581104278564, "learning_rate": 6.695043970780489e-06, "loss": 0.4506, "step": 2665 }, { "epoch": 0.15995680086398273, "grad_norm": 1.2628976106643677, "learning_rate": 6.694766231712954e-06, "loss": 0.4948, "step": 2666 }, { "epoch": 0.16001679966400673, "grad_norm": 1.4310321807861328, "learning_rate": 6.694488371993655e-06, "loss": 0.4993, "step": 2667 }, { "epoch": 0.16007679846403072, "grad_norm": 1.1557872295379639, "learning_rate": 6.694210391633083e-06, "loss": 0.398, "step": 2668 }, { "epoch": 0.16013679726405472, "grad_norm": 1.3289802074432373, "learning_rate": 6.693932290641738e-06, "loss": 0.5058, "step": 2669 }, { "epoch": 0.16019679606407872, "grad_norm": 1.1413098573684692, "learning_rate": 6.693654069030122e-06, "loss": 0.4607, "step": 2670 }, { "epoch": 0.16025679486410271, "grad_norm": 1.1974815130233765, "learning_rate": 6.6933757268087415e-06, "loss": 0.4419, "step": 2671 }, { "epoch": 0.1603167936641267, "grad_norm": 1.2032155990600586, "learning_rate": 6.693097263988109e-06, "loss": 0.4603, "step": 2672 }, { "epoch": 0.1603767924641507, "grad_norm": 1.1859567165374756, "learning_rate": 6.692818680578741e-06, "loss": 0.4194, "step": 2673 }, { "epoch": 0.1604367912641747, "grad_norm": 1.507699966430664, "learning_rate": 6.692539976591158e-06, "loss": 0.4969, "step": 2674 }, { "epoch": 0.1604967900641987, "grad_norm": 1.1955784559249878, "learning_rate": 6.692261152035884e-06, "loss": 0.4386, "step": 2675 }, { "epoch": 0.16055678886422273, "grad_norm": 1.558607816696167, "learning_rate": 6.69198220692345e-06, "loss": 0.4804, "step": 2676 }, { "epoch": 0.16061678766424672, "grad_norm": 1.2827949523925781, "learning_rate": 6.691703141264392e-06, "loss": 0.4749, "step": 2677 }, { "epoch": 0.16067678646427072, "grad_norm": 1.1713874340057373, "learning_rate": 6.691423955069247e-06, "loss": 0.4882, "step": 2678 }, { "epoch": 0.16073678526429472, "grad_norm": 1.2620313167572021, "learning_rate": 6.69114464834856e-06, "loss": 0.4963, "step": 2679 }, { "epoch": 0.16079678406431872, "grad_norm": 1.1808342933654785, "learning_rate": 6.690865221112879e-06, "loss": 0.4425, "step": 2680 }, { "epoch": 0.1608567828643427, "grad_norm": 1.215369462966919, "learning_rate": 6.690585673372755e-06, "loss": 0.4587, "step": 2681 }, { "epoch": 0.1609167816643667, "grad_norm": 1.2488651275634766, "learning_rate": 6.690306005138746e-06, "loss": 0.489, "step": 2682 }, { "epoch": 0.1609767804643907, "grad_norm": 1.383729100227356, "learning_rate": 6.690026216421415e-06, "loss": 0.4791, "step": 2683 }, { "epoch": 0.1610367792644147, "grad_norm": 1.2557759284973145, "learning_rate": 6.689746307231328e-06, "loss": 0.5007, "step": 2684 }, { "epoch": 0.1610967780644387, "grad_norm": 1.3278424739837646, "learning_rate": 6.689466277579054e-06, "loss": 0.4476, "step": 2685 }, { "epoch": 0.1611567768644627, "grad_norm": 1.2887535095214844, "learning_rate": 6.689186127475171e-06, "loss": 0.5093, "step": 2686 }, { "epoch": 0.16121677566448672, "grad_norm": 1.3355084657669067, "learning_rate": 6.688905856930257e-06, "loss": 0.5191, "step": 2687 }, { "epoch": 0.16127677446451072, "grad_norm": 1.1437244415283203, "learning_rate": 6.6886254659548985e-06, "loss": 0.4955, "step": 2688 }, { "epoch": 0.16133677326453472, "grad_norm": 1.133088231086731, "learning_rate": 6.688344954559682e-06, "loss": 0.4652, "step": 2689 }, { "epoch": 0.1613967720645587, "grad_norm": 1.2812596559524536, "learning_rate": 6.688064322755204e-06, "loss": 0.506, "step": 2690 }, { "epoch": 0.1614567708645827, "grad_norm": 1.457473874092102, "learning_rate": 6.6877835705520606e-06, "loss": 0.4858, "step": 2691 }, { "epoch": 0.1615167696646067, "grad_norm": 1.3061541318893433, "learning_rate": 6.687502697960856e-06, "loss": 0.5027, "step": 2692 }, { "epoch": 0.1615767684646307, "grad_norm": 1.3812228441238403, "learning_rate": 6.687221704992197e-06, "loss": 0.4932, "step": 2693 }, { "epoch": 0.1616367672646547, "grad_norm": 1.25275456905365, "learning_rate": 6.686940591656693e-06, "loss": 0.5086, "step": 2694 }, { "epoch": 0.1616967660646787, "grad_norm": 1.4225482940673828, "learning_rate": 6.686659357964964e-06, "loss": 0.4825, "step": 2695 }, { "epoch": 0.1617567648647027, "grad_norm": 1.2878865003585815, "learning_rate": 6.68637800392763e-06, "loss": 0.4586, "step": 2696 }, { "epoch": 0.1618167636647267, "grad_norm": 1.1965539455413818, "learning_rate": 6.686096529555315e-06, "loss": 0.4466, "step": 2697 }, { "epoch": 0.16187676246475072, "grad_norm": 1.3065203428268433, "learning_rate": 6.685814934858651e-06, "loss": 0.4402, "step": 2698 }, { "epoch": 0.16193676126477471, "grad_norm": 1.2499803304672241, "learning_rate": 6.68553321984827e-06, "loss": 0.4846, "step": 2699 }, { "epoch": 0.1619967600647987, "grad_norm": 1.2485202550888062, "learning_rate": 6.685251384534812e-06, "loss": 0.523, "step": 2700 }, { "epoch": 0.1620567588648227, "grad_norm": 1.3592453002929688, "learning_rate": 6.684969428928922e-06, "loss": 0.4804, "step": 2701 }, { "epoch": 0.1621167576648467, "grad_norm": 1.2899408340454102, "learning_rate": 6.684687353041248e-06, "loss": 0.5018, "step": 2702 }, { "epoch": 0.1621767564648707, "grad_norm": 1.3099441528320312, "learning_rate": 6.684405156882443e-06, "loss": 0.5104, "step": 2703 }, { "epoch": 0.1622367552648947, "grad_norm": 1.1906076669692993, "learning_rate": 6.684122840463161e-06, "loss": 0.4316, "step": 2704 }, { "epoch": 0.1622967540649187, "grad_norm": 1.1972756385803223, "learning_rate": 6.683840403794067e-06, "loss": 0.4627, "step": 2705 }, { "epoch": 0.1623567528649427, "grad_norm": 1.1679129600524902, "learning_rate": 6.683557846885827e-06, "loss": 0.436, "step": 2706 }, { "epoch": 0.1624167516649667, "grad_norm": 1.3353826999664307, "learning_rate": 6.683275169749111e-06, "loss": 0.4702, "step": 2707 }, { "epoch": 0.1624767504649907, "grad_norm": 1.4309909343719482, "learning_rate": 6.682992372394595e-06, "loss": 0.4709, "step": 2708 }, { "epoch": 0.1625367492650147, "grad_norm": 1.266641616821289, "learning_rate": 6.682709454832959e-06, "loss": 0.4754, "step": 2709 }, { "epoch": 0.1625967480650387, "grad_norm": 1.1029839515686035, "learning_rate": 6.6824264170748855e-06, "loss": 0.4049, "step": 2710 }, { "epoch": 0.1626567468650627, "grad_norm": 1.2350783348083496, "learning_rate": 6.682143259131068e-06, "loss": 0.4646, "step": 2711 }, { "epoch": 0.1627167456650867, "grad_norm": 1.3685674667358398, "learning_rate": 6.6818599810121955e-06, "loss": 0.5317, "step": 2712 }, { "epoch": 0.1627767444651107, "grad_norm": 1.1695102453231812, "learning_rate": 6.681576582728968e-06, "loss": 0.3929, "step": 2713 }, { "epoch": 0.1628367432651347, "grad_norm": 1.2907789945602417, "learning_rate": 6.681293064292089e-06, "loss": 0.4994, "step": 2714 }, { "epoch": 0.1628967420651587, "grad_norm": 1.369640588760376, "learning_rate": 6.681009425712266e-06, "loss": 0.4948, "step": 2715 }, { "epoch": 0.1629567408651827, "grad_norm": 1.2043513059616089, "learning_rate": 6.680725667000208e-06, "loss": 0.4597, "step": 2716 }, { "epoch": 0.1630167396652067, "grad_norm": 1.2901854515075684, "learning_rate": 6.680441788166633e-06, "loss": 0.4348, "step": 2717 }, { "epoch": 0.1630767384652307, "grad_norm": 1.2907415628433228, "learning_rate": 6.680157789222262e-06, "loss": 0.5119, "step": 2718 }, { "epoch": 0.16313673726525468, "grad_norm": 1.202919363975525, "learning_rate": 6.67987367017782e-06, "loss": 0.484, "step": 2719 }, { "epoch": 0.1631967360652787, "grad_norm": 1.2698172330856323, "learning_rate": 6.679589431044037e-06, "loss": 0.4939, "step": 2720 }, { "epoch": 0.1632567348653027, "grad_norm": 1.0956233739852905, "learning_rate": 6.6793050718316475e-06, "loss": 0.4694, "step": 2721 }, { "epoch": 0.1633167336653267, "grad_norm": 1.2108060121536255, "learning_rate": 6.6790205925513906e-06, "loss": 0.4459, "step": 2722 }, { "epoch": 0.1633767324653507, "grad_norm": 1.248656153678894, "learning_rate": 6.6787359932140085e-06, "loss": 0.475, "step": 2723 }, { "epoch": 0.1634367312653747, "grad_norm": 1.2879966497421265, "learning_rate": 6.678451273830251e-06, "loss": 0.4888, "step": 2724 }, { "epoch": 0.1634967300653987, "grad_norm": 1.3199574947357178, "learning_rate": 6.678166434410869e-06, "loss": 0.4617, "step": 2725 }, { "epoch": 0.1635567288654227, "grad_norm": 1.1947410106658936, "learning_rate": 6.6778814749666225e-06, "loss": 0.4267, "step": 2726 }, { "epoch": 0.1636167276654467, "grad_norm": 1.2876818180084229, "learning_rate": 6.67759639550827e-06, "loss": 0.5031, "step": 2727 }, { "epoch": 0.16367672646547068, "grad_norm": 1.3253529071807861, "learning_rate": 6.6773111960465775e-06, "loss": 0.5318, "step": 2728 }, { "epoch": 0.16373672526549468, "grad_norm": 1.2927513122558594, "learning_rate": 6.677025876592318e-06, "loss": 0.444, "step": 2729 }, { "epoch": 0.16379672406551868, "grad_norm": 1.3011739253997803, "learning_rate": 6.676740437156266e-06, "loss": 0.5022, "step": 2730 }, { "epoch": 0.16385672286554268, "grad_norm": 1.19058358669281, "learning_rate": 6.676454877749201e-06, "loss": 0.5017, "step": 2731 }, { "epoch": 0.1639167216655667, "grad_norm": 1.216972827911377, "learning_rate": 6.676169198381906e-06, "loss": 0.4805, "step": 2732 }, { "epoch": 0.1639767204655907, "grad_norm": 1.223198652267456, "learning_rate": 6.675883399065171e-06, "loss": 0.4687, "step": 2733 }, { "epoch": 0.1640367192656147, "grad_norm": 1.2477396726608276, "learning_rate": 6.67559747980979e-06, "loss": 0.4923, "step": 2734 }, { "epoch": 0.1640967180656387, "grad_norm": 1.2432546615600586, "learning_rate": 6.675311440626561e-06, "loss": 0.4687, "step": 2735 }, { "epoch": 0.1641567168656627, "grad_norm": 1.2388331890106201, "learning_rate": 6.675025281526284e-06, "loss": 0.471, "step": 2736 }, { "epoch": 0.16421671566568669, "grad_norm": 1.121193528175354, "learning_rate": 6.674739002519768e-06, "loss": 0.4483, "step": 2737 }, { "epoch": 0.16427671446571068, "grad_norm": 1.3285924196243286, "learning_rate": 6.6744526036178245e-06, "loss": 0.4463, "step": 2738 }, { "epoch": 0.16433671326573468, "grad_norm": 1.237614393234253, "learning_rate": 6.674166084831268e-06, "loss": 0.5198, "step": 2739 }, { "epoch": 0.16439671206575868, "grad_norm": 1.2183616161346436, "learning_rate": 6.67387944617092e-06, "loss": 0.4373, "step": 2740 }, { "epoch": 0.16445671086578267, "grad_norm": 1.3202149868011475, "learning_rate": 6.673592687647607e-06, "loss": 0.4694, "step": 2741 }, { "epoch": 0.16451670966580667, "grad_norm": 1.2839356660842896, "learning_rate": 6.673305809272155e-06, "loss": 0.4759, "step": 2742 }, { "epoch": 0.1645767084658307, "grad_norm": 1.2789993286132812, "learning_rate": 6.673018811055401e-06, "loss": 0.5057, "step": 2743 }, { "epoch": 0.1646367072658547, "grad_norm": 1.4713162183761597, "learning_rate": 6.672731693008183e-06, "loss": 0.4845, "step": 2744 }, { "epoch": 0.1646967060658787, "grad_norm": 1.3314220905303955, "learning_rate": 6.672444455141343e-06, "loss": 0.4844, "step": 2745 }, { "epoch": 0.1647567048659027, "grad_norm": 1.2797837257385254, "learning_rate": 6.672157097465731e-06, "loss": 0.4585, "step": 2746 }, { "epoch": 0.16481670366592668, "grad_norm": 1.3144373893737793, "learning_rate": 6.671869619992198e-06, "loss": 0.5022, "step": 2747 }, { "epoch": 0.16487670246595068, "grad_norm": 1.2001545429229736, "learning_rate": 6.671582022731599e-06, "loss": 0.4908, "step": 2748 }, { "epoch": 0.16493670126597468, "grad_norm": 1.2139049768447876, "learning_rate": 6.6712943056947965e-06, "loss": 0.4781, "step": 2749 }, { "epoch": 0.16499670006599867, "grad_norm": 1.3174768686294556, "learning_rate": 6.671006468892658e-06, "loss": 0.4742, "step": 2750 }, { "epoch": 0.16505669886602267, "grad_norm": 1.3290491104125977, "learning_rate": 6.670718512336051e-06, "loss": 0.5217, "step": 2751 }, { "epoch": 0.16511669766604667, "grad_norm": 1.1866310834884644, "learning_rate": 6.670430436035851e-06, "loss": 0.3806, "step": 2752 }, { "epoch": 0.16517669646607067, "grad_norm": 1.4001760482788086, "learning_rate": 6.670142240002939e-06, "loss": 0.4941, "step": 2753 }, { "epoch": 0.1652366952660947, "grad_norm": 1.328432559967041, "learning_rate": 6.669853924248198e-06, "loss": 0.4625, "step": 2754 }, { "epoch": 0.1652966940661187, "grad_norm": 1.2283815145492554, "learning_rate": 6.669565488782515e-06, "loss": 0.4559, "step": 2755 }, { "epoch": 0.16535669286614268, "grad_norm": 1.2914366722106934, "learning_rate": 6.669276933616785e-06, "loss": 0.4347, "step": 2756 }, { "epoch": 0.16541669166616668, "grad_norm": 1.2580138444900513, "learning_rate": 6.668988258761904e-06, "loss": 0.4389, "step": 2757 }, { "epoch": 0.16547669046619068, "grad_norm": 1.2837004661560059, "learning_rate": 6.668699464228774e-06, "loss": 0.4735, "step": 2758 }, { "epoch": 0.16553668926621468, "grad_norm": 1.3214435577392578, "learning_rate": 6.668410550028303e-06, "loss": 0.4291, "step": 2759 }, { "epoch": 0.16559668806623867, "grad_norm": 1.3016008138656616, "learning_rate": 6.668121516171399e-06, "loss": 0.5202, "step": 2760 }, { "epoch": 0.16565668686626267, "grad_norm": 1.2429059743881226, "learning_rate": 6.667832362668981e-06, "loss": 0.5213, "step": 2761 }, { "epoch": 0.16571668566628667, "grad_norm": 1.3303430080413818, "learning_rate": 6.6675430895319655e-06, "loss": 0.484, "step": 2762 }, { "epoch": 0.16577668446631066, "grad_norm": 1.3096259832382202, "learning_rate": 6.6672536967712805e-06, "loss": 0.4525, "step": 2763 }, { "epoch": 0.16583668326633466, "grad_norm": 1.2586601972579956, "learning_rate": 6.666964184397853e-06, "loss": 0.5157, "step": 2764 }, { "epoch": 0.16589668206635869, "grad_norm": 1.2543513774871826, "learning_rate": 6.666674552422616e-06, "loss": 0.4542, "step": 2765 }, { "epoch": 0.16595668086638268, "grad_norm": 1.2728136777877808, "learning_rate": 6.666384800856509e-06, "loss": 0.4964, "step": 2766 }, { "epoch": 0.16601667966640668, "grad_norm": 1.4491113424301147, "learning_rate": 6.6660949297104744e-06, "loss": 0.5269, "step": 2767 }, { "epoch": 0.16607667846643068, "grad_norm": 1.1231943368911743, "learning_rate": 6.665804938995458e-06, "loss": 0.4389, "step": 2768 }, { "epoch": 0.16613667726645467, "grad_norm": 1.1439907550811768, "learning_rate": 6.665514828722414e-06, "loss": 0.48, "step": 2769 }, { "epoch": 0.16619667606647867, "grad_norm": 1.2581499814987183, "learning_rate": 6.665224598902295e-06, "loss": 0.5132, "step": 2770 }, { "epoch": 0.16625667486650267, "grad_norm": 1.3308780193328857, "learning_rate": 6.664934249546065e-06, "loss": 0.4606, "step": 2771 }, { "epoch": 0.16631667366652667, "grad_norm": 1.2593499422073364, "learning_rate": 6.664643780664688e-06, "loss": 0.4845, "step": 2772 }, { "epoch": 0.16637667246655066, "grad_norm": 1.0431667566299438, "learning_rate": 6.6643531922691326e-06, "loss": 0.4496, "step": 2773 }, { "epoch": 0.16643667126657466, "grad_norm": 1.3230679035186768, "learning_rate": 6.664062484370374e-06, "loss": 0.5095, "step": 2774 }, { "epoch": 0.16649667006659866, "grad_norm": 1.2508958578109741, "learning_rate": 6.663771656979391e-06, "loss": 0.481, "step": 2775 }, { "epoch": 0.16655666886662268, "grad_norm": 1.2553457021713257, "learning_rate": 6.663480710107167e-06, "loss": 0.4626, "step": 2776 }, { "epoch": 0.16661666766664668, "grad_norm": 1.2292590141296387, "learning_rate": 6.663189643764689e-06, "loss": 0.4571, "step": 2777 }, { "epoch": 0.16667666646667068, "grad_norm": 1.2426797151565552, "learning_rate": 6.66289845796295e-06, "loss": 0.4173, "step": 2778 }, { "epoch": 0.16673666526669467, "grad_norm": 1.2512965202331543, "learning_rate": 6.662607152712945e-06, "loss": 0.4961, "step": 2779 }, { "epoch": 0.16679666406671867, "grad_norm": 1.3973075151443481, "learning_rate": 6.662315728025678e-06, "loss": 0.4701, "step": 2780 }, { "epoch": 0.16685666286674267, "grad_norm": 1.1445633172988892, "learning_rate": 6.662024183912153e-06, "loss": 0.4854, "step": 2781 }, { "epoch": 0.16691666166676666, "grad_norm": 1.3311694860458374, "learning_rate": 6.661732520383381e-06, "loss": 0.4717, "step": 2782 }, { "epoch": 0.16697666046679066, "grad_norm": 1.4926278591156006, "learning_rate": 6.661440737450376e-06, "loss": 0.4807, "step": 2783 }, { "epoch": 0.16703665926681466, "grad_norm": 1.3890061378479004, "learning_rate": 6.661148835124159e-06, "loss": 0.4961, "step": 2784 }, { "epoch": 0.16709665806683865, "grad_norm": 1.1417187452316284, "learning_rate": 6.660856813415751e-06, "loss": 0.4184, "step": 2785 }, { "epoch": 0.16715665686686265, "grad_norm": 1.2512106895446777, "learning_rate": 6.660564672336183e-06, "loss": 0.4386, "step": 2786 }, { "epoch": 0.16721665566688665, "grad_norm": 1.2853000164031982, "learning_rate": 6.6602724118964865e-06, "loss": 0.446, "step": 2787 }, { "epoch": 0.16727665446691067, "grad_norm": 1.2119556665420532, "learning_rate": 6.6599800321076996e-06, "loss": 0.46, "step": 2788 }, { "epoch": 0.16733665326693467, "grad_norm": 1.2490272521972656, "learning_rate": 6.659687532980863e-06, "loss": 0.449, "step": 2789 }, { "epoch": 0.16739665206695867, "grad_norm": 1.1075502634048462, "learning_rate": 6.659394914527025e-06, "loss": 0.453, "step": 2790 }, { "epoch": 0.16745665086698266, "grad_norm": 1.3309714794158936, "learning_rate": 6.659102176757234e-06, "loss": 0.5132, "step": 2791 }, { "epoch": 0.16751664966700666, "grad_norm": 1.3002289533615112, "learning_rate": 6.658809319682547e-06, "loss": 0.4599, "step": 2792 }, { "epoch": 0.16757664846703066, "grad_norm": 1.3162330389022827, "learning_rate": 6.658516343314024e-06, "loss": 0.4905, "step": 2793 }, { "epoch": 0.16763664726705466, "grad_norm": 1.2488861083984375, "learning_rate": 6.658223247662728e-06, "loss": 0.4946, "step": 2794 }, { "epoch": 0.16769664606707865, "grad_norm": 1.2848539352416992, "learning_rate": 6.657930032739729e-06, "loss": 0.5064, "step": 2795 }, { "epoch": 0.16775664486710265, "grad_norm": 1.2561627626419067, "learning_rate": 6.657636698556101e-06, "loss": 0.474, "step": 2796 }, { "epoch": 0.16781664366712665, "grad_norm": 1.1923580169677734, "learning_rate": 6.6573432451229196e-06, "loss": 0.4586, "step": 2797 }, { "epoch": 0.16787664246715064, "grad_norm": 1.261975884437561, "learning_rate": 6.65704967245127e-06, "loss": 0.4894, "step": 2798 }, { "epoch": 0.16793664126717467, "grad_norm": 1.3972362279891968, "learning_rate": 6.656755980552237e-06, "loss": 0.5631, "step": 2799 }, { "epoch": 0.16799664006719867, "grad_norm": 1.3410345315933228, "learning_rate": 6.656462169436913e-06, "loss": 0.4856, "step": 2800 }, { "epoch": 0.16805663886722266, "grad_norm": 1.3073889017105103, "learning_rate": 6.656168239116394e-06, "loss": 0.4741, "step": 2801 }, { "epoch": 0.16811663766724666, "grad_norm": 1.2969688177108765, "learning_rate": 6.655874189601779e-06, "loss": 0.4794, "step": 2802 }, { "epoch": 0.16817663646727066, "grad_norm": 1.223936676979065, "learning_rate": 6.655580020904176e-06, "loss": 0.4454, "step": 2803 }, { "epoch": 0.16823663526729465, "grad_norm": 1.0596736669540405, "learning_rate": 6.6552857330346905e-06, "loss": 0.4223, "step": 2804 }, { "epoch": 0.16829663406731865, "grad_norm": 1.2975760698318481, "learning_rate": 6.654991326004439e-06, "loss": 0.4601, "step": 2805 }, { "epoch": 0.16835663286734265, "grad_norm": 1.2680877447128296, "learning_rate": 6.654696799824539e-06, "loss": 0.4477, "step": 2806 }, { "epoch": 0.16841663166736665, "grad_norm": 1.2513542175292969, "learning_rate": 6.654402154506114e-06, "loss": 0.4305, "step": 2807 }, { "epoch": 0.16847663046739064, "grad_norm": 1.3723160028457642, "learning_rate": 6.654107390060291e-06, "loss": 0.4924, "step": 2808 }, { "epoch": 0.16853662926741464, "grad_norm": 1.3960238695144653, "learning_rate": 6.653812506498201e-06, "loss": 0.4797, "step": 2809 }, { "epoch": 0.16859662806743866, "grad_norm": 1.246679425239563, "learning_rate": 6.653517503830983e-06, "loss": 0.491, "step": 2810 }, { "epoch": 0.16865662686746266, "grad_norm": 1.193798542022705, "learning_rate": 6.653222382069775e-06, "loss": 0.4339, "step": 2811 }, { "epoch": 0.16871662566748666, "grad_norm": 1.3255091905593872, "learning_rate": 6.6529271412257245e-06, "loss": 0.5282, "step": 2812 }, { "epoch": 0.16877662446751066, "grad_norm": 1.1854711771011353, "learning_rate": 6.65263178130998e-06, "loss": 0.4969, "step": 2813 }, { "epoch": 0.16883662326753465, "grad_norm": 1.2082979679107666, "learning_rate": 6.652336302333697e-06, "loss": 0.4371, "step": 2814 }, { "epoch": 0.16889662206755865, "grad_norm": 1.2795510292053223, "learning_rate": 6.652040704308033e-06, "loss": 0.4169, "step": 2815 }, { "epoch": 0.16895662086758265, "grad_norm": 1.301944375038147, "learning_rate": 6.651744987244152e-06, "loss": 0.4904, "step": 2816 }, { "epoch": 0.16901661966760664, "grad_norm": 1.3938019275665283, "learning_rate": 6.651449151153223e-06, "loss": 0.507, "step": 2817 }, { "epoch": 0.16907661846763064, "grad_norm": 1.164832592010498, "learning_rate": 6.651153196046417e-06, "loss": 0.4966, "step": 2818 }, { "epoch": 0.16913661726765464, "grad_norm": 1.1105753183364868, "learning_rate": 6.6508571219349125e-06, "loss": 0.4696, "step": 2819 }, { "epoch": 0.16919661606767863, "grad_norm": 1.1179739236831665, "learning_rate": 6.650560928829889e-06, "loss": 0.4896, "step": 2820 }, { "epoch": 0.16925661486770266, "grad_norm": 1.5075180530548096, "learning_rate": 6.650264616742534e-06, "loss": 0.463, "step": 2821 }, { "epoch": 0.16931661366772666, "grad_norm": 1.243773341178894, "learning_rate": 6.649968185684036e-06, "loss": 0.4431, "step": 2822 }, { "epoch": 0.16937661246775065, "grad_norm": 1.309565544128418, "learning_rate": 6.6496716356655915e-06, "loss": 0.4911, "step": 2823 }, { "epoch": 0.16943661126777465, "grad_norm": 1.260608434677124, "learning_rate": 6.6493749666983976e-06, "loss": 0.453, "step": 2824 }, { "epoch": 0.16949661006779865, "grad_norm": 1.4095820188522339, "learning_rate": 6.649078178793661e-06, "loss": 0.4794, "step": 2825 }, { "epoch": 0.16955660886782264, "grad_norm": 1.3896790742874146, "learning_rate": 6.6487812719625895e-06, "loss": 0.4679, "step": 2826 }, { "epoch": 0.16961660766784664, "grad_norm": 1.3254328966140747, "learning_rate": 6.648484246216394e-06, "loss": 0.421, "step": 2827 }, { "epoch": 0.16967660646787064, "grad_norm": 1.1442610025405884, "learning_rate": 6.648187101566293e-06, "loss": 0.4579, "step": 2828 }, { "epoch": 0.16973660526789464, "grad_norm": 1.2865500450134277, "learning_rate": 6.6478898380235095e-06, "loss": 0.482, "step": 2829 }, { "epoch": 0.16979660406791863, "grad_norm": 1.3290742635726929, "learning_rate": 6.647592455599267e-06, "loss": 0.5428, "step": 2830 }, { "epoch": 0.16985660286794263, "grad_norm": 1.2045893669128418, "learning_rate": 6.647294954304799e-06, "loss": 0.4561, "step": 2831 }, { "epoch": 0.16991660166796663, "grad_norm": 1.271549105644226, "learning_rate": 6.646997334151339e-06, "loss": 0.508, "step": 2832 }, { "epoch": 0.16997660046799065, "grad_norm": 1.2707463502883911, "learning_rate": 6.646699595150128e-06, "loss": 0.5201, "step": 2833 }, { "epoch": 0.17003659926801465, "grad_norm": 1.312709927558899, "learning_rate": 6.646401737312409e-06, "loss": 0.4828, "step": 2834 }, { "epoch": 0.17009659806803865, "grad_norm": 1.240262508392334, "learning_rate": 6.646103760649431e-06, "loss": 0.3806, "step": 2835 }, { "epoch": 0.17015659686806264, "grad_norm": 1.2093151807785034, "learning_rate": 6.645805665172448e-06, "loss": 0.5145, "step": 2836 }, { "epoch": 0.17021659566808664, "grad_norm": 1.2575938701629639, "learning_rate": 6.6455074508927175e-06, "loss": 0.4364, "step": 2837 }, { "epoch": 0.17027659446811064, "grad_norm": 1.3275620937347412, "learning_rate": 6.645209117821501e-06, "loss": 0.4788, "step": 2838 }, { "epoch": 0.17033659326813463, "grad_norm": 1.154941201210022, "learning_rate": 6.644910665970066e-06, "loss": 0.4241, "step": 2839 }, { "epoch": 0.17039659206815863, "grad_norm": 1.330559253692627, "learning_rate": 6.644612095349684e-06, "loss": 0.504, "step": 2840 }, { "epoch": 0.17045659086818263, "grad_norm": 1.2260384559631348, "learning_rate": 6.644313405971628e-06, "loss": 0.4625, "step": 2841 }, { "epoch": 0.17051658966820663, "grad_norm": 1.3222415447235107, "learning_rate": 6.644014597847183e-06, "loss": 0.4119, "step": 2842 }, { "epoch": 0.17057658846823062, "grad_norm": 1.3526716232299805, "learning_rate": 6.6437156709876285e-06, "loss": 0.4995, "step": 2843 }, { "epoch": 0.17063658726825465, "grad_norm": 1.4249939918518066, "learning_rate": 6.643416625404256e-06, "loss": 0.4954, "step": 2844 }, { "epoch": 0.17069658606827864, "grad_norm": 1.2768582105636597, "learning_rate": 6.64311746110836e-06, "loss": 0.3966, "step": 2845 }, { "epoch": 0.17075658486830264, "grad_norm": 1.306065559387207, "learning_rate": 6.642818178111236e-06, "loss": 0.439, "step": 2846 }, { "epoch": 0.17081658366832664, "grad_norm": 1.4728291034698486, "learning_rate": 6.642518776424189e-06, "loss": 0.5054, "step": 2847 }, { "epoch": 0.17087658246835064, "grad_norm": 1.2905957698822021, "learning_rate": 6.642219256058526e-06, "loss": 0.4976, "step": 2848 }, { "epoch": 0.17093658126837463, "grad_norm": 1.391547679901123, "learning_rate": 6.641919617025555e-06, "loss": 0.5532, "step": 2849 }, { "epoch": 0.17099658006839863, "grad_norm": 1.1845077276229858, "learning_rate": 6.6416198593365976e-06, "loss": 0.4727, "step": 2850 }, { "epoch": 0.17105657886842263, "grad_norm": 1.3329156637191772, "learning_rate": 6.641319983002969e-06, "loss": 0.4774, "step": 2851 }, { "epoch": 0.17111657766844662, "grad_norm": 1.1995419263839722, "learning_rate": 6.641019988035999e-06, "loss": 0.4493, "step": 2852 }, { "epoch": 0.17117657646847062, "grad_norm": 1.2957977056503296, "learning_rate": 6.640719874447012e-06, "loss": 0.4238, "step": 2853 }, { "epoch": 0.17123657526849462, "grad_norm": 1.364315390586853, "learning_rate": 6.640419642247346e-06, "loss": 0.4751, "step": 2854 }, { "epoch": 0.17129657406851864, "grad_norm": 1.2941111326217651, "learning_rate": 6.640119291448336e-06, "loss": 0.4821, "step": 2855 }, { "epoch": 0.17135657286854264, "grad_norm": 1.2247469425201416, "learning_rate": 6.639818822061329e-06, "loss": 0.4712, "step": 2856 }, { "epoch": 0.17141657166856664, "grad_norm": 1.2141799926757812, "learning_rate": 6.639518234097668e-06, "loss": 0.4216, "step": 2857 }, { "epoch": 0.17147657046859063, "grad_norm": 1.3271632194519043, "learning_rate": 6.639217527568708e-06, "loss": 0.4747, "step": 2858 }, { "epoch": 0.17153656926861463, "grad_norm": 1.1742579936981201, "learning_rate": 6.638916702485805e-06, "loss": 0.4753, "step": 2859 }, { "epoch": 0.17159656806863863, "grad_norm": 1.1796884536743164, "learning_rate": 6.638615758860318e-06, "loss": 0.4649, "step": 2860 }, { "epoch": 0.17165656686866262, "grad_norm": 1.290270209312439, "learning_rate": 6.638314696703613e-06, "loss": 0.4829, "step": 2861 }, { "epoch": 0.17171656566868662, "grad_norm": 1.3879634141921997, "learning_rate": 6.63801351602706e-06, "loss": 0.4913, "step": 2862 }, { "epoch": 0.17177656446871062, "grad_norm": 1.332367181777954, "learning_rate": 6.637712216842035e-06, "loss": 0.4637, "step": 2863 }, { "epoch": 0.17183656326873462, "grad_norm": 1.3829822540283203, "learning_rate": 6.637410799159913e-06, "loss": 0.4566, "step": 2864 }, { "epoch": 0.1718965620687586, "grad_norm": 1.2866859436035156, "learning_rate": 6.63710926299208e-06, "loss": 0.467, "step": 2865 }, { "epoch": 0.17195656086878264, "grad_norm": 1.330733060836792, "learning_rate": 6.636807608349922e-06, "loss": 0.5256, "step": 2866 }, { "epoch": 0.17201655966880663, "grad_norm": 1.3477649688720703, "learning_rate": 6.636505835244832e-06, "loss": 0.4466, "step": 2867 }, { "epoch": 0.17207655846883063, "grad_norm": 1.3209165334701538, "learning_rate": 6.636203943688207e-06, "loss": 0.5233, "step": 2868 }, { "epoch": 0.17213655726885463, "grad_norm": 1.411192536354065, "learning_rate": 6.635901933691449e-06, "loss": 0.4633, "step": 2869 }, { "epoch": 0.17219655606887863, "grad_norm": 1.2252999544143677, "learning_rate": 6.635599805265959e-06, "loss": 0.4395, "step": 2870 }, { "epoch": 0.17225655486890262, "grad_norm": 1.1757577657699585, "learning_rate": 6.635297558423152e-06, "loss": 0.5, "step": 2871 }, { "epoch": 0.17231655366892662, "grad_norm": 1.1847513914108276, "learning_rate": 6.6349951931744395e-06, "loss": 0.5168, "step": 2872 }, { "epoch": 0.17237655246895062, "grad_norm": 1.3281680345535278, "learning_rate": 6.634692709531243e-06, "loss": 0.4694, "step": 2873 }, { "epoch": 0.1724365512689746, "grad_norm": 1.263171672821045, "learning_rate": 6.634390107504984e-06, "loss": 0.4865, "step": 2874 }, { "epoch": 0.1724965500689986, "grad_norm": 1.2270485162734985, "learning_rate": 6.634087387107091e-06, "loss": 0.4847, "step": 2875 }, { "epoch": 0.1725565488690226, "grad_norm": 1.214795708656311, "learning_rate": 6.633784548348995e-06, "loss": 0.4908, "step": 2876 }, { "epoch": 0.17261654766904663, "grad_norm": 1.3382877111434937, "learning_rate": 6.633481591242135e-06, "loss": 0.5164, "step": 2877 }, { "epoch": 0.17267654646907063, "grad_norm": 1.3873008489608765, "learning_rate": 6.633178515797952e-06, "loss": 0.4914, "step": 2878 }, { "epoch": 0.17273654526909463, "grad_norm": 1.224426507949829, "learning_rate": 6.632875322027891e-06, "loss": 0.4928, "step": 2879 }, { "epoch": 0.17279654406911862, "grad_norm": 1.2966136932373047, "learning_rate": 6.632572009943402e-06, "loss": 0.4654, "step": 2880 }, { "epoch": 0.17285654286914262, "grad_norm": 1.183327078819275, "learning_rate": 6.63226857955594e-06, "loss": 0.471, "step": 2881 }, { "epoch": 0.17291654166916662, "grad_norm": 1.2361868619918823, "learning_rate": 6.631965030876964e-06, "loss": 0.4936, "step": 2882 }, { "epoch": 0.17297654046919061, "grad_norm": 1.289534568786621, "learning_rate": 6.631661363917939e-06, "loss": 0.4674, "step": 2883 }, { "epoch": 0.1730365392692146, "grad_norm": 1.4011988639831543, "learning_rate": 6.6313575786903325e-06, "loss": 0.4976, "step": 2884 }, { "epoch": 0.1730965380692386, "grad_norm": 1.3444277048110962, "learning_rate": 6.631053675205614e-06, "loss": 0.4762, "step": 2885 }, { "epoch": 0.1731565368692626, "grad_norm": 1.152142882347107, "learning_rate": 6.630749653475266e-06, "loss": 0.4599, "step": 2886 }, { "epoch": 0.1732165356692866, "grad_norm": 1.3396657705307007, "learning_rate": 6.630445513510766e-06, "loss": 0.5364, "step": 2887 }, { "epoch": 0.1732765344693106, "grad_norm": 1.3759866952896118, "learning_rate": 6.630141255323602e-06, "loss": 0.5451, "step": 2888 }, { "epoch": 0.17333653326933463, "grad_norm": 1.5067435503005981, "learning_rate": 6.629836878925264e-06, "loss": 0.4992, "step": 2889 }, { "epoch": 0.17339653206935862, "grad_norm": 1.3003240823745728, "learning_rate": 6.629532384327245e-06, "loss": 0.4331, "step": 2890 }, { "epoch": 0.17345653086938262, "grad_norm": 1.190643548965454, "learning_rate": 6.6292277715410466e-06, "loss": 0.4559, "step": 2891 }, { "epoch": 0.17351652966940662, "grad_norm": 1.4038363695144653, "learning_rate": 6.628923040578172e-06, "loss": 0.4572, "step": 2892 }, { "epoch": 0.1735765284694306, "grad_norm": 1.2139556407928467, "learning_rate": 6.62861819145013e-06, "loss": 0.4251, "step": 2893 }, { "epoch": 0.1736365272694546, "grad_norm": 1.2451602220535278, "learning_rate": 6.628313224168433e-06, "loss": 0.4565, "step": 2894 }, { "epoch": 0.1736965260694786, "grad_norm": 1.253948450088501, "learning_rate": 6.628008138744597e-06, "loss": 0.4612, "step": 2895 }, { "epoch": 0.1737565248695026, "grad_norm": 1.32517409324646, "learning_rate": 6.627702935190146e-06, "loss": 0.4312, "step": 2896 }, { "epoch": 0.1738165236695266, "grad_norm": 1.3344310522079468, "learning_rate": 6.627397613516604e-06, "loss": 0.4728, "step": 2897 }, { "epoch": 0.1738765224695506, "grad_norm": 1.3253613710403442, "learning_rate": 6.627092173735502e-06, "loss": 0.5162, "step": 2898 }, { "epoch": 0.1739365212695746, "grad_norm": 1.3777542114257812, "learning_rate": 6.626786615858377e-06, "loss": 0.5282, "step": 2899 }, { "epoch": 0.17399652006959862, "grad_norm": 1.3158173561096191, "learning_rate": 6.626480939896767e-06, "loss": 0.4801, "step": 2900 }, { "epoch": 0.17405651886962262, "grad_norm": 1.2489084005355835, "learning_rate": 6.626175145862215e-06, "loss": 0.4119, "step": 2901 }, { "epoch": 0.17411651766964661, "grad_norm": 1.3296891450881958, "learning_rate": 6.625869233766271e-06, "loss": 0.4716, "step": 2902 }, { "epoch": 0.1741765164696706, "grad_norm": 1.2678046226501465, "learning_rate": 6.625563203620487e-06, "loss": 0.4536, "step": 2903 }, { "epoch": 0.1742365152696946, "grad_norm": 1.180971622467041, "learning_rate": 6.625257055436423e-06, "loss": 0.4737, "step": 2904 }, { "epoch": 0.1742965140697186, "grad_norm": 1.2692255973815918, "learning_rate": 6.624950789225637e-06, "loss": 0.4592, "step": 2905 }, { "epoch": 0.1743565128697426, "grad_norm": 1.2273340225219727, "learning_rate": 6.624644404999697e-06, "loss": 0.4611, "step": 2906 }, { "epoch": 0.1744165116697666, "grad_norm": 1.2228022813796997, "learning_rate": 6.624337902770174e-06, "loss": 0.4495, "step": 2907 }, { "epoch": 0.1744765104697906, "grad_norm": 1.1679222583770752, "learning_rate": 6.624031282548642e-06, "loss": 0.4772, "step": 2908 }, { "epoch": 0.1745365092698146, "grad_norm": 1.3563473224639893, "learning_rate": 6.623724544346683e-06, "loss": 0.4468, "step": 2909 }, { "epoch": 0.1745965080698386, "grad_norm": 1.329564094543457, "learning_rate": 6.623417688175878e-06, "loss": 0.4874, "step": 2910 }, { "epoch": 0.17465650686986262, "grad_norm": 1.2393627166748047, "learning_rate": 6.623110714047818e-06, "loss": 0.481, "step": 2911 }, { "epoch": 0.1747165056698866, "grad_norm": 1.3461894989013672, "learning_rate": 6.6228036219740945e-06, "loss": 0.481, "step": 2912 }, { "epoch": 0.1747765044699106, "grad_norm": 1.4140424728393555, "learning_rate": 6.6224964119663065e-06, "loss": 0.5224, "step": 2913 }, { "epoch": 0.1748365032699346, "grad_norm": 1.2950047254562378, "learning_rate": 6.6221890840360545e-06, "loss": 0.5092, "step": 2914 }, { "epoch": 0.1748965020699586, "grad_norm": 1.3489822149276733, "learning_rate": 6.621881638194946e-06, "loss": 0.4906, "step": 2915 }, { "epoch": 0.1749565008699826, "grad_norm": 1.131277322769165, "learning_rate": 6.621574074454592e-06, "loss": 0.4363, "step": 2916 }, { "epoch": 0.1750164996700066, "grad_norm": 1.3878569602966309, "learning_rate": 6.6212663928266056e-06, "loss": 0.4829, "step": 2917 }, { "epoch": 0.1750764984700306, "grad_norm": 1.2987250089645386, "learning_rate": 6.6209585933226084e-06, "loss": 0.496, "step": 2918 }, { "epoch": 0.1751364972700546, "grad_norm": 1.3264654874801636, "learning_rate": 6.620650675954224e-06, "loss": 0.4781, "step": 2919 }, { "epoch": 0.1751964960700786, "grad_norm": 1.159621238708496, "learning_rate": 6.620342640733082e-06, "loss": 0.4126, "step": 2920 }, { "epoch": 0.17525649487010259, "grad_norm": 1.1274811029434204, "learning_rate": 6.620034487670814e-06, "loss": 0.4265, "step": 2921 }, { "epoch": 0.1753164936701266, "grad_norm": 1.359793782234192, "learning_rate": 6.619726216779058e-06, "loss": 0.4893, "step": 2922 }, { "epoch": 0.1753764924701506, "grad_norm": 1.2850329875946045, "learning_rate": 6.619417828069457e-06, "loss": 0.4891, "step": 2923 }, { "epoch": 0.1754364912701746, "grad_norm": 1.1996196508407593, "learning_rate": 6.6191093215536564e-06, "loss": 0.4385, "step": 2924 }, { "epoch": 0.1754964900701986, "grad_norm": 1.2087939977645874, "learning_rate": 6.618800697243307e-06, "loss": 0.4355, "step": 2925 }, { "epoch": 0.1755564888702226, "grad_norm": 1.39821195602417, "learning_rate": 6.618491955150065e-06, "loss": 0.5013, "step": 2926 }, { "epoch": 0.1756164876702466, "grad_norm": 1.352124571800232, "learning_rate": 6.61818309528559e-06, "loss": 0.545, "step": 2927 }, { "epoch": 0.1756764864702706, "grad_norm": 1.3284391164779663, "learning_rate": 6.617874117661545e-06, "loss": 0.5154, "step": 2928 }, { "epoch": 0.1757364852702946, "grad_norm": 1.351455807685852, "learning_rate": 6.6175650222896e-06, "loss": 0.4992, "step": 2929 }, { "epoch": 0.1757964840703186, "grad_norm": 1.4334968328475952, "learning_rate": 6.617255809181428e-06, "loss": 0.4586, "step": 2930 }, { "epoch": 0.17585648287034258, "grad_norm": 1.4190661907196045, "learning_rate": 6.616946478348706e-06, "loss": 0.4936, "step": 2931 }, { "epoch": 0.17591648167036658, "grad_norm": 1.2617589235305786, "learning_rate": 6.616637029803116e-06, "loss": 0.4678, "step": 2932 }, { "epoch": 0.1759764804703906, "grad_norm": 1.1939198970794678, "learning_rate": 6.616327463556345e-06, "loss": 0.4391, "step": 2933 }, { "epoch": 0.1760364792704146, "grad_norm": 1.294045090675354, "learning_rate": 6.616017779620083e-06, "loss": 0.4659, "step": 2934 }, { "epoch": 0.1760964780704386, "grad_norm": 1.2289310693740845, "learning_rate": 6.6157079780060274e-06, "loss": 0.4899, "step": 2935 }, { "epoch": 0.1761564768704626, "grad_norm": 1.410333275794983, "learning_rate": 6.615398058725876e-06, "loss": 0.5367, "step": 2936 }, { "epoch": 0.1762164756704866, "grad_norm": 1.5503736734390259, "learning_rate": 6.615088021791333e-06, "loss": 0.4781, "step": 2937 }, { "epoch": 0.1762764744705106, "grad_norm": 1.2561795711517334, "learning_rate": 6.614777867214109e-06, "loss": 0.5033, "step": 2938 }, { "epoch": 0.1763364732705346, "grad_norm": 1.2110363245010376, "learning_rate": 6.614467595005914e-06, "loss": 0.4308, "step": 2939 }, { "epoch": 0.17639647207055859, "grad_norm": 1.3187825679779053, "learning_rate": 6.614157205178469e-06, "loss": 0.4654, "step": 2940 }, { "epoch": 0.17645647087058258, "grad_norm": 1.3174594640731812, "learning_rate": 6.613846697743494e-06, "loss": 0.454, "step": 2941 }, { "epoch": 0.17651646967060658, "grad_norm": 1.408671259880066, "learning_rate": 6.613536072712715e-06, "loss": 0.514, "step": 2942 }, { "epoch": 0.17657646847063058, "grad_norm": 1.3000246286392212, "learning_rate": 6.613225330097865e-06, "loss": 0.4859, "step": 2943 }, { "epoch": 0.17663646727065457, "grad_norm": 1.382196307182312, "learning_rate": 6.612914469910677e-06, "loss": 0.5234, "step": 2944 }, { "epoch": 0.1766964660706786, "grad_norm": 1.2447867393493652, "learning_rate": 6.6126034921628925e-06, "loss": 0.4824, "step": 2945 }, { "epoch": 0.1767564648707026, "grad_norm": 1.3535152673721313, "learning_rate": 6.612292396866255e-06, "loss": 0.4852, "step": 2946 }, { "epoch": 0.1768164636707266, "grad_norm": 1.221083164215088, "learning_rate": 6.611981184032514e-06, "loss": 0.4895, "step": 2947 }, { "epoch": 0.1768764624707506, "grad_norm": 1.4299976825714111, "learning_rate": 6.611669853673421e-06, "loss": 0.4905, "step": 2948 }, { "epoch": 0.1769364612707746, "grad_norm": 1.4141570329666138, "learning_rate": 6.611358405800735e-06, "loss": 0.4804, "step": 2949 }, { "epoch": 0.17699646007079858, "grad_norm": 1.1895252466201782, "learning_rate": 6.6110468404262155e-06, "loss": 0.4478, "step": 2950 }, { "epoch": 0.17705645887082258, "grad_norm": 1.3198683261871338, "learning_rate": 6.610735157561633e-06, "loss": 0.4785, "step": 2951 }, { "epoch": 0.17711645767084658, "grad_norm": 1.3019077777862549, "learning_rate": 6.610423357218755e-06, "loss": 0.4622, "step": 2952 }, { "epoch": 0.17717645647087057, "grad_norm": 1.2035175561904907, "learning_rate": 6.610111439409358e-06, "loss": 0.4379, "step": 2953 }, { "epoch": 0.17723645527089457, "grad_norm": 1.3061490058898926, "learning_rate": 6.609799404145222e-06, "loss": 0.487, "step": 2954 }, { "epoch": 0.17729645407091857, "grad_norm": 1.3067368268966675, "learning_rate": 6.609487251438129e-06, "loss": 0.4444, "step": 2955 }, { "epoch": 0.1773564528709426, "grad_norm": 1.2019978761672974, "learning_rate": 6.609174981299872e-06, "loss": 0.4475, "step": 2956 }, { "epoch": 0.1774164516709666, "grad_norm": 1.3168654441833496, "learning_rate": 6.6088625937422386e-06, "loss": 0.4909, "step": 2957 }, { "epoch": 0.1774764504709906, "grad_norm": 1.2949209213256836, "learning_rate": 6.608550088777031e-06, "loss": 0.4706, "step": 2958 }, { "epoch": 0.17753644927101458, "grad_norm": 1.2087260484695435, "learning_rate": 6.608237466416048e-06, "loss": 0.5069, "step": 2959 }, { "epoch": 0.17759644807103858, "grad_norm": 1.335288166999817, "learning_rate": 6.607924726671097e-06, "loss": 0.4381, "step": 2960 }, { "epoch": 0.17765644687106258, "grad_norm": 1.4172990322113037, "learning_rate": 6.60761186955399e-06, "loss": 0.4308, "step": 2961 }, { "epoch": 0.17771644567108658, "grad_norm": 1.188673973083496, "learning_rate": 6.60729889507654e-06, "loss": 0.4202, "step": 2962 }, { "epoch": 0.17777644447111057, "grad_norm": 1.3029602766036987, "learning_rate": 6.6069858032505675e-06, "loss": 0.4539, "step": 2963 }, { "epoch": 0.17783644327113457, "grad_norm": 1.2179597616195679, "learning_rate": 6.606672594087897e-06, "loss": 0.5592, "step": 2964 }, { "epoch": 0.17789644207115857, "grad_norm": 1.3270350694656372, "learning_rate": 6.6063592676003555e-06, "loss": 0.4434, "step": 2965 }, { "epoch": 0.17795644087118256, "grad_norm": 1.3844633102416992, "learning_rate": 6.606045823799778e-06, "loss": 0.5038, "step": 2966 }, { "epoch": 0.1780164396712066, "grad_norm": 1.2984691858291626, "learning_rate": 6.6057322626980005e-06, "loss": 0.4968, "step": 2967 }, { "epoch": 0.17807643847123059, "grad_norm": 1.3064076900482178, "learning_rate": 6.605418584306866e-06, "loss": 0.431, "step": 2968 }, { "epoch": 0.17813643727125458, "grad_norm": 1.0667951107025146, "learning_rate": 6.605104788638219e-06, "loss": 0.4288, "step": 2969 }, { "epoch": 0.17819643607127858, "grad_norm": 1.2432125806808472, "learning_rate": 6.60479087570391e-06, "loss": 0.4823, "step": 2970 }, { "epoch": 0.17825643487130258, "grad_norm": 1.1546355485916138, "learning_rate": 6.604476845515796e-06, "loss": 0.4211, "step": 2971 }, { "epoch": 0.17831643367132657, "grad_norm": 1.191998839378357, "learning_rate": 6.604162698085736e-06, "loss": 0.4621, "step": 2972 }, { "epoch": 0.17837643247135057, "grad_norm": 1.5033247470855713, "learning_rate": 6.6038484334255926e-06, "loss": 0.5501, "step": 2973 }, { "epoch": 0.17843643127137457, "grad_norm": 1.1663367748260498, "learning_rate": 6.603534051547236e-06, "loss": 0.3917, "step": 2974 }, { "epoch": 0.17849643007139857, "grad_norm": 1.2173482179641724, "learning_rate": 6.603219552462536e-06, "loss": 0.4666, "step": 2975 }, { "epoch": 0.17855642887142256, "grad_norm": 1.164631962776184, "learning_rate": 6.602904936183373e-06, "loss": 0.4433, "step": 2976 }, { "epoch": 0.17861642767144656, "grad_norm": 1.1503610610961914, "learning_rate": 6.602590202721628e-06, "loss": 0.5188, "step": 2977 }, { "epoch": 0.17867642647147058, "grad_norm": 1.150716781616211, "learning_rate": 6.602275352089186e-06, "loss": 0.4534, "step": 2978 }, { "epoch": 0.17873642527149458, "grad_norm": 1.2031033039093018, "learning_rate": 6.601960384297938e-06, "loss": 0.4646, "step": 2979 }, { "epoch": 0.17879642407151858, "grad_norm": 1.2543835639953613, "learning_rate": 6.601645299359778e-06, "loss": 0.4727, "step": 2980 }, { "epoch": 0.17885642287154258, "grad_norm": 1.23977530002594, "learning_rate": 6.601330097286606e-06, "loss": 0.5027, "step": 2981 }, { "epoch": 0.17891642167156657, "grad_norm": 1.2647535800933838, "learning_rate": 6.601014778090327e-06, "loss": 0.4443, "step": 2982 }, { "epoch": 0.17897642047159057, "grad_norm": 1.1634621620178223, "learning_rate": 6.600699341782847e-06, "loss": 0.4548, "step": 2983 }, { "epoch": 0.17903641927161457, "grad_norm": 1.286020040512085, "learning_rate": 6.6003837883760805e-06, "loss": 0.4449, "step": 2984 }, { "epoch": 0.17909641807163856, "grad_norm": 1.1556029319763184, "learning_rate": 6.600068117881943e-06, "loss": 0.4632, "step": 2985 }, { "epoch": 0.17915641687166256, "grad_norm": 1.3203741312026978, "learning_rate": 6.5997523303123565e-06, "loss": 0.4706, "step": 2986 }, { "epoch": 0.17921641567168656, "grad_norm": 1.2756179571151733, "learning_rate": 6.599436425679248e-06, "loss": 0.4812, "step": 2987 }, { "epoch": 0.17927641447171055, "grad_norm": 1.4103881120681763, "learning_rate": 6.599120403994545e-06, "loss": 0.5036, "step": 2988 }, { "epoch": 0.17933641327173455, "grad_norm": 1.198941946029663, "learning_rate": 6.598804265270185e-06, "loss": 0.495, "step": 2989 }, { "epoch": 0.17939641207175858, "grad_norm": 1.2595837116241455, "learning_rate": 6.598488009518105e-06, "loss": 0.5078, "step": 2990 }, { "epoch": 0.17945641087178257, "grad_norm": 1.3826875686645508, "learning_rate": 6.59817163675025e-06, "loss": 0.4562, "step": 2991 }, { "epoch": 0.17951640967180657, "grad_norm": 1.296682357788086, "learning_rate": 6.597855146978568e-06, "loss": 0.478, "step": 2992 }, { "epoch": 0.17957640847183057, "grad_norm": 1.154099702835083, "learning_rate": 6.597538540215011e-06, "loss": 0.4158, "step": 2993 }, { "epoch": 0.17963640727185456, "grad_norm": 1.2448228597640991, "learning_rate": 6.597221816471536e-06, "loss": 0.5082, "step": 2994 }, { "epoch": 0.17969640607187856, "grad_norm": 1.2696313858032227, "learning_rate": 6.596904975760103e-06, "loss": 0.4759, "step": 2995 }, { "epoch": 0.17975640487190256, "grad_norm": 1.2483086585998535, "learning_rate": 6.596588018092678e-06, "loss": 0.452, "step": 2996 }, { "epoch": 0.17981640367192656, "grad_norm": 1.1886845827102661, "learning_rate": 6.596270943481233e-06, "loss": 0.4456, "step": 2997 }, { "epoch": 0.17987640247195055, "grad_norm": 1.229233741760254, "learning_rate": 6.5959537519377405e-06, "loss": 0.5185, "step": 2998 }, { "epoch": 0.17993640127197455, "grad_norm": 1.1794086694717407, "learning_rate": 6.595636443474179e-06, "loss": 0.4299, "step": 2999 }, { "epoch": 0.17999640007199855, "grad_norm": 1.2980481386184692, "learning_rate": 6.5953190181025345e-06, "loss": 0.4849, "step": 3000 }, { "epoch": 0.18005639887202257, "grad_norm": 1.3207296133041382, "learning_rate": 6.595001475834792e-06, "loss": 0.491, "step": 3001 }, { "epoch": 0.18011639767204657, "grad_norm": 1.3199658393859863, "learning_rate": 6.594683816682945e-06, "loss": 0.4741, "step": 3002 }, { "epoch": 0.18017639647207057, "grad_norm": 1.2068264484405518, "learning_rate": 6.5943660406589895e-06, "loss": 0.4104, "step": 3003 }, { "epoch": 0.18023639527209456, "grad_norm": 1.4014643430709839, "learning_rate": 6.594048147774925e-06, "loss": 0.4957, "step": 3004 }, { "epoch": 0.18029639407211856, "grad_norm": 1.2212868928909302, "learning_rate": 6.59373013804276e-06, "loss": 0.497, "step": 3005 }, { "epoch": 0.18035639287214256, "grad_norm": 1.0838556289672852, "learning_rate": 6.593412011474503e-06, "loss": 0.4272, "step": 3006 }, { "epoch": 0.18041639167216655, "grad_norm": 1.226822853088379, "learning_rate": 6.593093768082166e-06, "loss": 0.4439, "step": 3007 }, { "epoch": 0.18047639047219055, "grad_norm": 1.2357892990112305, "learning_rate": 6.592775407877771e-06, "loss": 0.4713, "step": 3008 }, { "epoch": 0.18053638927221455, "grad_norm": 1.3746196031570435, "learning_rate": 6.5924569308733395e-06, "loss": 0.4859, "step": 3009 }, { "epoch": 0.18059638807223855, "grad_norm": 1.367794156074524, "learning_rate": 6.592138337080899e-06, "loss": 0.4804, "step": 3010 }, { "epoch": 0.18065638687226254, "grad_norm": 1.1840885877609253, "learning_rate": 6.5918196265124795e-06, "loss": 0.472, "step": 3011 }, { "epoch": 0.18071638567228657, "grad_norm": 1.2598415613174438, "learning_rate": 6.59150079918012e-06, "loss": 0.4598, "step": 3012 }, { "epoch": 0.18077638447231056, "grad_norm": 1.3176482915878296, "learning_rate": 6.591181855095862e-06, "loss": 0.4688, "step": 3013 }, { "epoch": 0.18083638327233456, "grad_norm": 1.1723064184188843, "learning_rate": 6.590862794271746e-06, "loss": 0.4243, "step": 3014 }, { "epoch": 0.18089638207235856, "grad_norm": 1.3337494134902954, "learning_rate": 6.590543616719825e-06, "loss": 0.4531, "step": 3015 }, { "epoch": 0.18095638087238256, "grad_norm": 1.0905694961547852, "learning_rate": 6.590224322452154e-06, "loss": 0.4595, "step": 3016 }, { "epoch": 0.18101637967240655, "grad_norm": 1.3668192625045776, "learning_rate": 6.589904911480788e-06, "loss": 0.462, "step": 3017 }, { "epoch": 0.18107637847243055, "grad_norm": 1.2338565587997437, "learning_rate": 6.5895853838177896e-06, "loss": 0.4737, "step": 3018 }, { "epoch": 0.18113637727245455, "grad_norm": 1.2946550846099854, "learning_rate": 6.589265739475229e-06, "loss": 0.4873, "step": 3019 }, { "epoch": 0.18119637607247854, "grad_norm": 1.2530893087387085, "learning_rate": 6.5889459784651755e-06, "loss": 0.5273, "step": 3020 }, { "epoch": 0.18125637487250254, "grad_norm": 1.2229009866714478, "learning_rate": 6.588626100799706e-06, "loss": 0.4432, "step": 3021 }, { "epoch": 0.18131637367252654, "grad_norm": 1.3192206621170044, "learning_rate": 6.5883061064909005e-06, "loss": 0.4645, "step": 3022 }, { "epoch": 0.18137637247255056, "grad_norm": 1.2176401615142822, "learning_rate": 6.587985995550843e-06, "loss": 0.4309, "step": 3023 }, { "epoch": 0.18143637127257456, "grad_norm": 1.1658165454864502, "learning_rate": 6.587665767991624e-06, "loss": 0.4501, "step": 3024 }, { "epoch": 0.18149637007259856, "grad_norm": 1.2664316892623901, "learning_rate": 6.587345423825336e-06, "loss": 0.4287, "step": 3025 }, { "epoch": 0.18155636887262255, "grad_norm": 1.359723448753357, "learning_rate": 6.587024963064078e-06, "loss": 0.5073, "step": 3026 }, { "epoch": 0.18161636767264655, "grad_norm": 1.2278971672058105, "learning_rate": 6.586704385719949e-06, "loss": 0.419, "step": 3027 }, { "epoch": 0.18167636647267055, "grad_norm": 1.3818283081054688, "learning_rate": 6.586383691805061e-06, "loss": 0.4968, "step": 3028 }, { "epoch": 0.18173636527269454, "grad_norm": 1.3425822257995605, "learning_rate": 6.586062881331522e-06, "loss": 0.4905, "step": 3029 }, { "epoch": 0.18179636407271854, "grad_norm": 1.2889991998672485, "learning_rate": 6.585741954311448e-06, "loss": 0.51, "step": 3030 }, { "epoch": 0.18185636287274254, "grad_norm": 1.220348596572876, "learning_rate": 6.585420910756958e-06, "loss": 0.4608, "step": 3031 }, { "epoch": 0.18191636167276654, "grad_norm": 1.1979354619979858, "learning_rate": 6.5850997506801785e-06, "loss": 0.5236, "step": 3032 }, { "epoch": 0.18197636047279053, "grad_norm": 1.2065110206604004, "learning_rate": 6.584778474093237e-06, "loss": 0.4285, "step": 3033 }, { "epoch": 0.18203635927281456, "grad_norm": 1.2603360414505005, "learning_rate": 6.584457081008266e-06, "loss": 0.459, "step": 3034 }, { "epoch": 0.18209635807283855, "grad_norm": 1.103788137435913, "learning_rate": 6.584135571437404e-06, "loss": 0.4282, "step": 3035 }, { "epoch": 0.18215635687286255, "grad_norm": 1.2687838077545166, "learning_rate": 6.583813945392793e-06, "loss": 0.4811, "step": 3036 }, { "epoch": 0.18221635567288655, "grad_norm": 1.3360241651535034, "learning_rate": 6.583492202886578e-06, "loss": 0.4736, "step": 3037 }, { "epoch": 0.18227635447291055, "grad_norm": 1.210573673248291, "learning_rate": 6.5831703439309125e-06, "loss": 0.4783, "step": 3038 }, { "epoch": 0.18233635327293454, "grad_norm": 1.4153650999069214, "learning_rate": 6.58284836853795e-06, "loss": 0.4682, "step": 3039 }, { "epoch": 0.18239635207295854, "grad_norm": 1.2049161195755005, "learning_rate": 6.582526276719849e-06, "loss": 0.4662, "step": 3040 }, { "epoch": 0.18245635087298254, "grad_norm": 1.2729369401931763, "learning_rate": 6.582204068488775e-06, "loss": 0.4857, "step": 3041 }, { "epoch": 0.18251634967300653, "grad_norm": 1.1305574178695679, "learning_rate": 6.581881743856896e-06, "loss": 0.4139, "step": 3042 }, { "epoch": 0.18257634847303053, "grad_norm": 1.1568565368652344, "learning_rate": 6.581559302836384e-06, "loss": 0.4223, "step": 3043 }, { "epoch": 0.18263634727305453, "grad_norm": 1.2051643133163452, "learning_rate": 6.581236745439417e-06, "loss": 0.5039, "step": 3044 }, { "epoch": 0.18269634607307852, "grad_norm": 1.4277421236038208, "learning_rate": 6.580914071678177e-06, "loss": 0.5068, "step": 3045 }, { "epoch": 0.18275634487310255, "grad_norm": 1.2019699811935425, "learning_rate": 6.580591281564848e-06, "loss": 0.4744, "step": 3046 }, { "epoch": 0.18281634367312655, "grad_norm": 1.2243990898132324, "learning_rate": 6.580268375111622e-06, "loss": 0.4533, "step": 3047 }, { "epoch": 0.18287634247315054, "grad_norm": 1.3427810668945312, "learning_rate": 6.579945352330692e-06, "loss": 0.4876, "step": 3048 }, { "epoch": 0.18293634127317454, "grad_norm": 1.4212980270385742, "learning_rate": 6.579622213234259e-06, "loss": 0.4602, "step": 3049 }, { "epoch": 0.18299634007319854, "grad_norm": 1.273923635482788, "learning_rate": 6.579298957834526e-06, "loss": 0.4291, "step": 3050 }, { "epoch": 0.18305633887322253, "grad_norm": 1.2657878398895264, "learning_rate": 6.578975586143701e-06, "loss": 0.4823, "step": 3051 }, { "epoch": 0.18311633767324653, "grad_norm": 1.2293362617492676, "learning_rate": 6.578652098173995e-06, "loss": 0.4556, "step": 3052 }, { "epoch": 0.18317633647327053, "grad_norm": 1.3295565843582153, "learning_rate": 6.578328493937627e-06, "loss": 0.4757, "step": 3053 }, { "epoch": 0.18323633527329453, "grad_norm": 1.3747270107269287, "learning_rate": 6.578004773446814e-06, "loss": 0.5516, "step": 3054 }, { "epoch": 0.18329633407331852, "grad_norm": 1.1808782815933228, "learning_rate": 6.577680936713786e-06, "loss": 0.3922, "step": 3055 }, { "epoch": 0.18335633287334252, "grad_norm": 1.255203127861023, "learning_rate": 6.5773569837507704e-06, "loss": 0.4548, "step": 3056 }, { "epoch": 0.18341633167336654, "grad_norm": 1.2389761209487915, "learning_rate": 6.577032914570002e-06, "loss": 0.4543, "step": 3057 }, { "epoch": 0.18347633047339054, "grad_norm": 1.3917990922927856, "learning_rate": 6.57670872918372e-06, "loss": 0.4385, "step": 3058 }, { "epoch": 0.18353632927341454, "grad_norm": 1.356640100479126, "learning_rate": 6.5763844276041655e-06, "loss": 0.4248, "step": 3059 }, { "epoch": 0.18359632807343854, "grad_norm": 1.2401267290115356, "learning_rate": 6.576060009843589e-06, "loss": 0.4727, "step": 3060 }, { "epoch": 0.18365632687346253, "grad_norm": 1.2406691312789917, "learning_rate": 6.575735475914238e-06, "loss": 0.4834, "step": 3061 }, { "epoch": 0.18371632567348653, "grad_norm": 1.3706276416778564, "learning_rate": 6.575410825828373e-06, "loss": 0.4595, "step": 3062 }, { "epoch": 0.18377632447351053, "grad_norm": 1.287865400314331, "learning_rate": 6.575086059598251e-06, "loss": 0.4833, "step": 3063 }, { "epoch": 0.18383632327353452, "grad_norm": 1.2318795919418335, "learning_rate": 6.5747611772361395e-06, "loss": 0.4745, "step": 3064 }, { "epoch": 0.18389632207355852, "grad_norm": 1.1822073459625244, "learning_rate": 6.574436178754308e-06, "loss": 0.4283, "step": 3065 }, { "epoch": 0.18395632087358252, "grad_norm": 1.204763650894165, "learning_rate": 6.574111064165027e-06, "loss": 0.4715, "step": 3066 }, { "epoch": 0.18401631967360652, "grad_norm": 1.2158123254776, "learning_rate": 6.573785833480578e-06, "loss": 0.47, "step": 3067 }, { "epoch": 0.18407631847363054, "grad_norm": 1.2563514709472656, "learning_rate": 6.573460486713243e-06, "loss": 0.4452, "step": 3068 }, { "epoch": 0.18413631727365454, "grad_norm": 1.311702847480774, "learning_rate": 6.573135023875306e-06, "loss": 0.5013, "step": 3069 }, { "epoch": 0.18419631607367853, "grad_norm": 1.3334877490997314, "learning_rate": 6.572809444979062e-06, "loss": 0.503, "step": 3070 }, { "epoch": 0.18425631487370253, "grad_norm": 1.2513320446014404, "learning_rate": 6.572483750036804e-06, "loss": 0.4351, "step": 3071 }, { "epoch": 0.18431631367372653, "grad_norm": 1.3152105808258057, "learning_rate": 6.572157939060833e-06, "loss": 0.4128, "step": 3072 }, { "epoch": 0.18437631247375053, "grad_norm": 1.36702299118042, "learning_rate": 6.571832012063454e-06, "loss": 0.5074, "step": 3073 }, { "epoch": 0.18443631127377452, "grad_norm": 1.2661441564559937, "learning_rate": 6.5715059690569744e-06, "loss": 0.539, "step": 3074 }, { "epoch": 0.18449631007379852, "grad_norm": 1.3485305309295654, "learning_rate": 6.571179810053709e-06, "loss": 0.4656, "step": 3075 }, { "epoch": 0.18455630887382252, "grad_norm": 1.2123078107833862, "learning_rate": 6.5708535350659734e-06, "loss": 0.5102, "step": 3076 }, { "epoch": 0.1846163076738465, "grad_norm": 1.3489913940429688, "learning_rate": 6.570527144106091e-06, "loss": 0.5102, "step": 3077 }, { "epoch": 0.1846763064738705, "grad_norm": 1.218820571899414, "learning_rate": 6.570200637186388e-06, "loss": 0.4696, "step": 3078 }, { "epoch": 0.18473630527389454, "grad_norm": 1.1283738613128662, "learning_rate": 6.5698740143191946e-06, "loss": 0.4274, "step": 3079 }, { "epoch": 0.18479630407391853, "grad_norm": 1.3045579195022583, "learning_rate": 6.569547275516845e-06, "loss": 0.4438, "step": 3080 }, { "epoch": 0.18485630287394253, "grad_norm": 1.2731094360351562, "learning_rate": 6.569220420791681e-06, "loss": 0.4607, "step": 3081 }, { "epoch": 0.18491630167396653, "grad_norm": 1.29570734500885, "learning_rate": 6.568893450156044e-06, "loss": 0.4811, "step": 3082 }, { "epoch": 0.18497630047399052, "grad_norm": 1.2141073942184448, "learning_rate": 6.568566363622285e-06, "loss": 0.4623, "step": 3083 }, { "epoch": 0.18503629927401452, "grad_norm": 1.2352442741394043, "learning_rate": 6.568239161202753e-06, "loss": 0.486, "step": 3084 }, { "epoch": 0.18509629807403852, "grad_norm": 1.326344609260559, "learning_rate": 6.567911842909807e-06, "loss": 0.4509, "step": 3085 }, { "epoch": 0.18515629687406251, "grad_norm": 1.315399169921875, "learning_rate": 6.567584408755809e-06, "loss": 0.5063, "step": 3086 }, { "epoch": 0.1852162956740865, "grad_norm": 1.3714827299118042, "learning_rate": 6.567256858753122e-06, "loss": 0.4885, "step": 3087 }, { "epoch": 0.1852762944741105, "grad_norm": 1.467037558555603, "learning_rate": 6.566929192914119e-06, "loss": 0.4794, "step": 3088 }, { "epoch": 0.1853362932741345, "grad_norm": 1.3526172637939453, "learning_rate": 6.566601411251173e-06, "loss": 0.514, "step": 3089 }, { "epoch": 0.1853962920741585, "grad_norm": 1.2535629272460938, "learning_rate": 6.566273513776664e-06, "loss": 0.4266, "step": 3090 }, { "epoch": 0.18545629087418253, "grad_norm": 1.3403164148330688, "learning_rate": 6.565945500502974e-06, "loss": 0.4377, "step": 3091 }, { "epoch": 0.18551628967420652, "grad_norm": 1.2024990320205688, "learning_rate": 6.56561737144249e-06, "loss": 0.4672, "step": 3092 }, { "epoch": 0.18557628847423052, "grad_norm": 1.2971547842025757, "learning_rate": 6.565289126607606e-06, "loss": 0.4793, "step": 3093 }, { "epoch": 0.18563628727425452, "grad_norm": 1.3463488817214966, "learning_rate": 6.564960766010716e-06, "loss": 0.4522, "step": 3094 }, { "epoch": 0.18569628607427852, "grad_norm": 1.2495043277740479, "learning_rate": 6.564632289664222e-06, "loss": 0.4683, "step": 3095 }, { "epoch": 0.1857562848743025, "grad_norm": 1.5494433641433716, "learning_rate": 6.5643036975805294e-06, "loss": 0.4383, "step": 3096 }, { "epoch": 0.1858162836743265, "grad_norm": 1.2550567388534546, "learning_rate": 6.563974989772047e-06, "loss": 0.5072, "step": 3097 }, { "epoch": 0.1858762824743505, "grad_norm": 1.395216464996338, "learning_rate": 6.563646166251187e-06, "loss": 0.4754, "step": 3098 }, { "epoch": 0.1859362812743745, "grad_norm": 1.2527014017105103, "learning_rate": 6.5633172270303715e-06, "loss": 0.4743, "step": 3099 }, { "epoch": 0.1859962800743985, "grad_norm": 1.3295940160751343, "learning_rate": 6.56298817212202e-06, "loss": 0.5044, "step": 3100 }, { "epoch": 0.1860562788744225, "grad_norm": 1.324886441230774, "learning_rate": 6.562659001538559e-06, "loss": 0.4694, "step": 3101 }, { "epoch": 0.18611627767444652, "grad_norm": 1.1277434825897217, "learning_rate": 6.562329715292423e-06, "loss": 0.4667, "step": 3102 }, { "epoch": 0.18617627647447052, "grad_norm": 1.287142276763916, "learning_rate": 6.5620003133960444e-06, "loss": 0.4936, "step": 3103 }, { "epoch": 0.18623627527449452, "grad_norm": 1.370613694190979, "learning_rate": 6.561670795861864e-06, "loss": 0.4706, "step": 3104 }, { "epoch": 0.18629627407451851, "grad_norm": 1.4117075204849243, "learning_rate": 6.561341162702328e-06, "loss": 0.464, "step": 3105 }, { "epoch": 0.1863562728745425, "grad_norm": 1.3262206315994263, "learning_rate": 6.561011413929883e-06, "loss": 0.4905, "step": 3106 }, { "epoch": 0.1864162716745665, "grad_norm": 1.3265364170074463, "learning_rate": 6.560681549556983e-06, "loss": 0.4351, "step": 3107 }, { "epoch": 0.1864762704745905, "grad_norm": 1.165659785270691, "learning_rate": 6.560351569596084e-06, "loss": 0.4933, "step": 3108 }, { "epoch": 0.1865362692746145, "grad_norm": 1.3403444290161133, "learning_rate": 6.56002147405965e-06, "loss": 0.4657, "step": 3109 }, { "epoch": 0.1865962680746385, "grad_norm": 1.149807333946228, "learning_rate": 6.5596912629601465e-06, "loss": 0.4227, "step": 3110 }, { "epoch": 0.1866562668746625, "grad_norm": 1.2197202444076538, "learning_rate": 6.5593609363100446e-06, "loss": 0.4627, "step": 3111 }, { "epoch": 0.1867162656746865, "grad_norm": 1.3712238073349, "learning_rate": 6.559030494121818e-06, "loss": 0.4757, "step": 3112 }, { "epoch": 0.18677626447471052, "grad_norm": 1.261871576309204, "learning_rate": 6.558699936407946e-06, "loss": 0.4566, "step": 3113 }, { "epoch": 0.18683626327473452, "grad_norm": 1.2162271738052368, "learning_rate": 6.558369263180913e-06, "loss": 0.4411, "step": 3114 }, { "epoch": 0.1868962620747585, "grad_norm": 1.251023769378662, "learning_rate": 6.558038474453208e-06, "loss": 0.4493, "step": 3115 }, { "epoch": 0.1869562608747825, "grad_norm": 1.1933000087738037, "learning_rate": 6.55770757023732e-06, "loss": 0.4479, "step": 3116 }, { "epoch": 0.1870162596748065, "grad_norm": 1.4580693244934082, "learning_rate": 6.55737655054575e-06, "loss": 0.4106, "step": 3117 }, { "epoch": 0.1870762584748305, "grad_norm": 1.294975757598877, "learning_rate": 6.5570454153909965e-06, "loss": 0.4616, "step": 3118 }, { "epoch": 0.1871362572748545, "grad_norm": 1.2864155769348145, "learning_rate": 6.556714164785564e-06, "loss": 0.4544, "step": 3119 }, { "epoch": 0.1871962560748785, "grad_norm": 1.3015135526657104, "learning_rate": 6.556382798741966e-06, "loss": 0.4799, "step": 3120 }, { "epoch": 0.1872562548749025, "grad_norm": 1.3698819875717163, "learning_rate": 6.556051317272714e-06, "loss": 0.5132, "step": 3121 }, { "epoch": 0.1873162536749265, "grad_norm": 1.2826191186904907, "learning_rate": 6.5557197203903265e-06, "loss": 0.4768, "step": 3122 }, { "epoch": 0.1873762524749505, "grad_norm": 1.289846658706665, "learning_rate": 6.555388008107327e-06, "loss": 0.4687, "step": 3123 }, { "epoch": 0.1874362512749745, "grad_norm": 1.2662142515182495, "learning_rate": 6.555056180436244e-06, "loss": 0.4596, "step": 3124 }, { "epoch": 0.1874962500749985, "grad_norm": 1.524031639099121, "learning_rate": 6.554724237389606e-06, "loss": 0.5354, "step": 3125 }, { "epoch": 0.1875562488750225, "grad_norm": 1.5195432901382446, "learning_rate": 6.5543921789799516e-06, "loss": 0.5147, "step": 3126 }, { "epoch": 0.1876162476750465, "grad_norm": 1.2075618505477905, "learning_rate": 6.554060005219821e-06, "loss": 0.4319, "step": 3127 }, { "epoch": 0.1876762464750705, "grad_norm": 1.1117208003997803, "learning_rate": 6.553727716121758e-06, "loss": 0.3999, "step": 3128 }, { "epoch": 0.1877362452750945, "grad_norm": 1.3710325956344604, "learning_rate": 6.553395311698311e-06, "loss": 0.4946, "step": 3129 }, { "epoch": 0.1877962440751185, "grad_norm": 1.3381245136260986, "learning_rate": 6.553062791962036e-06, "loss": 0.4833, "step": 3130 }, { "epoch": 0.1878562428751425, "grad_norm": 1.3936386108398438, "learning_rate": 6.552730156925487e-06, "loss": 0.4693, "step": 3131 }, { "epoch": 0.1879162416751665, "grad_norm": 1.0846961736679077, "learning_rate": 6.552397406601229e-06, "loss": 0.4553, "step": 3132 }, { "epoch": 0.1879762404751905, "grad_norm": 1.1553239822387695, "learning_rate": 6.552064541001829e-06, "loss": 0.4395, "step": 3133 }, { "epoch": 0.18803623927521448, "grad_norm": 1.201381802558899, "learning_rate": 6.551731560139855e-06, "loss": 0.4333, "step": 3134 }, { "epoch": 0.1880962380752385, "grad_norm": 1.203903079032898, "learning_rate": 6.551398464027884e-06, "loss": 0.4466, "step": 3135 }, { "epoch": 0.1881562368752625, "grad_norm": 1.178918719291687, "learning_rate": 6.551065252678496e-06, "loss": 0.4241, "step": 3136 }, { "epoch": 0.1882162356752865, "grad_norm": 1.255184292793274, "learning_rate": 6.550731926104272e-06, "loss": 0.5094, "step": 3137 }, { "epoch": 0.1882762344753105, "grad_norm": 1.162623643875122, "learning_rate": 6.550398484317803e-06, "loss": 0.4383, "step": 3138 }, { "epoch": 0.1883362332753345, "grad_norm": 1.19636070728302, "learning_rate": 6.5500649273316826e-06, "loss": 0.4837, "step": 3139 }, { "epoch": 0.1883962320753585, "grad_norm": 1.2774224281311035, "learning_rate": 6.549731255158504e-06, "loss": 0.5041, "step": 3140 }, { "epoch": 0.1884562308753825, "grad_norm": 1.3146283626556396, "learning_rate": 6.54939746781087e-06, "loss": 0.4102, "step": 3141 }, { "epoch": 0.1885162296754065, "grad_norm": 1.2471274137496948, "learning_rate": 6.549063565301389e-06, "loss": 0.4926, "step": 3142 }, { "epoch": 0.18857622847543049, "grad_norm": 1.3656131029129028, "learning_rate": 6.548729547642667e-06, "loss": 0.4995, "step": 3143 }, { "epoch": 0.18863622727545448, "grad_norm": 1.3405030965805054, "learning_rate": 6.5483954148473205e-06, "loss": 0.4867, "step": 3144 }, { "epoch": 0.18869622607547848, "grad_norm": 1.324657678604126, "learning_rate": 6.548061166927967e-06, "loss": 0.479, "step": 3145 }, { "epoch": 0.18875622487550248, "grad_norm": 1.2695412635803223, "learning_rate": 6.547726803897231e-06, "loss": 0.5131, "step": 3146 }, { "epoch": 0.1888162236755265, "grad_norm": 1.3575493097305298, "learning_rate": 6.547392325767739e-06, "loss": 0.4936, "step": 3147 }, { "epoch": 0.1888762224755505, "grad_norm": 1.1340703964233398, "learning_rate": 6.5470577325521214e-06, "loss": 0.4328, "step": 3148 }, { "epoch": 0.1889362212755745, "grad_norm": 1.182188868522644, "learning_rate": 6.546723024263017e-06, "loss": 0.4969, "step": 3149 }, { "epoch": 0.1889962200755985, "grad_norm": 1.1676111221313477, "learning_rate": 6.5463882009130635e-06, "loss": 0.4875, "step": 3150 }, { "epoch": 0.1890562188756225, "grad_norm": 1.315891981124878, "learning_rate": 6.546053262514908e-06, "loss": 0.4255, "step": 3151 }, { "epoch": 0.1891162176756465, "grad_norm": 1.1436771154403687, "learning_rate": 6.545718209081199e-06, "loss": 0.4673, "step": 3152 }, { "epoch": 0.18917621647567048, "grad_norm": 1.4371904134750366, "learning_rate": 6.545383040624589e-06, "loss": 0.4982, "step": 3153 }, { "epoch": 0.18923621527569448, "grad_norm": 1.1526120901107788, "learning_rate": 6.5450477571577365e-06, "loss": 0.4478, "step": 3154 }, { "epoch": 0.18929621407571848, "grad_norm": 1.4210838079452515, "learning_rate": 6.544712358693303e-06, "loss": 0.4652, "step": 3155 }, { "epoch": 0.18935621287574247, "grad_norm": 1.1936593055725098, "learning_rate": 6.544376845243955e-06, "loss": 0.4775, "step": 3156 }, { "epoch": 0.18941621167576647, "grad_norm": 1.2312036752700806, "learning_rate": 6.544041216822365e-06, "loss": 0.4837, "step": 3157 }, { "epoch": 0.1894762104757905, "grad_norm": 1.2131808996200562, "learning_rate": 6.543705473441205e-06, "loss": 0.4277, "step": 3158 }, { "epoch": 0.1895362092758145, "grad_norm": 1.2365825176239014, "learning_rate": 6.543369615113158e-06, "loss": 0.466, "step": 3159 }, { "epoch": 0.1895962080758385, "grad_norm": 1.4274944067001343, "learning_rate": 6.543033641850905e-06, "loss": 0.5771, "step": 3160 }, { "epoch": 0.1896562068758625, "grad_norm": 1.2543483972549438, "learning_rate": 6.542697553667137e-06, "loss": 0.4378, "step": 3161 }, { "epoch": 0.18971620567588648, "grad_norm": 1.1236857175827026, "learning_rate": 6.542361350574543e-06, "loss": 0.4581, "step": 3162 }, { "epoch": 0.18977620447591048, "grad_norm": 1.3356600999832153, "learning_rate": 6.542025032585823e-06, "loss": 0.514, "step": 3163 }, { "epoch": 0.18983620327593448, "grad_norm": 1.3351035118103027, "learning_rate": 6.541688599713676e-06, "loss": 0.5037, "step": 3164 }, { "epoch": 0.18989620207595848, "grad_norm": 1.215469479560852, "learning_rate": 6.5413520519708094e-06, "loss": 0.4337, "step": 3165 }, { "epoch": 0.18995620087598247, "grad_norm": 1.3030295372009277, "learning_rate": 6.541015389369931e-06, "loss": 0.5011, "step": 3166 }, { "epoch": 0.19001619967600647, "grad_norm": 1.2847415208816528, "learning_rate": 6.540678611923758e-06, "loss": 0.4482, "step": 3167 }, { "epoch": 0.19007619847603047, "grad_norm": 1.3268998861312866, "learning_rate": 6.5403417196450055e-06, "loss": 0.4449, "step": 3168 }, { "epoch": 0.1901361972760545, "grad_norm": 1.2155364751815796, "learning_rate": 6.540004712546398e-06, "loss": 0.4584, "step": 3169 }, { "epoch": 0.1901961960760785, "grad_norm": 1.2830533981323242, "learning_rate": 6.539667590640663e-06, "loss": 0.4718, "step": 3170 }, { "epoch": 0.19025619487610249, "grad_norm": 1.1619431972503662, "learning_rate": 6.539330353940532e-06, "loss": 0.4162, "step": 3171 }, { "epoch": 0.19031619367612648, "grad_norm": 1.2793971300125122, "learning_rate": 6.538993002458742e-06, "loss": 0.5111, "step": 3172 }, { "epoch": 0.19037619247615048, "grad_norm": 1.158492088317871, "learning_rate": 6.53865553620803e-06, "loss": 0.4732, "step": 3173 }, { "epoch": 0.19043619127617448, "grad_norm": 1.331345796585083, "learning_rate": 6.538317955201144e-06, "loss": 0.5109, "step": 3174 }, { "epoch": 0.19049619007619847, "grad_norm": 1.2602108716964722, "learning_rate": 6.53798025945083e-06, "loss": 0.4538, "step": 3175 }, { "epoch": 0.19055618887622247, "grad_norm": 1.0675586462020874, "learning_rate": 6.537642448969844e-06, "loss": 0.418, "step": 3176 }, { "epoch": 0.19061618767624647, "grad_norm": 1.3166546821594238, "learning_rate": 6.537304523770941e-06, "loss": 0.4425, "step": 3177 }, { "epoch": 0.19067618647627047, "grad_norm": 1.2737337350845337, "learning_rate": 6.536966483866884e-06, "loss": 0.5112, "step": 3178 }, { "epoch": 0.19073618527629446, "grad_norm": 1.0651224851608276, "learning_rate": 6.536628329270441e-06, "loss": 0.4165, "step": 3179 }, { "epoch": 0.1907961840763185, "grad_norm": 1.3104777336120605, "learning_rate": 6.5362900599943795e-06, "loss": 0.4782, "step": 3180 }, { "epoch": 0.19085618287634248, "grad_norm": 1.3814001083374023, "learning_rate": 6.535951676051475e-06, "loss": 0.4593, "step": 3181 }, { "epoch": 0.19091618167636648, "grad_norm": 1.1562321186065674, "learning_rate": 6.535613177454508e-06, "loss": 0.4557, "step": 3182 }, { "epoch": 0.19097618047639048, "grad_norm": 1.347975730895996, "learning_rate": 6.535274564216262e-06, "loss": 0.4748, "step": 3183 }, { "epoch": 0.19103617927641448, "grad_norm": 1.4453808069229126, "learning_rate": 6.534935836349524e-06, "loss": 0.5012, "step": 3184 }, { "epoch": 0.19109617807643847, "grad_norm": 1.36714506149292, "learning_rate": 6.534596993867087e-06, "loss": 0.5275, "step": 3185 }, { "epoch": 0.19115617687646247, "grad_norm": 1.2101556062698364, "learning_rate": 6.534258036781746e-06, "loss": 0.4858, "step": 3186 }, { "epoch": 0.19121617567648647, "grad_norm": 1.2865241765975952, "learning_rate": 6.5339189651063055e-06, "loss": 0.4491, "step": 3187 }, { "epoch": 0.19127617447651046, "grad_norm": 1.3014687299728394, "learning_rate": 6.533579778853566e-06, "loss": 0.4921, "step": 3188 }, { "epoch": 0.19133617327653446, "grad_norm": 1.3043218851089478, "learning_rate": 6.53324047803634e-06, "loss": 0.4333, "step": 3189 }, { "epoch": 0.19139617207655846, "grad_norm": 1.3146822452545166, "learning_rate": 6.53290106266744e-06, "loss": 0.5041, "step": 3190 }, { "epoch": 0.19145617087658248, "grad_norm": 1.359640121459961, "learning_rate": 6.532561532759686e-06, "loss": 0.4729, "step": 3191 }, { "epoch": 0.19151616967660648, "grad_norm": 1.2067087888717651, "learning_rate": 6.532221888325897e-06, "loss": 0.4331, "step": 3192 }, { "epoch": 0.19157616847663048, "grad_norm": 1.2931896448135376, "learning_rate": 6.531882129378904e-06, "loss": 0.442, "step": 3193 }, { "epoch": 0.19163616727665447, "grad_norm": 1.3591445684432983, "learning_rate": 6.531542255931535e-06, "loss": 0.4582, "step": 3194 }, { "epoch": 0.19169616607667847, "grad_norm": 1.2557759284973145, "learning_rate": 6.5312022679966275e-06, "loss": 0.4508, "step": 3195 }, { "epoch": 0.19175616487670247, "grad_norm": 1.3314284086227417, "learning_rate": 6.53086216558702e-06, "loss": 0.4317, "step": 3196 }, { "epoch": 0.19181616367672646, "grad_norm": 1.3486615419387817, "learning_rate": 6.530521948715557e-06, "loss": 0.4806, "step": 3197 }, { "epoch": 0.19187616247675046, "grad_norm": 1.4613412618637085, "learning_rate": 6.530181617395088e-06, "loss": 0.5378, "step": 3198 }, { "epoch": 0.19193616127677446, "grad_norm": 1.294685959815979, "learning_rate": 6.529841171638464e-06, "loss": 0.5007, "step": 3199 }, { "epoch": 0.19199616007679846, "grad_norm": 1.216357946395874, "learning_rate": 6.529500611458542e-06, "loss": 0.4493, "step": 3200 }, { "epoch": 0.19205615887682245, "grad_norm": 1.2381864786148071, "learning_rate": 6.5291599368681855e-06, "loss": 0.4775, "step": 3201 }, { "epoch": 0.19211615767684645, "grad_norm": 1.1679326295852661, "learning_rate": 6.528819147880259e-06, "loss": 0.4928, "step": 3202 }, { "epoch": 0.19217615647687047, "grad_norm": 1.280661702156067, "learning_rate": 6.5284782445076325e-06, "loss": 0.4605, "step": 3203 }, { "epoch": 0.19223615527689447, "grad_norm": 1.2782994508743286, "learning_rate": 6.52813722676318e-06, "loss": 0.4661, "step": 3204 }, { "epoch": 0.19229615407691847, "grad_norm": 1.3138872385025024, "learning_rate": 6.52779609465978e-06, "loss": 0.5165, "step": 3205 }, { "epoch": 0.19235615287694247, "grad_norm": 1.1862163543701172, "learning_rate": 6.527454848210318e-06, "loss": 0.4595, "step": 3206 }, { "epoch": 0.19241615167696646, "grad_norm": 1.150865912437439, "learning_rate": 6.527113487427678e-06, "loss": 0.4456, "step": 3207 }, { "epoch": 0.19247615047699046, "grad_norm": 1.3231556415557861, "learning_rate": 6.526772012324753e-06, "loss": 0.4589, "step": 3208 }, { "epoch": 0.19253614927701446, "grad_norm": 1.2863116264343262, "learning_rate": 6.52643042291444e-06, "loss": 0.4433, "step": 3209 }, { "epoch": 0.19259614807703845, "grad_norm": 1.327567458152771, "learning_rate": 6.526088719209638e-06, "loss": 0.4571, "step": 3210 }, { "epoch": 0.19265614687706245, "grad_norm": 1.2810784578323364, "learning_rate": 6.525746901223251e-06, "loss": 0.4395, "step": 3211 }, { "epoch": 0.19271614567708645, "grad_norm": 1.2863142490386963, "learning_rate": 6.52540496896819e-06, "loss": 0.5008, "step": 3212 }, { "epoch": 0.19277614447711044, "grad_norm": 1.3166860342025757, "learning_rate": 6.525062922457366e-06, "loss": 0.4739, "step": 3213 }, { "epoch": 0.19283614327713447, "grad_norm": 1.3605047464370728, "learning_rate": 6.524720761703698e-06, "loss": 0.482, "step": 3214 }, { "epoch": 0.19289614207715847, "grad_norm": 1.424134612083435, "learning_rate": 6.5243784867201074e-06, "loss": 0.5218, "step": 3215 }, { "epoch": 0.19295614087718246, "grad_norm": 1.2953245639801025, "learning_rate": 6.52403609751952e-06, "loss": 0.4854, "step": 3216 }, { "epoch": 0.19301613967720646, "grad_norm": 1.2038078308105469, "learning_rate": 6.523693594114868e-06, "loss": 0.3991, "step": 3217 }, { "epoch": 0.19307613847723046, "grad_norm": 1.29043710231781, "learning_rate": 6.523350976519083e-06, "loss": 0.4472, "step": 3218 }, { "epoch": 0.19313613727725445, "grad_norm": 1.2481718063354492, "learning_rate": 6.523008244745108e-06, "loss": 0.4432, "step": 3219 }, { "epoch": 0.19319613607727845, "grad_norm": 1.2554534673690796, "learning_rate": 6.522665398805883e-06, "loss": 0.4564, "step": 3220 }, { "epoch": 0.19325613487730245, "grad_norm": 1.2185120582580566, "learning_rate": 6.522322438714358e-06, "loss": 0.4295, "step": 3221 }, { "epoch": 0.19331613367732645, "grad_norm": 1.1879165172576904, "learning_rate": 6.521979364483483e-06, "loss": 0.4367, "step": 3222 }, { "epoch": 0.19337613247735044, "grad_norm": 1.1872700452804565, "learning_rate": 6.521636176126216e-06, "loss": 0.431, "step": 3223 }, { "epoch": 0.19343613127737444, "grad_norm": 1.1816942691802979, "learning_rate": 6.5212928736555175e-06, "loss": 0.4429, "step": 3224 }, { "epoch": 0.19349613007739846, "grad_norm": 1.2234666347503662, "learning_rate": 6.520949457084352e-06, "loss": 0.4209, "step": 3225 }, { "epoch": 0.19355612887742246, "grad_norm": 1.2127909660339355, "learning_rate": 6.520605926425689e-06, "loss": 0.4909, "step": 3226 }, { "epoch": 0.19361612767744646, "grad_norm": 1.2242686748504639, "learning_rate": 6.520262281692501e-06, "loss": 0.4697, "step": 3227 }, { "epoch": 0.19367612647747046, "grad_norm": 1.2810983657836914, "learning_rate": 6.519918522897768e-06, "loss": 0.4328, "step": 3228 }, { "epoch": 0.19373612527749445, "grad_norm": 1.294248104095459, "learning_rate": 6.5195746500544705e-06, "loss": 0.4714, "step": 3229 }, { "epoch": 0.19379612407751845, "grad_norm": 1.2951302528381348, "learning_rate": 6.519230663175596e-06, "loss": 0.5125, "step": 3230 }, { "epoch": 0.19385612287754245, "grad_norm": 1.277607798576355, "learning_rate": 6.518886562274135e-06, "loss": 0.4896, "step": 3231 }, { "epoch": 0.19391612167756644, "grad_norm": 1.2946879863739014, "learning_rate": 6.518542347363083e-06, "loss": 0.451, "step": 3232 }, { "epoch": 0.19397612047759044, "grad_norm": 1.3101575374603271, "learning_rate": 6.518198018455438e-06, "loss": 0.5028, "step": 3233 }, { "epoch": 0.19403611927761444, "grad_norm": 1.193848729133606, "learning_rate": 6.517853575564205e-06, "loss": 0.4322, "step": 3234 }, { "epoch": 0.19409611807763844, "grad_norm": 1.3954333066940308, "learning_rate": 6.517509018702392e-06, "loss": 0.5127, "step": 3235 }, { "epoch": 0.19415611687766246, "grad_norm": 1.2344545125961304, "learning_rate": 6.517164347883012e-06, "loss": 0.477, "step": 3236 }, { "epoch": 0.19421611567768646, "grad_norm": 1.2871967554092407, "learning_rate": 6.516819563119079e-06, "loss": 0.425, "step": 3237 }, { "epoch": 0.19427611447771045, "grad_norm": 1.4868505001068115, "learning_rate": 6.516474664423617e-06, "loss": 0.4119, "step": 3238 }, { "epoch": 0.19433611327773445, "grad_norm": 1.253006100654602, "learning_rate": 6.5161296518096486e-06, "loss": 0.4301, "step": 3239 }, { "epoch": 0.19439611207775845, "grad_norm": 1.227340817451477, "learning_rate": 6.515784525290205e-06, "loss": 0.5125, "step": 3240 }, { "epoch": 0.19445611087778245, "grad_norm": 1.1965413093566895, "learning_rate": 6.515439284878321e-06, "loss": 0.4982, "step": 3241 }, { "epoch": 0.19451610967780644, "grad_norm": 1.2102398872375488, "learning_rate": 6.515093930587032e-06, "loss": 0.4673, "step": 3242 }, { "epoch": 0.19457610847783044, "grad_norm": 1.2752057313919067, "learning_rate": 6.514748462429384e-06, "loss": 0.5102, "step": 3243 }, { "epoch": 0.19463610727785444, "grad_norm": 1.3249931335449219, "learning_rate": 6.51440288041842e-06, "loss": 0.4737, "step": 3244 }, { "epoch": 0.19469610607787843, "grad_norm": 1.187432885169983, "learning_rate": 6.514057184567194e-06, "loss": 0.4631, "step": 3245 }, { "epoch": 0.19475610487790243, "grad_norm": 1.3321003913879395, "learning_rate": 6.513711374888759e-06, "loss": 0.467, "step": 3246 }, { "epoch": 0.19481610367792643, "grad_norm": 1.321317434310913, "learning_rate": 6.513365451396177e-06, "loss": 0.4215, "step": 3247 }, { "epoch": 0.19487610247795045, "grad_norm": 1.370285987854004, "learning_rate": 6.51301941410251e-06, "loss": 0.4761, "step": 3248 }, { "epoch": 0.19493610127797445, "grad_norm": 1.2238409519195557, "learning_rate": 6.5126732630208264e-06, "loss": 0.4505, "step": 3249 }, { "epoch": 0.19499610007799845, "grad_norm": 1.2775837182998657, "learning_rate": 6.5123269981642e-06, "loss": 0.4981, "step": 3250 }, { "epoch": 0.19505609887802244, "grad_norm": 1.1389342546463013, "learning_rate": 6.511980619545707e-06, "loss": 0.4291, "step": 3251 }, { "epoch": 0.19511609767804644, "grad_norm": 1.2129292488098145, "learning_rate": 6.511634127178429e-06, "loss": 0.4293, "step": 3252 }, { "epoch": 0.19517609647807044, "grad_norm": 1.3861918449401855, "learning_rate": 6.51128752107545e-06, "loss": 0.4881, "step": 3253 }, { "epoch": 0.19523609527809443, "grad_norm": 1.1986639499664307, "learning_rate": 6.510940801249861e-06, "loss": 0.4448, "step": 3254 }, { "epoch": 0.19529609407811843, "grad_norm": 1.2284626960754395, "learning_rate": 6.510593967714757e-06, "loss": 0.4536, "step": 3255 }, { "epoch": 0.19535609287814243, "grad_norm": 1.283158302307129, "learning_rate": 6.510247020483233e-06, "loss": 0.505, "step": 3256 }, { "epoch": 0.19541609167816643, "grad_norm": 1.370451807975769, "learning_rate": 6.509899959568395e-06, "loss": 0.4302, "step": 3257 }, { "epoch": 0.19547609047819042, "grad_norm": 1.3746384382247925, "learning_rate": 6.509552784983349e-06, "loss": 0.4783, "step": 3258 }, { "epoch": 0.19553608927821445, "grad_norm": 1.3190330266952515, "learning_rate": 6.509205496741205e-06, "loss": 0.5162, "step": 3259 }, { "epoch": 0.19559608807823844, "grad_norm": 1.2503596544265747, "learning_rate": 6.508858094855079e-06, "loss": 0.4257, "step": 3260 }, { "epoch": 0.19565608687826244, "grad_norm": 1.200048565864563, "learning_rate": 6.508510579338092e-06, "loss": 0.4335, "step": 3261 }, { "epoch": 0.19571608567828644, "grad_norm": 1.395951271057129, "learning_rate": 6.508162950203367e-06, "loss": 0.4332, "step": 3262 }, { "epoch": 0.19577608447831044, "grad_norm": 1.2171070575714111, "learning_rate": 6.507815207464032e-06, "loss": 0.4376, "step": 3263 }, { "epoch": 0.19583608327833443, "grad_norm": 1.3184456825256348, "learning_rate": 6.50746735113322e-06, "loss": 0.4775, "step": 3264 }, { "epoch": 0.19589608207835843, "grad_norm": 1.2781046628952026, "learning_rate": 6.507119381224069e-06, "loss": 0.5047, "step": 3265 }, { "epoch": 0.19595608087838243, "grad_norm": 1.147655725479126, "learning_rate": 6.506771297749719e-06, "loss": 0.4332, "step": 3266 }, { "epoch": 0.19601607967840642, "grad_norm": 1.2191135883331299, "learning_rate": 6.506423100723316e-06, "loss": 0.452, "step": 3267 }, { "epoch": 0.19607607847843042, "grad_norm": 1.3789395093917847, "learning_rate": 6.5060747901580094e-06, "loss": 0.4675, "step": 3268 }, { "epoch": 0.19613607727845442, "grad_norm": 1.2710702419281006, "learning_rate": 6.505726366066953e-06, "loss": 0.4812, "step": 3269 }, { "epoch": 0.19619607607847844, "grad_norm": 1.2954219579696655, "learning_rate": 6.505377828463307e-06, "loss": 0.4622, "step": 3270 }, { "epoch": 0.19625607487850244, "grad_norm": 1.2186683416366577, "learning_rate": 6.505029177360233e-06, "loss": 0.4661, "step": 3271 }, { "epoch": 0.19631607367852644, "grad_norm": 1.1618852615356445, "learning_rate": 6.504680412770897e-06, "loss": 0.4276, "step": 3272 }, { "epoch": 0.19637607247855043, "grad_norm": 1.0920658111572266, "learning_rate": 6.504331534708471e-06, "loss": 0.427, "step": 3273 }, { "epoch": 0.19643607127857443, "grad_norm": 1.314353346824646, "learning_rate": 6.503982543186132e-06, "loss": 0.4835, "step": 3274 }, { "epoch": 0.19649607007859843, "grad_norm": 1.223265528678894, "learning_rate": 6.5036334382170575e-06, "loss": 0.3977, "step": 3275 }, { "epoch": 0.19655606887862243, "grad_norm": 1.3714468479156494, "learning_rate": 6.503284219814432e-06, "loss": 0.5111, "step": 3276 }, { "epoch": 0.19661606767864642, "grad_norm": 1.2429938316345215, "learning_rate": 6.502934887991445e-06, "loss": 0.4452, "step": 3277 }, { "epoch": 0.19667606647867042, "grad_norm": 1.2673195600509644, "learning_rate": 6.502585442761288e-06, "loss": 0.4348, "step": 3278 }, { "epoch": 0.19673606527869442, "grad_norm": 1.186706304550171, "learning_rate": 6.5022358841371595e-06, "loss": 0.4775, "step": 3279 }, { "epoch": 0.1967960640787184, "grad_norm": 1.4205414056777954, "learning_rate": 6.501886212132259e-06, "loss": 0.4877, "step": 3280 }, { "epoch": 0.19685606287874244, "grad_norm": 1.2172634601593018, "learning_rate": 6.501536426759795e-06, "loss": 0.4239, "step": 3281 }, { "epoch": 0.19691606167876644, "grad_norm": 1.2632713317871094, "learning_rate": 6.501186528032973e-06, "loss": 0.4445, "step": 3282 }, { "epoch": 0.19697606047879043, "grad_norm": 1.2197171449661255, "learning_rate": 6.500836515965009e-06, "loss": 0.4623, "step": 3283 }, { "epoch": 0.19703605927881443, "grad_norm": 1.2915579080581665, "learning_rate": 6.500486390569122e-06, "loss": 0.5057, "step": 3284 }, { "epoch": 0.19709605807883843, "grad_norm": 1.4110766649246216, "learning_rate": 6.500136151858535e-06, "loss": 0.4889, "step": 3285 }, { "epoch": 0.19715605687886242, "grad_norm": 1.3684282302856445, "learning_rate": 6.499785799846474e-06, "loss": 0.4093, "step": 3286 }, { "epoch": 0.19721605567888642, "grad_norm": 1.237488031387329, "learning_rate": 6.49943533454617e-06, "loss": 0.4695, "step": 3287 }, { "epoch": 0.19727605447891042, "grad_norm": 1.486723780632019, "learning_rate": 6.499084755970858e-06, "loss": 0.4773, "step": 3288 }, { "epoch": 0.19733605327893441, "grad_norm": 1.2351537942886353, "learning_rate": 6.4987340641337796e-06, "loss": 0.4221, "step": 3289 }, { "epoch": 0.1973960520789584, "grad_norm": 1.1488076448440552, "learning_rate": 6.498383259048177e-06, "loss": 0.436, "step": 3290 }, { "epoch": 0.1974560508789824, "grad_norm": 1.322835087776184, "learning_rate": 6.4980323407273e-06, "loss": 0.4572, "step": 3291 }, { "epoch": 0.19751604967900643, "grad_norm": 1.2151789665222168, "learning_rate": 6.4976813091844e-06, "loss": 0.4664, "step": 3292 }, { "epoch": 0.19757604847903043, "grad_norm": 1.3250981569290161, "learning_rate": 6.497330164432735e-06, "loss": 0.424, "step": 3293 }, { "epoch": 0.19763604727905443, "grad_norm": 1.1340172290802002, "learning_rate": 6.496978906485564e-06, "loss": 0.4016, "step": 3294 }, { "epoch": 0.19769604607907842, "grad_norm": 1.3525902032852173, "learning_rate": 6.4966275353561556e-06, "loss": 0.4752, "step": 3295 }, { "epoch": 0.19775604487910242, "grad_norm": 1.2698945999145508, "learning_rate": 6.496276051057777e-06, "loss": 0.498, "step": 3296 }, { "epoch": 0.19781604367912642, "grad_norm": 1.3158514499664307, "learning_rate": 6.495924453603704e-06, "loss": 0.4467, "step": 3297 }, { "epoch": 0.19787604247915042, "grad_norm": 1.3431541919708252, "learning_rate": 6.495572743007213e-06, "loss": 0.4246, "step": 3298 }, { "epoch": 0.1979360412791744, "grad_norm": 1.305747151374817, "learning_rate": 6.495220919281587e-06, "loss": 0.4692, "step": 3299 }, { "epoch": 0.1979960400791984, "grad_norm": 1.149579405784607, "learning_rate": 6.494868982440114e-06, "loss": 0.4137, "step": 3300 }, { "epoch": 0.1980560388792224, "grad_norm": 1.2927417755126953, "learning_rate": 6.494516932496083e-06, "loss": 0.5052, "step": 3301 }, { "epoch": 0.1981160376792464, "grad_norm": 1.3390501737594604, "learning_rate": 6.494164769462792e-06, "loss": 0.4522, "step": 3302 }, { "epoch": 0.1981760364792704, "grad_norm": 1.3920471668243408, "learning_rate": 6.493812493353537e-06, "loss": 0.4808, "step": 3303 }, { "epoch": 0.19823603527929443, "grad_norm": 1.3303204774856567, "learning_rate": 6.493460104181624e-06, "loss": 0.4572, "step": 3304 }, { "epoch": 0.19829603407931842, "grad_norm": 1.252161979675293, "learning_rate": 6.493107601960362e-06, "loss": 0.4978, "step": 3305 }, { "epoch": 0.19835603287934242, "grad_norm": 1.2613427639007568, "learning_rate": 6.4927549867030635e-06, "loss": 0.3915, "step": 3306 }, { "epoch": 0.19841603167936642, "grad_norm": 1.2671202421188354, "learning_rate": 6.492402258423042e-06, "loss": 0.4869, "step": 3307 }, { "epoch": 0.19847603047939041, "grad_norm": 1.1560418605804443, "learning_rate": 6.492049417133621e-06, "loss": 0.4191, "step": 3308 }, { "epoch": 0.1985360292794144, "grad_norm": 1.2076857089996338, "learning_rate": 6.491696462848127e-06, "loss": 0.4033, "step": 3309 }, { "epoch": 0.1985960280794384, "grad_norm": 1.1824289560317993, "learning_rate": 6.4913433955798855e-06, "loss": 0.4255, "step": 3310 }, { "epoch": 0.1986560268794624, "grad_norm": 1.3502860069274902, "learning_rate": 6.490990215342234e-06, "loss": 0.4488, "step": 3311 }, { "epoch": 0.1987160256794864, "grad_norm": 1.2744500637054443, "learning_rate": 6.490636922148509e-06, "loss": 0.4707, "step": 3312 }, { "epoch": 0.1987760244795104, "grad_norm": 1.1716667413711548, "learning_rate": 6.490283516012051e-06, "loss": 0.472, "step": 3313 }, { "epoch": 0.1988360232795344, "grad_norm": 1.2081001996994019, "learning_rate": 6.48992999694621e-06, "loss": 0.4069, "step": 3314 }, { "epoch": 0.19889602207955842, "grad_norm": 1.258925199508667, "learning_rate": 6.489576364964334e-06, "loss": 0.4576, "step": 3315 }, { "epoch": 0.19895602087958242, "grad_norm": 1.3843576908111572, "learning_rate": 6.48922262007978e-06, "loss": 0.4705, "step": 3316 }, { "epoch": 0.19901601967960642, "grad_norm": 1.1889934539794922, "learning_rate": 6.488868762305907e-06, "loss": 0.448, "step": 3317 }, { "epoch": 0.1990760184796304, "grad_norm": 1.300238013267517, "learning_rate": 6.488514791656077e-06, "loss": 0.4908, "step": 3318 }, { "epoch": 0.1991360172796544, "grad_norm": 1.25884211063385, "learning_rate": 6.488160708143659e-06, "loss": 0.4443, "step": 3319 }, { "epoch": 0.1991960160796784, "grad_norm": 1.3446460962295532, "learning_rate": 6.487806511782025e-06, "loss": 0.4995, "step": 3320 }, { "epoch": 0.1992560148797024, "grad_norm": 1.2489336729049683, "learning_rate": 6.487452202584553e-06, "loss": 0.49, "step": 3321 }, { "epoch": 0.1993160136797264, "grad_norm": 1.1861380338668823, "learning_rate": 6.48709778056462e-06, "loss": 0.5079, "step": 3322 }, { "epoch": 0.1993760124797504, "grad_norm": 1.493830919265747, "learning_rate": 6.486743245735614e-06, "loss": 0.4935, "step": 3323 }, { "epoch": 0.1994360112797744, "grad_norm": 1.3279997110366821, "learning_rate": 6.486388598110924e-06, "loss": 0.508, "step": 3324 }, { "epoch": 0.1994960100797984, "grad_norm": 1.311071753501892, "learning_rate": 6.486033837703941e-06, "loss": 0.4565, "step": 3325 }, { "epoch": 0.19955600887982242, "grad_norm": 1.326796054840088, "learning_rate": 6.485678964528066e-06, "loss": 0.4685, "step": 3326 }, { "epoch": 0.1996160076798464, "grad_norm": 1.4519245624542236, "learning_rate": 6.485323978596697e-06, "loss": 0.4224, "step": 3327 }, { "epoch": 0.1996760064798704, "grad_norm": 1.1418787240982056, "learning_rate": 6.484968879923244e-06, "loss": 0.4435, "step": 3328 }, { "epoch": 0.1997360052798944, "grad_norm": 1.1886228322982788, "learning_rate": 6.484613668521116e-06, "loss": 0.4221, "step": 3329 }, { "epoch": 0.1997960040799184, "grad_norm": 1.314761996269226, "learning_rate": 6.484258344403727e-06, "loss": 0.5257, "step": 3330 }, { "epoch": 0.1998560028799424, "grad_norm": 1.3136897087097168, "learning_rate": 6.483902907584497e-06, "loss": 0.4624, "step": 3331 }, { "epoch": 0.1999160016799664, "grad_norm": 1.2262431383132935, "learning_rate": 6.483547358076849e-06, "loss": 0.4557, "step": 3332 }, { "epoch": 0.1999760004799904, "grad_norm": 1.1769721508026123, "learning_rate": 6.483191695894209e-06, "loss": 0.4386, "step": 3333 }, { "epoch": 0.2000359992800144, "grad_norm": 1.234459400177002, "learning_rate": 6.482835921050011e-06, "loss": 0.4454, "step": 3334 }, { "epoch": 0.2000959980800384, "grad_norm": 1.175899624824524, "learning_rate": 6.48248003355769e-06, "loss": 0.403, "step": 3335 }, { "epoch": 0.2001559968800624, "grad_norm": 1.324913501739502, "learning_rate": 6.482124033430686e-06, "loss": 0.4709, "step": 3336 }, { "epoch": 0.2002159956800864, "grad_norm": 1.4012526273727417, "learning_rate": 6.481767920682444e-06, "loss": 0.4774, "step": 3337 }, { "epoch": 0.2002759944801104, "grad_norm": 1.1454962491989136, "learning_rate": 6.481411695326413e-06, "loss": 0.4411, "step": 3338 }, { "epoch": 0.2003359932801344, "grad_norm": 1.2508395910263062, "learning_rate": 6.481055357376045e-06, "loss": 0.5044, "step": 3339 }, { "epoch": 0.2003959920801584, "grad_norm": 1.1607459783554077, "learning_rate": 6.480698906844797e-06, "loss": 0.4399, "step": 3340 }, { "epoch": 0.2004559908801824, "grad_norm": 1.3237416744232178, "learning_rate": 6.480342343746132e-06, "loss": 0.4677, "step": 3341 }, { "epoch": 0.2005159896802064, "grad_norm": 1.2135261297225952, "learning_rate": 6.4799856680935166e-06, "loss": 0.4668, "step": 3342 }, { "epoch": 0.2005759884802304, "grad_norm": 1.1931086778640747, "learning_rate": 6.479628879900418e-06, "loss": 0.4348, "step": 3343 }, { "epoch": 0.2006359872802544, "grad_norm": 1.3120191097259521, "learning_rate": 6.479271979180311e-06, "loss": 0.3872, "step": 3344 }, { "epoch": 0.2006959860802784, "grad_norm": 1.2065496444702148, "learning_rate": 6.478914965946676e-06, "loss": 0.46, "step": 3345 }, { "epoch": 0.20075598488030238, "grad_norm": 1.4236716032028198, "learning_rate": 6.478557840212994e-06, "loss": 0.5104, "step": 3346 }, { "epoch": 0.20081598368032638, "grad_norm": 1.3605815172195435, "learning_rate": 6.478200601992753e-06, "loss": 0.4762, "step": 3347 }, { "epoch": 0.2008759824803504, "grad_norm": 1.33522629737854, "learning_rate": 6.477843251299445e-06, "loss": 0.4576, "step": 3348 }, { "epoch": 0.2009359812803744, "grad_norm": 1.1252357959747314, "learning_rate": 6.477485788146562e-06, "loss": 0.4024, "step": 3349 }, { "epoch": 0.2009959800803984, "grad_norm": 1.2008064985275269, "learning_rate": 6.477128212547607e-06, "loss": 0.4444, "step": 3350 }, { "epoch": 0.2010559788804224, "grad_norm": 1.187577247619629, "learning_rate": 6.476770524516084e-06, "loss": 0.4284, "step": 3351 }, { "epoch": 0.2011159776804464, "grad_norm": 1.2266649007797241, "learning_rate": 6.4764127240655006e-06, "loss": 0.4548, "step": 3352 }, { "epoch": 0.2011759764804704, "grad_norm": 1.4803128242492676, "learning_rate": 6.476054811209369e-06, "loss": 0.4471, "step": 3353 }, { "epoch": 0.2012359752804944, "grad_norm": 1.3774724006652832, "learning_rate": 6.475696785961205e-06, "loss": 0.4882, "step": 3354 }, { "epoch": 0.2012959740805184, "grad_norm": 1.2228888273239136, "learning_rate": 6.4753386483345315e-06, "loss": 0.4054, "step": 3355 }, { "epoch": 0.20135597288054238, "grad_norm": 1.2344272136688232, "learning_rate": 6.474980398342873e-06, "loss": 0.4353, "step": 3356 }, { "epoch": 0.20141597168056638, "grad_norm": 1.4046590328216553, "learning_rate": 6.4746220359997574e-06, "loss": 0.5222, "step": 3357 }, { "epoch": 0.20147597048059038, "grad_norm": 1.3385499715805054, "learning_rate": 6.474263561318721e-06, "loss": 0.4792, "step": 3358 }, { "epoch": 0.20153596928061437, "grad_norm": 1.419704556465149, "learning_rate": 6.473904974313301e-06, "loss": 0.4633, "step": 3359 }, { "epoch": 0.2015959680806384, "grad_norm": 1.2669309377670288, "learning_rate": 6.473546274997038e-06, "loss": 0.4364, "step": 3360 }, { "epoch": 0.2016559668806624, "grad_norm": 1.3514260053634644, "learning_rate": 6.473187463383481e-06, "loss": 0.4508, "step": 3361 }, { "epoch": 0.2017159656806864, "grad_norm": 1.2942426204681396, "learning_rate": 6.472828539486179e-06, "loss": 0.3783, "step": 3362 }, { "epoch": 0.2017759644807104, "grad_norm": 1.177966833114624, "learning_rate": 6.472469503318686e-06, "loss": 0.3953, "step": 3363 }, { "epoch": 0.2018359632807344, "grad_norm": 1.2287229299545288, "learning_rate": 6.472110354894564e-06, "loss": 0.4564, "step": 3364 }, { "epoch": 0.20189596208075838, "grad_norm": 1.3408401012420654, "learning_rate": 6.4717510942273736e-06, "loss": 0.4524, "step": 3365 }, { "epoch": 0.20195596088078238, "grad_norm": 1.2134590148925781, "learning_rate": 6.471391721330684e-06, "loss": 0.4095, "step": 3366 }, { "epoch": 0.20201595968080638, "grad_norm": 1.383989691734314, "learning_rate": 6.471032236218067e-06, "loss": 0.4783, "step": 3367 }, { "epoch": 0.20207595848083038, "grad_norm": 1.344543695449829, "learning_rate": 6.470672638903098e-06, "loss": 0.4958, "step": 3368 }, { "epoch": 0.20213595728085437, "grad_norm": 1.3154151439666748, "learning_rate": 6.470312929399358e-06, "loss": 0.4315, "step": 3369 }, { "epoch": 0.20219595608087837, "grad_norm": 1.3726657629013062, "learning_rate": 6.469953107720431e-06, "loss": 0.4812, "step": 3370 }, { "epoch": 0.2022559548809024, "grad_norm": 1.2464803457260132, "learning_rate": 6.4695931738799075e-06, "loss": 0.4413, "step": 3371 }, { "epoch": 0.2023159536809264, "grad_norm": 1.146419882774353, "learning_rate": 6.469233127891379e-06, "loss": 0.4811, "step": 3372 }, { "epoch": 0.2023759524809504, "grad_norm": 1.4363347291946411, "learning_rate": 6.4688729697684415e-06, "loss": 0.4766, "step": 3373 }, { "epoch": 0.20243595128097439, "grad_norm": 1.2003834247589111, "learning_rate": 6.468512699524698e-06, "loss": 0.4814, "step": 3374 }, { "epoch": 0.20249595008099838, "grad_norm": 1.2339985370635986, "learning_rate": 6.468152317173756e-06, "loss": 0.4699, "step": 3375 }, { "epoch": 0.20255594888102238, "grad_norm": 1.2148958444595337, "learning_rate": 6.467791822729223e-06, "loss": 0.4219, "step": 3376 }, { "epoch": 0.20261594768104638, "grad_norm": 1.2280850410461426, "learning_rate": 6.467431216204714e-06, "loss": 0.4471, "step": 3377 }, { "epoch": 0.20267594648107037, "grad_norm": 1.1640433073043823, "learning_rate": 6.467070497613848e-06, "loss": 0.4245, "step": 3378 }, { "epoch": 0.20273594528109437, "grad_norm": 1.2441506385803223, "learning_rate": 6.466709666970247e-06, "loss": 0.4687, "step": 3379 }, { "epoch": 0.20279594408111837, "grad_norm": 1.250058889389038, "learning_rate": 6.466348724287539e-06, "loss": 0.5031, "step": 3380 }, { "epoch": 0.20285594288114236, "grad_norm": 1.3911651372909546, "learning_rate": 6.465987669579353e-06, "loss": 0.475, "step": 3381 }, { "epoch": 0.2029159416811664, "grad_norm": 1.1751524209976196, "learning_rate": 6.465626502859327e-06, "loss": 0.4767, "step": 3382 }, { "epoch": 0.2029759404811904, "grad_norm": 1.3946384191513062, "learning_rate": 6.465265224141098e-06, "loss": 0.4901, "step": 3383 }, { "epoch": 0.20303593928121438, "grad_norm": 1.380028247833252, "learning_rate": 6.464903833438312e-06, "loss": 0.5046, "step": 3384 }, { "epoch": 0.20309593808123838, "grad_norm": 1.3066099882125854, "learning_rate": 6.4645423307646155e-06, "loss": 0.4728, "step": 3385 }, { "epoch": 0.20315593688126238, "grad_norm": 1.2552005052566528, "learning_rate": 6.4641807161336624e-06, "loss": 0.4276, "step": 3386 }, { "epoch": 0.20321593568128637, "grad_norm": 1.390914797782898, "learning_rate": 6.463818989559109e-06, "loss": 0.551, "step": 3387 }, { "epoch": 0.20327593448131037, "grad_norm": 1.1752325296401978, "learning_rate": 6.4634571510546134e-06, "loss": 0.4412, "step": 3388 }, { "epoch": 0.20333593328133437, "grad_norm": 1.2699803113937378, "learning_rate": 6.4630952006338445e-06, "loss": 0.5272, "step": 3389 }, { "epoch": 0.20339593208135837, "grad_norm": 1.2825274467468262, "learning_rate": 6.4627331383104695e-06, "loss": 0.4318, "step": 3390 }, { "epoch": 0.20345593088138236, "grad_norm": 1.3874887228012085, "learning_rate": 6.462370964098161e-06, "loss": 0.4914, "step": 3391 }, { "epoch": 0.20351592968140636, "grad_norm": 1.3361860513687134, "learning_rate": 6.462008678010599e-06, "loss": 0.4945, "step": 3392 }, { "epoch": 0.20357592848143038, "grad_norm": 1.4310081005096436, "learning_rate": 6.461646280061464e-06, "loss": 0.5148, "step": 3393 }, { "epoch": 0.20363592728145438, "grad_norm": 1.3333072662353516, "learning_rate": 6.461283770264442e-06, "loss": 0.4597, "step": 3394 }, { "epoch": 0.20369592608147838, "grad_norm": 1.3845926523208618, "learning_rate": 6.460921148633223e-06, "loss": 0.5038, "step": 3395 }, { "epoch": 0.20375592488150238, "grad_norm": 1.2366487979888916, "learning_rate": 6.4605584151815014e-06, "loss": 0.4741, "step": 3396 }, { "epoch": 0.20381592368152637, "grad_norm": 1.2856535911560059, "learning_rate": 6.460195569922979e-06, "loss": 0.4778, "step": 3397 }, { "epoch": 0.20387592248155037, "grad_norm": 1.5052663087844849, "learning_rate": 6.459832612871355e-06, "loss": 0.4822, "step": 3398 }, { "epoch": 0.20393592128157437, "grad_norm": 1.0862653255462646, "learning_rate": 6.4594695440403385e-06, "loss": 0.4301, "step": 3399 }, { "epoch": 0.20399592008159836, "grad_norm": 1.280012845993042, "learning_rate": 6.459106363443641e-06, "loss": 0.5049, "step": 3400 }, { "epoch": 0.20405591888162236, "grad_norm": 1.1742308139801025, "learning_rate": 6.4587430710949775e-06, "loss": 0.4062, "step": 3401 }, { "epoch": 0.20411591768164636, "grad_norm": 1.3886011838912964, "learning_rate": 6.458379667008067e-06, "loss": 0.4877, "step": 3402 }, { "epoch": 0.20417591648167036, "grad_norm": 1.1055662631988525, "learning_rate": 6.458016151196637e-06, "loss": 0.4197, "step": 3403 }, { "epoch": 0.20423591528169435, "grad_norm": 1.3155219554901123, "learning_rate": 6.4576525236744126e-06, "loss": 0.4901, "step": 3404 }, { "epoch": 0.20429591408171838, "grad_norm": 1.147862195968628, "learning_rate": 6.457288784455128e-06, "loss": 0.4125, "step": 3405 }, { "epoch": 0.20435591288174237, "grad_norm": 1.4000356197357178, "learning_rate": 6.4569249335525176e-06, "loss": 0.4752, "step": 3406 }, { "epoch": 0.20441591168176637, "grad_norm": 1.3064470291137695, "learning_rate": 6.456560970980326e-06, "loss": 0.4861, "step": 3407 }, { "epoch": 0.20447591048179037, "grad_norm": 1.3085904121398926, "learning_rate": 6.4561968967522956e-06, "loss": 0.4898, "step": 3408 }, { "epoch": 0.20453590928181437, "grad_norm": 1.235594630241394, "learning_rate": 6.4558327108821776e-06, "loss": 0.5372, "step": 3409 }, { "epoch": 0.20459590808183836, "grad_norm": 1.2660720348358154, "learning_rate": 6.4554684133837245e-06, "loss": 0.4555, "step": 3410 }, { "epoch": 0.20465590688186236, "grad_norm": 1.3342102766036987, "learning_rate": 6.455104004270695e-06, "loss": 0.4282, "step": 3411 }, { "epoch": 0.20471590568188636, "grad_norm": 1.278317928314209, "learning_rate": 6.454739483556849e-06, "loss": 0.513, "step": 3412 }, { "epoch": 0.20477590448191035, "grad_norm": 1.4758704900741577, "learning_rate": 6.454374851255956e-06, "loss": 0.4855, "step": 3413 }, { "epoch": 0.20483590328193435, "grad_norm": 1.2879732847213745, "learning_rate": 6.454010107381785e-06, "loss": 0.439, "step": 3414 }, { "epoch": 0.20489590208195835, "grad_norm": 1.1471575498580933, "learning_rate": 6.4536452519481105e-06, "loss": 0.4439, "step": 3415 }, { "epoch": 0.20495590088198237, "grad_norm": 1.205715298652649, "learning_rate": 6.4532802849687115e-06, "loss": 0.4151, "step": 3416 }, { "epoch": 0.20501589968200637, "grad_norm": 1.1995481252670288, "learning_rate": 6.4529152064573715e-06, "loss": 0.48, "step": 3417 }, { "epoch": 0.20507589848203037, "grad_norm": 1.3551244735717773, "learning_rate": 6.452550016427877e-06, "loss": 0.4605, "step": 3418 }, { "epoch": 0.20513589728205436, "grad_norm": 1.1745092868804932, "learning_rate": 6.4521847148940205e-06, "loss": 0.4186, "step": 3419 }, { "epoch": 0.20519589608207836, "grad_norm": 1.2333256006240845, "learning_rate": 6.451819301869597e-06, "loss": 0.4742, "step": 3420 }, { "epoch": 0.20525589488210236, "grad_norm": 1.3165189027786255, "learning_rate": 6.451453777368407e-06, "loss": 0.4698, "step": 3421 }, { "epoch": 0.20531589368212635, "grad_norm": 1.3194538354873657, "learning_rate": 6.451088141404255e-06, "loss": 0.4552, "step": 3422 }, { "epoch": 0.20537589248215035, "grad_norm": 1.3524054288864136, "learning_rate": 6.450722393990949e-06, "loss": 0.4808, "step": 3423 }, { "epoch": 0.20543589128217435, "grad_norm": 1.203706979751587, "learning_rate": 6.450356535142302e-06, "loss": 0.4676, "step": 3424 }, { "epoch": 0.20549589008219835, "grad_norm": 1.2482541799545288, "learning_rate": 6.44999056487213e-06, "loss": 0.5022, "step": 3425 }, { "epoch": 0.20555588888222234, "grad_norm": 1.2462048530578613, "learning_rate": 6.449624483194254e-06, "loss": 0.5156, "step": 3426 }, { "epoch": 0.20561588768224637, "grad_norm": 1.3798620700836182, "learning_rate": 6.449258290122501e-06, "loss": 0.4677, "step": 3427 }, { "epoch": 0.20567588648227036, "grad_norm": 1.2678332328796387, "learning_rate": 6.448891985670698e-06, "loss": 0.4416, "step": 3428 }, { "epoch": 0.20573588528229436, "grad_norm": 1.1575379371643066, "learning_rate": 6.44852556985268e-06, "loss": 0.4488, "step": 3429 }, { "epoch": 0.20579588408231836, "grad_norm": 1.20200514793396, "learning_rate": 6.448159042682286e-06, "loss": 0.4783, "step": 3430 }, { "epoch": 0.20585588288234236, "grad_norm": 1.3075824975967407, "learning_rate": 6.447792404173354e-06, "loss": 0.4936, "step": 3431 }, { "epoch": 0.20591588168236635, "grad_norm": 1.2238547801971436, "learning_rate": 6.447425654339735e-06, "loss": 0.441, "step": 3432 }, { "epoch": 0.20597588048239035, "grad_norm": 1.3659172058105469, "learning_rate": 6.447058793195277e-06, "loss": 0.4588, "step": 3433 }, { "epoch": 0.20603587928241435, "grad_norm": 1.2173237800598145, "learning_rate": 6.446691820753835e-06, "loss": 0.4597, "step": 3434 }, { "epoch": 0.20609587808243834, "grad_norm": 1.3283182382583618, "learning_rate": 6.446324737029268e-06, "loss": 0.4667, "step": 3435 }, { "epoch": 0.20615587688246234, "grad_norm": 1.2732397317886353, "learning_rate": 6.44595754203544e-06, "loss": 0.4894, "step": 3436 }, { "epoch": 0.20621587568248634, "grad_norm": 1.2998926639556885, "learning_rate": 6.445590235786217e-06, "loss": 0.5109, "step": 3437 }, { "epoch": 0.20627587448251036, "grad_norm": 1.2348655462265015, "learning_rate": 6.445222818295471e-06, "loss": 0.4548, "step": 3438 }, { "epoch": 0.20633587328253436, "grad_norm": 1.2133731842041016, "learning_rate": 6.4448552895770774e-06, "loss": 0.4261, "step": 3439 }, { "epoch": 0.20639587208255836, "grad_norm": 1.322342038154602, "learning_rate": 6.444487649644916e-06, "loss": 0.4638, "step": 3440 }, { "epoch": 0.20645587088258235, "grad_norm": 1.315812587738037, "learning_rate": 6.4441198985128715e-06, "loss": 0.4731, "step": 3441 }, { "epoch": 0.20651586968260635, "grad_norm": 1.3990843296051025, "learning_rate": 6.44375203619483e-06, "loss": 0.4628, "step": 3442 }, { "epoch": 0.20657586848263035, "grad_norm": 1.2257682085037231, "learning_rate": 6.443384062704688e-06, "loss": 0.4132, "step": 3443 }, { "epoch": 0.20663586728265435, "grad_norm": 1.4136826992034912, "learning_rate": 6.443015978056339e-06, "loss": 0.4672, "step": 3444 }, { "epoch": 0.20669586608267834, "grad_norm": 1.196946144104004, "learning_rate": 6.4426477822636854e-06, "loss": 0.409, "step": 3445 }, { "epoch": 0.20675586488270234, "grad_norm": 1.1615015268325806, "learning_rate": 6.442279475340631e-06, "loss": 0.4757, "step": 3446 }, { "epoch": 0.20681586368272634, "grad_norm": 1.2383224964141846, "learning_rate": 6.441911057301087e-06, "loss": 0.4597, "step": 3447 }, { "epoch": 0.20687586248275033, "grad_norm": 1.18915855884552, "learning_rate": 6.4415425281589646e-06, "loss": 0.3905, "step": 3448 }, { "epoch": 0.20693586128277436, "grad_norm": 1.2915773391723633, "learning_rate": 6.441173887928183e-06, "loss": 0.4717, "step": 3449 }, { "epoch": 0.20699586008279836, "grad_norm": 1.1307493448257446, "learning_rate": 6.4408051366226635e-06, "loss": 0.4463, "step": 3450 }, { "epoch": 0.20705585888282235, "grad_norm": 1.2330695390701294, "learning_rate": 6.440436274256333e-06, "loss": 0.4086, "step": 3451 }, { "epoch": 0.20711585768284635, "grad_norm": 1.347133994102478, "learning_rate": 6.44006730084312e-06, "loss": 0.4273, "step": 3452 }, { "epoch": 0.20717585648287035, "grad_norm": 1.2252222299575806, "learning_rate": 6.439698216396959e-06, "loss": 0.4246, "step": 3453 }, { "epoch": 0.20723585528289434, "grad_norm": 1.4703606367111206, "learning_rate": 6.4393290209317914e-06, "loss": 0.4956, "step": 3454 }, { "epoch": 0.20729585408291834, "grad_norm": 1.2989436388015747, "learning_rate": 6.438959714461558e-06, "loss": 0.4436, "step": 3455 }, { "epoch": 0.20735585288294234, "grad_norm": 1.3385223150253296, "learning_rate": 6.438590297000205e-06, "loss": 0.4846, "step": 3456 }, { "epoch": 0.20741585168296633, "grad_norm": 1.305420160293579, "learning_rate": 6.438220768561686e-06, "loss": 0.4818, "step": 3457 }, { "epoch": 0.20747585048299033, "grad_norm": 1.3202319145202637, "learning_rate": 6.437851129159954e-06, "loss": 0.4485, "step": 3458 }, { "epoch": 0.20753584928301433, "grad_norm": 1.3137279748916626, "learning_rate": 6.4374813788089704e-06, "loss": 0.4736, "step": 3459 }, { "epoch": 0.20759584808303833, "grad_norm": 1.1584391593933105, "learning_rate": 6.437111517522698e-06, "loss": 0.4809, "step": 3460 }, { "epoch": 0.20765584688306235, "grad_norm": 1.2496763467788696, "learning_rate": 6.4367415453151055e-06, "loss": 0.4674, "step": 3461 }, { "epoch": 0.20771584568308635, "grad_norm": 1.4436726570129395, "learning_rate": 6.436371462200163e-06, "loss": 0.4959, "step": 3462 }, { "epoch": 0.20777584448311034, "grad_norm": 1.3092694282531738, "learning_rate": 6.43600126819185e-06, "loss": 0.4883, "step": 3463 }, { "epoch": 0.20783584328313434, "grad_norm": 1.1796938180923462, "learning_rate": 6.435630963304145e-06, "loss": 0.4588, "step": 3464 }, { "epoch": 0.20789584208315834, "grad_norm": 1.2429882287979126, "learning_rate": 6.435260547551034e-06, "loss": 0.4857, "step": 3465 }, { "epoch": 0.20795584088318234, "grad_norm": 1.3005915880203247, "learning_rate": 6.434890020946504e-06, "loss": 0.4451, "step": 3466 }, { "epoch": 0.20801583968320633, "grad_norm": 1.3862141370773315, "learning_rate": 6.43451938350455e-06, "loss": 0.4594, "step": 3467 }, { "epoch": 0.20807583848323033, "grad_norm": 1.3043831586837769, "learning_rate": 6.434148635239168e-06, "loss": 0.4526, "step": 3468 }, { "epoch": 0.20813583728325433, "grad_norm": 1.112055778503418, "learning_rate": 6.43377777616436e-06, "loss": 0.4577, "step": 3469 }, { "epoch": 0.20819583608327832, "grad_norm": 1.2628717422485352, "learning_rate": 6.433406806294131e-06, "loss": 0.4321, "step": 3470 }, { "epoch": 0.20825583488330232, "grad_norm": 1.228916883468628, "learning_rate": 6.433035725642492e-06, "loss": 0.4993, "step": 3471 }, { "epoch": 0.20831583368332635, "grad_norm": 1.2002509832382202, "learning_rate": 6.432664534223458e-06, "loss": 0.4547, "step": 3472 }, { "epoch": 0.20837583248335034, "grad_norm": 1.2420700788497925, "learning_rate": 6.432293232051044e-06, "loss": 0.4963, "step": 3473 }, { "epoch": 0.20843583128337434, "grad_norm": 1.2976242303848267, "learning_rate": 6.431921819139274e-06, "loss": 0.4776, "step": 3474 }, { "epoch": 0.20849583008339834, "grad_norm": 1.2545043230056763, "learning_rate": 6.4315502955021755e-06, "loss": 0.4798, "step": 3475 }, { "epoch": 0.20855582888342233, "grad_norm": 1.3126873970031738, "learning_rate": 6.431178661153779e-06, "loss": 0.5029, "step": 3476 }, { "epoch": 0.20861582768344633, "grad_norm": 1.258595585823059, "learning_rate": 6.430806916108118e-06, "loss": 0.4689, "step": 3477 }, { "epoch": 0.20867582648347033, "grad_norm": 1.2596259117126465, "learning_rate": 6.430435060379232e-06, "loss": 0.4897, "step": 3478 }, { "epoch": 0.20873582528349433, "grad_norm": 1.4018577337265015, "learning_rate": 6.430063093981166e-06, "loss": 0.4795, "step": 3479 }, { "epoch": 0.20879582408351832, "grad_norm": 1.1946322917938232, "learning_rate": 6.429691016927966e-06, "loss": 0.4506, "step": 3480 }, { "epoch": 0.20885582288354232, "grad_norm": 1.1976691484451294, "learning_rate": 6.429318829233684e-06, "loss": 0.4472, "step": 3481 }, { "epoch": 0.20891582168356632, "grad_norm": 1.2666701078414917, "learning_rate": 6.428946530912375e-06, "loss": 0.4853, "step": 3482 }, { "epoch": 0.20897582048359034, "grad_norm": 1.3237463235855103, "learning_rate": 6.4285741219781e-06, "loss": 0.4589, "step": 3483 }, { "epoch": 0.20903581928361434, "grad_norm": 1.247715950012207, "learning_rate": 6.428201602444924e-06, "loss": 0.4497, "step": 3484 }, { "epoch": 0.20909581808363834, "grad_norm": 1.4283078908920288, "learning_rate": 6.427828972326913e-06, "loss": 0.4893, "step": 3485 }, { "epoch": 0.20915581688366233, "grad_norm": 1.1045321226119995, "learning_rate": 6.427456231638141e-06, "loss": 0.4373, "step": 3486 }, { "epoch": 0.20921581568368633, "grad_norm": 1.22886061668396, "learning_rate": 6.427083380392685e-06, "loss": 0.4686, "step": 3487 }, { "epoch": 0.20927581448371033, "grad_norm": 1.1683692932128906, "learning_rate": 6.426710418604627e-06, "loss": 0.4343, "step": 3488 }, { "epoch": 0.20933581328373432, "grad_norm": 1.4050776958465576, "learning_rate": 6.4263373462880475e-06, "loss": 0.453, "step": 3489 }, { "epoch": 0.20939581208375832, "grad_norm": 1.1201145648956299, "learning_rate": 6.425964163457041e-06, "loss": 0.4211, "step": 3490 }, { "epoch": 0.20945581088378232, "grad_norm": 1.2505909204483032, "learning_rate": 6.425590870125698e-06, "loss": 0.4711, "step": 3491 }, { "epoch": 0.20951580968380631, "grad_norm": 1.2472704648971558, "learning_rate": 6.425217466308116e-06, "loss": 0.4628, "step": 3492 }, { "epoch": 0.2095758084838303, "grad_norm": 1.1179994344711304, "learning_rate": 6.4248439520183985e-06, "loss": 0.4446, "step": 3493 }, { "epoch": 0.20963580728385434, "grad_norm": 1.239121913909912, "learning_rate": 6.4244703272706504e-06, "loss": 0.5127, "step": 3494 }, { "epoch": 0.20969580608387833, "grad_norm": 1.325535535812378, "learning_rate": 6.424096592078983e-06, "loss": 0.4202, "step": 3495 }, { "epoch": 0.20975580488390233, "grad_norm": 1.2613221406936646, "learning_rate": 6.423722746457507e-06, "loss": 0.45, "step": 3496 }, { "epoch": 0.20981580368392633, "grad_norm": 1.3484907150268555, "learning_rate": 6.423348790420345e-06, "loss": 0.4477, "step": 3497 }, { "epoch": 0.20987580248395032, "grad_norm": 1.3062598705291748, "learning_rate": 6.422974723981618e-06, "loss": 0.3951, "step": 3498 }, { "epoch": 0.20993580128397432, "grad_norm": 1.2244317531585693, "learning_rate": 6.422600547155452e-06, "loss": 0.4392, "step": 3499 }, { "epoch": 0.20999580008399832, "grad_norm": 1.2042601108551025, "learning_rate": 6.422226259955979e-06, "loss": 0.4591, "step": 3500 }, { "epoch": 0.21005579888402232, "grad_norm": 1.3681148290634155, "learning_rate": 6.421851862397333e-06, "loss": 0.4497, "step": 3501 }, { "epoch": 0.2101157976840463, "grad_norm": 1.2407569885253906, "learning_rate": 6.421477354493656e-06, "loss": 0.4888, "step": 3502 }, { "epoch": 0.2101757964840703, "grad_norm": 1.1233481168746948, "learning_rate": 6.421102736259088e-06, "loss": 0.41, "step": 3503 }, { "epoch": 0.2102357952840943, "grad_norm": 1.386710524559021, "learning_rate": 6.420728007707778e-06, "loss": 0.4931, "step": 3504 }, { "epoch": 0.2102957940841183, "grad_norm": 1.2737433910369873, "learning_rate": 6.420353168853879e-06, "loss": 0.4937, "step": 3505 }, { "epoch": 0.21035579288414233, "grad_norm": 1.147253155708313, "learning_rate": 6.419978219711545e-06, "loss": 0.4337, "step": 3506 }, { "epoch": 0.21041579168416633, "grad_norm": 1.2479569911956787, "learning_rate": 6.419603160294938e-06, "loss": 0.4439, "step": 3507 }, { "epoch": 0.21047579048419032, "grad_norm": 1.2997444868087769, "learning_rate": 6.41922799061822e-06, "loss": 0.4574, "step": 3508 }, { "epoch": 0.21053578928421432, "grad_norm": 1.2599122524261475, "learning_rate": 6.418852710695562e-06, "loss": 0.4687, "step": 3509 }, { "epoch": 0.21059578808423832, "grad_norm": 1.292550802230835, "learning_rate": 6.418477320541134e-06, "loss": 0.4657, "step": 3510 }, { "epoch": 0.2106557868842623, "grad_norm": 1.353806972503662, "learning_rate": 6.418101820169115e-06, "loss": 0.4881, "step": 3511 }, { "epoch": 0.2107157856842863, "grad_norm": 1.2265671491622925, "learning_rate": 6.4177262095936855e-06, "loss": 0.4274, "step": 3512 }, { "epoch": 0.2107757844843103, "grad_norm": 1.4367657899856567, "learning_rate": 6.41735048882903e-06, "loss": 0.47, "step": 3513 }, { "epoch": 0.2108357832843343, "grad_norm": 1.3745338916778564, "learning_rate": 6.416974657889338e-06, "loss": 0.5312, "step": 3514 }, { "epoch": 0.2108957820843583, "grad_norm": 1.3257304430007935, "learning_rate": 6.4165987167888025e-06, "loss": 0.4669, "step": 3515 }, { "epoch": 0.2109557808843823, "grad_norm": 1.3290681838989258, "learning_rate": 6.416222665541622e-06, "loss": 0.4231, "step": 3516 }, { "epoch": 0.21101577968440632, "grad_norm": 1.1161221265792847, "learning_rate": 6.4158465041619965e-06, "loss": 0.3775, "step": 3517 }, { "epoch": 0.21107577848443032, "grad_norm": 1.4519751071929932, "learning_rate": 6.415470232664134e-06, "loss": 0.4962, "step": 3518 }, { "epoch": 0.21113577728445432, "grad_norm": 1.4057068824768066, "learning_rate": 6.415093851062244e-06, "loss": 0.4459, "step": 3519 }, { "epoch": 0.21119577608447831, "grad_norm": 1.2887789011001587, "learning_rate": 6.41471735937054e-06, "loss": 0.4937, "step": 3520 }, { "epoch": 0.2112557748845023, "grad_norm": 1.152634620666504, "learning_rate": 6.41434075760324e-06, "loss": 0.4546, "step": 3521 }, { "epoch": 0.2113157736845263, "grad_norm": 1.3708117008209229, "learning_rate": 6.413964045774568e-06, "loss": 0.5027, "step": 3522 }, { "epoch": 0.2113757724845503, "grad_norm": 1.2367799282073975, "learning_rate": 6.413587223898751e-06, "loss": 0.4512, "step": 3523 }, { "epoch": 0.2114357712845743, "grad_norm": 1.3872103691101074, "learning_rate": 6.413210291990017e-06, "loss": 0.456, "step": 3524 }, { "epoch": 0.2114957700845983, "grad_norm": 1.078437328338623, "learning_rate": 6.412833250062604e-06, "loss": 0.4589, "step": 3525 }, { "epoch": 0.2115557688846223, "grad_norm": 1.2383005619049072, "learning_rate": 6.4124560981307495e-06, "loss": 0.4404, "step": 3526 }, { "epoch": 0.2116157676846463, "grad_norm": 1.2972455024719238, "learning_rate": 6.412078836208697e-06, "loss": 0.4472, "step": 3527 }, { "epoch": 0.21167576648467032, "grad_norm": 1.2178994417190552, "learning_rate": 6.411701464310695e-06, "loss": 0.4385, "step": 3528 }, { "epoch": 0.21173576528469432, "grad_norm": 1.3631303310394287, "learning_rate": 6.411323982450993e-06, "loss": 0.4794, "step": 3529 }, { "epoch": 0.2117957640847183, "grad_norm": 1.2493486404418945, "learning_rate": 6.410946390643849e-06, "loss": 0.4635, "step": 3530 }, { "epoch": 0.2118557628847423, "grad_norm": 1.3564023971557617, "learning_rate": 6.410568688903523e-06, "loss": 0.4362, "step": 3531 }, { "epoch": 0.2119157616847663, "grad_norm": 1.3298814296722412, "learning_rate": 6.410190877244277e-06, "loss": 0.4759, "step": 3532 }, { "epoch": 0.2119757604847903, "grad_norm": 1.2619719505310059, "learning_rate": 6.409812955680381e-06, "loss": 0.4742, "step": 3533 }, { "epoch": 0.2120357592848143, "grad_norm": 1.1580575704574585, "learning_rate": 6.409434924226107e-06, "loss": 0.4713, "step": 3534 }, { "epoch": 0.2120957580848383, "grad_norm": 1.1812069416046143, "learning_rate": 6.40905678289573e-06, "loss": 0.4484, "step": 3535 }, { "epoch": 0.2121557568848623, "grad_norm": 1.2239694595336914, "learning_rate": 6.408678531703533e-06, "loss": 0.4457, "step": 3536 }, { "epoch": 0.2122157556848863, "grad_norm": 1.3340529203414917, "learning_rate": 6.4083001706638e-06, "loss": 0.5157, "step": 3537 }, { "epoch": 0.2122757544849103, "grad_norm": 1.3256683349609375, "learning_rate": 6.407921699790819e-06, "loss": 0.4524, "step": 3538 }, { "epoch": 0.21233575328493431, "grad_norm": 1.2315800189971924, "learning_rate": 6.407543119098884e-06, "loss": 0.499, "step": 3539 }, { "epoch": 0.2123957520849583, "grad_norm": 1.160311222076416, "learning_rate": 6.4071644286022925e-06, "loss": 0.4027, "step": 3540 }, { "epoch": 0.2124557508849823, "grad_norm": 1.3439905643463135, "learning_rate": 6.406785628315344e-06, "loss": 0.4656, "step": 3541 }, { "epoch": 0.2125157496850063, "grad_norm": 1.2668859958648682, "learning_rate": 6.406406718252347e-06, "loss": 0.4433, "step": 3542 }, { "epoch": 0.2125757484850303, "grad_norm": 1.2970080375671387, "learning_rate": 6.40602769842761e-06, "loss": 0.4925, "step": 3543 }, { "epoch": 0.2126357472850543, "grad_norm": 1.3881405591964722, "learning_rate": 6.405648568855446e-06, "loss": 0.4697, "step": 3544 }, { "epoch": 0.2126957460850783, "grad_norm": 1.2160600423812866, "learning_rate": 6.405269329550174e-06, "loss": 0.4658, "step": 3545 }, { "epoch": 0.2127557448851023, "grad_norm": 1.2349374294281006, "learning_rate": 6.404889980526116e-06, "loss": 0.369, "step": 3546 }, { "epoch": 0.2128157436851263, "grad_norm": 1.2429133653640747, "learning_rate": 6.404510521797597e-06, "loss": 0.4456, "step": 3547 }, { "epoch": 0.2128757424851503, "grad_norm": 1.2183109521865845, "learning_rate": 6.404130953378949e-06, "loss": 0.4481, "step": 3548 }, { "epoch": 0.21293574128517428, "grad_norm": 1.2424951791763306, "learning_rate": 6.403751275284507e-06, "loss": 0.4403, "step": 3549 }, { "epoch": 0.2129957400851983, "grad_norm": 1.2554527521133423, "learning_rate": 6.403371487528608e-06, "loss": 0.4341, "step": 3550 }, { "epoch": 0.2130557388852223, "grad_norm": 1.4165377616882324, "learning_rate": 6.402991590125596e-06, "loss": 0.5151, "step": 3551 }, { "epoch": 0.2131157376852463, "grad_norm": 1.26150381565094, "learning_rate": 6.402611583089817e-06, "loss": 0.4443, "step": 3552 }, { "epoch": 0.2131757364852703, "grad_norm": 1.379469871520996, "learning_rate": 6.402231466435624e-06, "loss": 0.4882, "step": 3553 }, { "epoch": 0.2132357352852943, "grad_norm": 1.1914571523666382, "learning_rate": 6.401851240177369e-06, "loss": 0.4409, "step": 3554 }, { "epoch": 0.2132957340853183, "grad_norm": 1.15175199508667, "learning_rate": 6.401470904329415e-06, "loss": 0.4645, "step": 3555 }, { "epoch": 0.2133557328853423, "grad_norm": 1.4045740365982056, "learning_rate": 6.401090458906124e-06, "loss": 0.4853, "step": 3556 }, { "epoch": 0.2134157316853663, "grad_norm": 1.2145415544509888, "learning_rate": 6.400709903921864e-06, "loss": 0.4478, "step": 3557 }, { "epoch": 0.21347573048539029, "grad_norm": 1.2941060066223145, "learning_rate": 6.400329239391006e-06, "loss": 0.5151, "step": 3558 }, { "epoch": 0.21353572928541428, "grad_norm": 1.547370195388794, "learning_rate": 6.3999484653279265e-06, "loss": 0.4837, "step": 3559 }, { "epoch": 0.21359572808543828, "grad_norm": 1.2402750253677368, "learning_rate": 6.399567581747006e-06, "loss": 0.4462, "step": 3560 }, { "epoch": 0.21365572688546228, "grad_norm": 1.1740401983261108, "learning_rate": 6.399186588662627e-06, "loss": 0.4077, "step": 3561 }, { "epoch": 0.2137157256854863, "grad_norm": 1.1315281391143799, "learning_rate": 6.39880548608918e-06, "loss": 0.5098, "step": 3562 }, { "epoch": 0.2137757244855103, "grad_norm": 1.1473180055618286, "learning_rate": 6.3984242740410576e-06, "loss": 0.4542, "step": 3563 }, { "epoch": 0.2138357232855343, "grad_norm": 1.2592350244522095, "learning_rate": 6.3980429525326545e-06, "loss": 0.446, "step": 3564 }, { "epoch": 0.2138957220855583, "grad_norm": 1.4207351207733154, "learning_rate": 6.397661521578372e-06, "loss": 0.473, "step": 3565 }, { "epoch": 0.2139557208855823, "grad_norm": 1.336727499961853, "learning_rate": 6.397279981192618e-06, "loss": 0.4527, "step": 3566 }, { "epoch": 0.2140157196856063, "grad_norm": 1.3534740209579468, "learning_rate": 6.396898331389796e-06, "loss": 0.4523, "step": 3567 }, { "epoch": 0.21407571848563028, "grad_norm": 1.3683723211288452, "learning_rate": 6.396516572184325e-06, "loss": 0.5132, "step": 3568 }, { "epoch": 0.21413571728565428, "grad_norm": 1.2030175924301147, "learning_rate": 6.396134703590618e-06, "loss": 0.4783, "step": 3569 }, { "epoch": 0.21419571608567828, "grad_norm": 1.3003637790679932, "learning_rate": 6.395752725623098e-06, "loss": 0.4746, "step": 3570 }, { "epoch": 0.21425571488570228, "grad_norm": 1.312707781791687, "learning_rate": 6.395370638296191e-06, "loss": 0.4852, "step": 3571 }, { "epoch": 0.21431571368572627, "grad_norm": 1.3305952548980713, "learning_rate": 6.394988441624326e-06, "loss": 0.4711, "step": 3572 }, { "epoch": 0.2143757124857503, "grad_norm": 1.3249001502990723, "learning_rate": 6.394606135621937e-06, "loss": 0.4274, "step": 3573 }, { "epoch": 0.2144357112857743, "grad_norm": 1.2689329385757446, "learning_rate": 6.3942237203034625e-06, "loss": 0.4495, "step": 3574 }, { "epoch": 0.2144957100857983, "grad_norm": 1.4344167709350586, "learning_rate": 6.393841195683344e-06, "loss": 0.4596, "step": 3575 }, { "epoch": 0.2145557088858223, "grad_norm": 1.255125880241394, "learning_rate": 6.393458561776027e-06, "loss": 0.4835, "step": 3576 }, { "epoch": 0.21461570768584629, "grad_norm": 1.2401421070098877, "learning_rate": 6.393075818595964e-06, "loss": 0.4716, "step": 3577 }, { "epoch": 0.21467570648587028, "grad_norm": 1.298575520515442, "learning_rate": 6.392692966157607e-06, "loss": 0.443, "step": 3578 }, { "epoch": 0.21473570528589428, "grad_norm": 1.2058305740356445, "learning_rate": 6.392310004475416e-06, "loss": 0.4311, "step": 3579 }, { "epoch": 0.21479570408591828, "grad_norm": 1.2861779928207397, "learning_rate": 6.3919269335638545e-06, "loss": 0.4885, "step": 3580 }, { "epoch": 0.21485570288594227, "grad_norm": 1.2609765529632568, "learning_rate": 6.391543753437388e-06, "loss": 0.4313, "step": 3581 }, { "epoch": 0.21491570168596627, "grad_norm": 1.3437020778656006, "learning_rate": 6.391160464110487e-06, "loss": 0.4582, "step": 3582 }, { "epoch": 0.21497570048599027, "grad_norm": 1.204616665840149, "learning_rate": 6.3907770655976275e-06, "loss": 0.4263, "step": 3583 }, { "epoch": 0.2150356992860143, "grad_norm": 1.2504761219024658, "learning_rate": 6.390393557913288e-06, "loss": 0.5224, "step": 3584 }, { "epoch": 0.2150956980860383, "grad_norm": 1.305232048034668, "learning_rate": 6.390009941071955e-06, "loss": 0.4908, "step": 3585 }, { "epoch": 0.2151556968860623, "grad_norm": 1.2145559787750244, "learning_rate": 6.389626215088111e-06, "loss": 0.4537, "step": 3586 }, { "epoch": 0.21521569568608628, "grad_norm": 1.2173982858657837, "learning_rate": 6.38924237997625e-06, "loss": 0.4458, "step": 3587 }, { "epoch": 0.21527569448611028, "grad_norm": 1.2641905546188354, "learning_rate": 6.3888584357508685e-06, "loss": 0.428, "step": 3588 }, { "epoch": 0.21533569328613428, "grad_norm": 1.4889953136444092, "learning_rate": 6.388474382426465e-06, "loss": 0.4967, "step": 3589 }, { "epoch": 0.21539569208615827, "grad_norm": 1.2899525165557861, "learning_rate": 6.388090220017544e-06, "loss": 0.4655, "step": 3590 }, { "epoch": 0.21545569088618227, "grad_norm": 1.2153650522232056, "learning_rate": 6.3877059485386145e-06, "loss": 0.4621, "step": 3591 }, { "epoch": 0.21551568968620627, "grad_norm": 1.25634765625, "learning_rate": 6.387321568004187e-06, "loss": 0.4534, "step": 3592 }, { "epoch": 0.21557568848623027, "grad_norm": 1.3170750141143799, "learning_rate": 6.386937078428777e-06, "loss": 0.4911, "step": 3593 }, { "epoch": 0.21563568728625426, "grad_norm": 1.1348053216934204, "learning_rate": 6.386552479826908e-06, "loss": 0.4416, "step": 3594 }, { "epoch": 0.2156956860862783, "grad_norm": 1.2665022611618042, "learning_rate": 6.386167772213103e-06, "loss": 0.458, "step": 3595 }, { "epoch": 0.21575568488630228, "grad_norm": 1.2273880243301392, "learning_rate": 6.38578295560189e-06, "loss": 0.461, "step": 3596 }, { "epoch": 0.21581568368632628, "grad_norm": 1.282764196395874, "learning_rate": 6.385398030007803e-06, "loss": 0.4706, "step": 3597 }, { "epoch": 0.21587568248635028, "grad_norm": 1.3317245244979858, "learning_rate": 6.385012995445378e-06, "loss": 0.4757, "step": 3598 }, { "epoch": 0.21593568128637428, "grad_norm": 1.3147549629211426, "learning_rate": 6.3846278519291555e-06, "loss": 0.4927, "step": 3599 }, { "epoch": 0.21599568008639827, "grad_norm": 1.1876835823059082, "learning_rate": 6.384242599473683e-06, "loss": 0.4927, "step": 3600 }, { "epoch": 0.21605567888642227, "grad_norm": 1.2716009616851807, "learning_rate": 6.383857238093506e-06, "loss": 0.5059, "step": 3601 }, { "epoch": 0.21611567768644627, "grad_norm": 1.329424262046814, "learning_rate": 6.383471767803181e-06, "loss": 0.4503, "step": 3602 }, { "epoch": 0.21617567648647026, "grad_norm": 1.2504801750183105, "learning_rate": 6.3830861886172645e-06, "loss": 0.4205, "step": 3603 }, { "epoch": 0.21623567528649426, "grad_norm": 1.1503678560256958, "learning_rate": 6.382700500550319e-06, "loss": 0.4612, "step": 3604 }, { "epoch": 0.21629567408651826, "grad_norm": 1.1778002977371216, "learning_rate": 6.382314703616907e-06, "loss": 0.4642, "step": 3605 }, { "epoch": 0.21635567288654228, "grad_norm": 1.234927773475647, "learning_rate": 6.381928797831602e-06, "loss": 0.4451, "step": 3606 }, { "epoch": 0.21641567168656628, "grad_norm": 1.242445707321167, "learning_rate": 6.381542783208975e-06, "loss": 0.4475, "step": 3607 }, { "epoch": 0.21647567048659028, "grad_norm": 1.2605775594711304, "learning_rate": 6.3811566597636066e-06, "loss": 0.5023, "step": 3608 }, { "epoch": 0.21653566928661427, "grad_norm": 1.3025360107421875, "learning_rate": 6.380770427510078e-06, "loss": 0.49, "step": 3609 }, { "epoch": 0.21659566808663827, "grad_norm": 1.1581593751907349, "learning_rate": 6.380384086462974e-06, "loss": 0.4652, "step": 3610 }, { "epoch": 0.21665566688666227, "grad_norm": 1.329654574394226, "learning_rate": 6.379997636636886e-06, "loss": 0.4461, "step": 3611 }, { "epoch": 0.21671566568668627, "grad_norm": 1.2445094585418701, "learning_rate": 6.3796110780464085e-06, "loss": 0.4516, "step": 3612 }, { "epoch": 0.21677566448671026, "grad_norm": 1.4504474401474, "learning_rate": 6.379224410706141e-06, "loss": 0.5211, "step": 3613 }, { "epoch": 0.21683566328673426, "grad_norm": 1.251981496810913, "learning_rate": 6.378837634630684e-06, "loss": 0.4947, "step": 3614 }, { "epoch": 0.21689566208675826, "grad_norm": 1.250940203666687, "learning_rate": 6.378450749834647e-06, "loss": 0.4527, "step": 3615 }, { "epoch": 0.21695566088678225, "grad_norm": 1.154672384262085, "learning_rate": 6.3780637563326385e-06, "loss": 0.4305, "step": 3616 }, { "epoch": 0.21701565968680625, "grad_norm": 1.2460341453552246, "learning_rate": 6.3776766541392736e-06, "loss": 0.4895, "step": 3617 }, { "epoch": 0.21707565848683028, "grad_norm": 1.262850284576416, "learning_rate": 6.3772894432691725e-06, "loss": 0.4505, "step": 3618 }, { "epoch": 0.21713565728685427, "grad_norm": 1.21098792552948, "learning_rate": 6.3769021237369585e-06, "loss": 0.4131, "step": 3619 }, { "epoch": 0.21719565608687827, "grad_norm": 1.305269718170166, "learning_rate": 6.376514695557259e-06, "loss": 0.4846, "step": 3620 }, { "epoch": 0.21725565488690227, "grad_norm": 1.400224208831787, "learning_rate": 6.376127158744704e-06, "loss": 0.4415, "step": 3621 }, { "epoch": 0.21731565368692626, "grad_norm": 1.1259369850158691, "learning_rate": 6.37573951331393e-06, "loss": 0.4539, "step": 3622 }, { "epoch": 0.21737565248695026, "grad_norm": 1.3476320505142212, "learning_rate": 6.375351759279577e-06, "loss": 0.4499, "step": 3623 }, { "epoch": 0.21743565128697426, "grad_norm": 1.2384475469589233, "learning_rate": 6.3749638966562865e-06, "loss": 0.4762, "step": 3624 }, { "epoch": 0.21749565008699825, "grad_norm": 1.2144609689712524, "learning_rate": 6.3745759254587096e-06, "loss": 0.458, "step": 3625 }, { "epoch": 0.21755564888702225, "grad_norm": 1.4111138582229614, "learning_rate": 6.3741878457014955e-06, "loss": 0.5396, "step": 3626 }, { "epoch": 0.21761564768704625, "grad_norm": 1.2026703357696533, "learning_rate": 6.373799657399302e-06, "loss": 0.5146, "step": 3627 }, { "epoch": 0.21767564648707025, "grad_norm": 1.3614146709442139, "learning_rate": 6.373411360566787e-06, "loss": 0.5326, "step": 3628 }, { "epoch": 0.21773564528709427, "grad_norm": 1.3102359771728516, "learning_rate": 6.3730229552186185e-06, "loss": 0.4439, "step": 3629 }, { "epoch": 0.21779564408711827, "grad_norm": 1.4095438718795776, "learning_rate": 6.37263444136946e-06, "loss": 0.544, "step": 3630 }, { "epoch": 0.21785564288714226, "grad_norm": 1.1832454204559326, "learning_rate": 6.372245819033988e-06, "loss": 0.4533, "step": 3631 }, { "epoch": 0.21791564168716626, "grad_norm": 1.2513724565505981, "learning_rate": 6.371857088226877e-06, "loss": 0.4503, "step": 3632 }, { "epoch": 0.21797564048719026, "grad_norm": 1.29209566116333, "learning_rate": 6.371468248962809e-06, "loss": 0.464, "step": 3633 }, { "epoch": 0.21803563928721426, "grad_norm": 1.3334561586380005, "learning_rate": 6.371079301256465e-06, "loss": 0.4725, "step": 3634 }, { "epoch": 0.21809563808723825, "grad_norm": 1.2116024494171143, "learning_rate": 6.370690245122538e-06, "loss": 0.4546, "step": 3635 }, { "epoch": 0.21815563688726225, "grad_norm": 1.3689751625061035, "learning_rate": 6.370301080575719e-06, "loss": 0.4782, "step": 3636 }, { "epoch": 0.21821563568728625, "grad_norm": 1.223038673400879, "learning_rate": 6.369911807630706e-06, "loss": 0.4242, "step": 3637 }, { "epoch": 0.21827563448731024, "grad_norm": 1.352288842201233, "learning_rate": 6.369522426302198e-06, "loss": 0.4776, "step": 3638 }, { "epoch": 0.21833563328733424, "grad_norm": 1.336685061454773, "learning_rate": 6.3691329366049016e-06, "loss": 0.4715, "step": 3639 }, { "epoch": 0.21839563208735827, "grad_norm": 1.301431655883789, "learning_rate": 6.368743338553527e-06, "loss": 0.4615, "step": 3640 }, { "epoch": 0.21845563088738226, "grad_norm": 1.3943781852722168, "learning_rate": 6.368353632162785e-06, "loss": 0.4584, "step": 3641 }, { "epoch": 0.21851562968740626, "grad_norm": 1.4003980159759521, "learning_rate": 6.367963817447396e-06, "loss": 0.4937, "step": 3642 }, { "epoch": 0.21857562848743026, "grad_norm": 1.2006770372390747, "learning_rate": 6.367573894422079e-06, "loss": 0.4239, "step": 3643 }, { "epoch": 0.21863562728745425, "grad_norm": 1.185314655303955, "learning_rate": 6.367183863101561e-06, "loss": 0.4588, "step": 3644 }, { "epoch": 0.21869562608747825, "grad_norm": 1.2719650268554688, "learning_rate": 6.366793723500571e-06, "loss": 0.4362, "step": 3645 }, { "epoch": 0.21875562488750225, "grad_norm": 1.3468877077102661, "learning_rate": 6.366403475633844e-06, "loss": 0.4492, "step": 3646 }, { "epoch": 0.21881562368752625, "grad_norm": 1.2520567178726196, "learning_rate": 6.366013119516116e-06, "loss": 0.4879, "step": 3647 }, { "epoch": 0.21887562248755024, "grad_norm": 1.373459815979004, "learning_rate": 6.365622655162131e-06, "loss": 0.4529, "step": 3648 }, { "epoch": 0.21893562128757424, "grad_norm": 1.1906050443649292, "learning_rate": 6.365232082586634e-06, "loss": 0.4519, "step": 3649 }, { "epoch": 0.21899562008759824, "grad_norm": 1.3601127862930298, "learning_rate": 6.364841401804374e-06, "loss": 0.4833, "step": 3650 }, { "epoch": 0.21905561888762226, "grad_norm": 1.2546284198760986, "learning_rate": 6.364450612830107e-06, "loss": 0.4545, "step": 3651 }, { "epoch": 0.21911561768764626, "grad_norm": 1.2758374214172363, "learning_rate": 6.364059715678591e-06, "loss": 0.4467, "step": 3652 }, { "epoch": 0.21917561648767026, "grad_norm": 1.128134846687317, "learning_rate": 6.363668710364588e-06, "loss": 0.489, "step": 3653 }, { "epoch": 0.21923561528769425, "grad_norm": 1.1292225122451782, "learning_rate": 6.363277596902865e-06, "loss": 0.4229, "step": 3654 }, { "epoch": 0.21929561408771825, "grad_norm": 1.2141457796096802, "learning_rate": 6.362886375308193e-06, "loss": 0.4295, "step": 3655 }, { "epoch": 0.21935561288774225, "grad_norm": 1.2828508615493774, "learning_rate": 6.362495045595345e-06, "loss": 0.4975, "step": 3656 }, { "epoch": 0.21941561168776624, "grad_norm": 1.1698153018951416, "learning_rate": 6.362103607779101e-06, "loss": 0.3999, "step": 3657 }, { "epoch": 0.21947561048779024, "grad_norm": 1.3173253536224365, "learning_rate": 6.361712061874242e-06, "loss": 0.4965, "step": 3658 }, { "epoch": 0.21953560928781424, "grad_norm": 1.243665337562561, "learning_rate": 6.361320407895557e-06, "loss": 0.4962, "step": 3659 }, { "epoch": 0.21959560808783823, "grad_norm": 1.2817538976669312, "learning_rate": 6.360928645857837e-06, "loss": 0.5008, "step": 3660 }, { "epoch": 0.21965560688786223, "grad_norm": 1.2053477764129639, "learning_rate": 6.360536775775877e-06, "loss": 0.5018, "step": 3661 }, { "epoch": 0.21971560568788623, "grad_norm": 1.2216776609420776, "learning_rate": 6.360144797664474e-06, "loss": 0.4584, "step": 3662 }, { "epoch": 0.21977560448791025, "grad_norm": 1.1292225122451782, "learning_rate": 6.359752711538433e-06, "loss": 0.4606, "step": 3663 }, { "epoch": 0.21983560328793425, "grad_norm": 1.299281358718872, "learning_rate": 6.359360517412562e-06, "loss": 0.4911, "step": 3664 }, { "epoch": 0.21989560208795825, "grad_norm": 1.1730555295944214, "learning_rate": 6.358968215301671e-06, "loss": 0.4469, "step": 3665 }, { "epoch": 0.21995560088798224, "grad_norm": 1.17316734790802, "learning_rate": 6.3585758052205765e-06, "loss": 0.4327, "step": 3666 }, { "epoch": 0.22001559968800624, "grad_norm": 1.2148945331573486, "learning_rate": 6.358183287184096e-06, "loss": 0.4432, "step": 3667 }, { "epoch": 0.22007559848803024, "grad_norm": 1.356542944908142, "learning_rate": 6.357790661207055e-06, "loss": 0.528, "step": 3668 }, { "epoch": 0.22013559728805424, "grad_norm": 1.320222020149231, "learning_rate": 6.3573979273042814e-06, "loss": 0.491, "step": 3669 }, { "epoch": 0.22019559608807823, "grad_norm": 1.1929128170013428, "learning_rate": 6.3570050854906066e-06, "loss": 0.3875, "step": 3670 }, { "epoch": 0.22025559488810223, "grad_norm": 1.3777687549591064, "learning_rate": 6.356612135780866e-06, "loss": 0.5162, "step": 3671 }, { "epoch": 0.22031559368812623, "grad_norm": 1.3069260120391846, "learning_rate": 6.356219078189899e-06, "loss": 0.4247, "step": 3672 }, { "epoch": 0.22037559248815022, "grad_norm": 1.2709347009658813, "learning_rate": 6.3558259127325515e-06, "loss": 0.457, "step": 3673 }, { "epoch": 0.22043559128817425, "grad_norm": 1.3579919338226318, "learning_rate": 6.35543263942367e-06, "loss": 0.5065, "step": 3674 }, { "epoch": 0.22049559008819825, "grad_norm": 1.2550485134124756, "learning_rate": 6.3550392582781055e-06, "loss": 0.4633, "step": 3675 }, { "epoch": 0.22055558888822224, "grad_norm": 1.3388524055480957, "learning_rate": 6.3546457693107175e-06, "loss": 0.4847, "step": 3676 }, { "epoch": 0.22061558768824624, "grad_norm": 1.3865545988082886, "learning_rate": 6.354252172536364e-06, "loss": 0.4446, "step": 3677 }, { "epoch": 0.22067558648827024, "grad_norm": 1.24614679813385, "learning_rate": 6.353858467969911e-06, "loss": 0.4552, "step": 3678 }, { "epoch": 0.22073558528829423, "grad_norm": 1.307706594467163, "learning_rate": 6.353464655626224e-06, "loss": 0.4374, "step": 3679 }, { "epoch": 0.22079558408831823, "grad_norm": 1.35868501663208, "learning_rate": 6.353070735520179e-06, "loss": 0.4929, "step": 3680 }, { "epoch": 0.22085558288834223, "grad_norm": 1.2882390022277832, "learning_rate": 6.352676707666649e-06, "loss": 0.4551, "step": 3681 }, { "epoch": 0.22091558168836622, "grad_norm": 1.3088845014572144, "learning_rate": 6.352282572080518e-06, "loss": 0.4911, "step": 3682 }, { "epoch": 0.22097558048839022, "grad_norm": 1.1822423934936523, "learning_rate": 6.351888328776668e-06, "loss": 0.4784, "step": 3683 }, { "epoch": 0.22103557928841422, "grad_norm": 1.2105594873428345, "learning_rate": 6.351493977769989e-06, "loss": 0.4598, "step": 3684 }, { "epoch": 0.22109557808843824, "grad_norm": 1.383577823638916, "learning_rate": 6.3510995190753755e-06, "loss": 0.5061, "step": 3685 }, { "epoch": 0.22115557688846224, "grad_norm": 1.1709665060043335, "learning_rate": 6.350704952707722e-06, "loss": 0.4737, "step": 3686 }, { "epoch": 0.22121557568848624, "grad_norm": 1.2478150129318237, "learning_rate": 6.3503102786819295e-06, "loss": 0.4391, "step": 3687 }, { "epoch": 0.22127557448851023, "grad_norm": 1.3936206102371216, "learning_rate": 6.349915497012904e-06, "loss": 0.4465, "step": 3688 }, { "epoch": 0.22133557328853423, "grad_norm": 1.2725673913955688, "learning_rate": 6.3495206077155555e-06, "loss": 0.4769, "step": 3689 }, { "epoch": 0.22139557208855823, "grad_norm": 1.274554967880249, "learning_rate": 6.349125610804794e-06, "loss": 0.4622, "step": 3690 }, { "epoch": 0.22145557088858223, "grad_norm": 1.3242554664611816, "learning_rate": 6.34873050629554e-06, "loss": 0.4615, "step": 3691 }, { "epoch": 0.22151556968860622, "grad_norm": 1.2299084663391113, "learning_rate": 6.348335294202713e-06, "loss": 0.4474, "step": 3692 }, { "epoch": 0.22157556848863022, "grad_norm": 1.1730931997299194, "learning_rate": 6.34793997454124e-06, "loss": 0.4364, "step": 3693 }, { "epoch": 0.22163556728865422, "grad_norm": 1.2206264734268188, "learning_rate": 6.347544547326049e-06, "loss": 0.4202, "step": 3694 }, { "epoch": 0.22169556608867821, "grad_norm": 1.1869345903396606, "learning_rate": 6.347149012572073e-06, "loss": 0.4429, "step": 3695 }, { "epoch": 0.22175556488870224, "grad_norm": 1.2754135131835938, "learning_rate": 6.346753370294252e-06, "loss": 0.4611, "step": 3696 }, { "epoch": 0.22181556368872624, "grad_norm": 1.2624330520629883, "learning_rate": 6.346357620507524e-06, "loss": 0.4785, "step": 3697 }, { "epoch": 0.22187556248875023, "grad_norm": 1.1508835554122925, "learning_rate": 6.345961763226837e-06, "loss": 0.4278, "step": 3698 }, { "epoch": 0.22193556128877423, "grad_norm": 1.4131865501403809, "learning_rate": 6.345565798467142e-06, "loss": 0.4762, "step": 3699 }, { "epoch": 0.22199556008879823, "grad_norm": 1.2657170295715332, "learning_rate": 6.345169726243389e-06, "loss": 0.4335, "step": 3700 }, { "epoch": 0.22205555888882222, "grad_norm": 1.3606199026107788, "learning_rate": 6.3447735465705396e-06, "loss": 0.4626, "step": 3701 }, { "epoch": 0.22211555768884622, "grad_norm": 1.210343837738037, "learning_rate": 6.344377259463554e-06, "loss": 0.5211, "step": 3702 }, { "epoch": 0.22217555648887022, "grad_norm": 1.257196307182312, "learning_rate": 6.343980864937398e-06, "loss": 0.4868, "step": 3703 }, { "epoch": 0.22223555528889422, "grad_norm": 1.417188286781311, "learning_rate": 6.343584363007041e-06, "loss": 0.4997, "step": 3704 }, { "epoch": 0.2222955540889182, "grad_norm": 1.3341466188430786, "learning_rate": 6.343187753687459e-06, "loss": 0.4732, "step": 3705 }, { "epoch": 0.2223555528889422, "grad_norm": 1.2892816066741943, "learning_rate": 6.342791036993629e-06, "loss": 0.4627, "step": 3706 }, { "epoch": 0.22241555168896623, "grad_norm": 1.2950364351272583, "learning_rate": 6.342394212940532e-06, "loss": 0.3963, "step": 3707 }, { "epoch": 0.22247555048899023, "grad_norm": 1.3599412441253662, "learning_rate": 6.341997281543157e-06, "loss": 0.4549, "step": 3708 }, { "epoch": 0.22253554928901423, "grad_norm": 1.118637204170227, "learning_rate": 6.3416002428164926e-06, "loss": 0.4122, "step": 3709 }, { "epoch": 0.22259554808903823, "grad_norm": 1.3373252153396606, "learning_rate": 6.341203096775533e-06, "loss": 0.4409, "step": 3710 }, { "epoch": 0.22265554688906222, "grad_norm": 1.2271391153335571, "learning_rate": 6.340805843435277e-06, "loss": 0.4115, "step": 3711 }, { "epoch": 0.22271554568908622, "grad_norm": 1.307281255722046, "learning_rate": 6.340408482810727e-06, "loss": 0.4568, "step": 3712 }, { "epoch": 0.22277554448911022, "grad_norm": 1.1606395244598389, "learning_rate": 6.340011014916888e-06, "loss": 0.4696, "step": 3713 }, { "epoch": 0.2228355432891342, "grad_norm": 1.326490044593811, "learning_rate": 6.339613439768774e-06, "loss": 0.4311, "step": 3714 }, { "epoch": 0.2228955420891582, "grad_norm": 1.272247314453125, "learning_rate": 6.3392157573813974e-06, "loss": 0.4759, "step": 3715 }, { "epoch": 0.2229555408891822, "grad_norm": 1.2983072996139526, "learning_rate": 6.338817967769777e-06, "loss": 0.4874, "step": 3716 }, { "epoch": 0.2230155396892062, "grad_norm": 1.2410998344421387, "learning_rate": 6.338420070948935e-06, "loss": 0.499, "step": 3717 }, { "epoch": 0.2230755384892302, "grad_norm": 1.3191075325012207, "learning_rate": 6.338022066933899e-06, "loss": 0.4585, "step": 3718 }, { "epoch": 0.22313553728925423, "grad_norm": 1.3170987367630005, "learning_rate": 6.337623955739699e-06, "loss": 0.4879, "step": 3719 }, { "epoch": 0.22319553608927822, "grad_norm": 1.2348073720932007, "learning_rate": 6.33722573738137e-06, "loss": 0.4726, "step": 3720 }, { "epoch": 0.22325553488930222, "grad_norm": 1.3576561212539673, "learning_rate": 6.336827411873953e-06, "loss": 0.4686, "step": 3721 }, { "epoch": 0.22331553368932622, "grad_norm": 1.3341283798217773, "learning_rate": 6.336428979232489e-06, "loss": 0.5062, "step": 3722 }, { "epoch": 0.22337553248935021, "grad_norm": 1.3054027557373047, "learning_rate": 6.3360304394720235e-06, "loss": 0.4523, "step": 3723 }, { "epoch": 0.2234355312893742, "grad_norm": 1.3393731117248535, "learning_rate": 6.3356317926076115e-06, "loss": 0.5119, "step": 3724 }, { "epoch": 0.2234955300893982, "grad_norm": 1.4624156951904297, "learning_rate": 6.335233038654304e-06, "loss": 0.5108, "step": 3725 }, { "epoch": 0.2235555288894222, "grad_norm": 1.201904058456421, "learning_rate": 6.334834177627162e-06, "loss": 0.4373, "step": 3726 }, { "epoch": 0.2236155276894462, "grad_norm": 1.2667254209518433, "learning_rate": 6.334435209541251e-06, "loss": 0.4373, "step": 3727 }, { "epoch": 0.2236755264894702, "grad_norm": 1.2783451080322266, "learning_rate": 6.3340361344116324e-06, "loss": 0.4487, "step": 3728 }, { "epoch": 0.2237355252894942, "grad_norm": 1.2546794414520264, "learning_rate": 6.333636952253382e-06, "loss": 0.4824, "step": 3729 }, { "epoch": 0.22379552408951822, "grad_norm": 1.3473328351974487, "learning_rate": 6.333237663081576e-06, "loss": 0.4853, "step": 3730 }, { "epoch": 0.22385552288954222, "grad_norm": 1.2329334020614624, "learning_rate": 6.332838266911289e-06, "loss": 0.4494, "step": 3731 }, { "epoch": 0.22391552168956622, "grad_norm": 1.2050936222076416, "learning_rate": 6.332438763757608e-06, "loss": 0.3985, "step": 3732 }, { "epoch": 0.2239755204895902, "grad_norm": 1.2581894397735596, "learning_rate": 6.332039153635619e-06, "loss": 0.384, "step": 3733 }, { "epoch": 0.2240355192896142, "grad_norm": 1.2595431804656982, "learning_rate": 6.331639436560415e-06, "loss": 0.4403, "step": 3734 }, { "epoch": 0.2240955180896382, "grad_norm": 1.2325409650802612, "learning_rate": 6.331239612547089e-06, "loss": 0.4462, "step": 3735 }, { "epoch": 0.2241555168896622, "grad_norm": 1.3329379558563232, "learning_rate": 6.330839681610743e-06, "loss": 0.4403, "step": 3736 }, { "epoch": 0.2242155156896862, "grad_norm": 1.325912356376648, "learning_rate": 6.330439643766479e-06, "loss": 0.4189, "step": 3737 }, { "epoch": 0.2242755144897102, "grad_norm": 1.274042010307312, "learning_rate": 6.330039499029405e-06, "loss": 0.4851, "step": 3738 }, { "epoch": 0.2243355132897342, "grad_norm": 1.2858078479766846, "learning_rate": 6.329639247414634e-06, "loss": 0.4568, "step": 3739 }, { "epoch": 0.2243955120897582, "grad_norm": 1.423807144165039, "learning_rate": 6.329238888937278e-06, "loss": 0.5003, "step": 3740 }, { "epoch": 0.22445551088978222, "grad_norm": 1.2414405345916748, "learning_rate": 6.328838423612461e-06, "loss": 0.4646, "step": 3741 }, { "epoch": 0.22451550968980621, "grad_norm": 1.295215368270874, "learning_rate": 6.328437851455305e-06, "loss": 0.4725, "step": 3742 }, { "epoch": 0.2245755084898302, "grad_norm": 1.3124264478683472, "learning_rate": 6.328037172480937e-06, "loss": 0.4415, "step": 3743 }, { "epoch": 0.2246355072898542, "grad_norm": 1.2345134019851685, "learning_rate": 6.32763638670449e-06, "loss": 0.4554, "step": 3744 }, { "epoch": 0.2246955060898782, "grad_norm": 1.2127776145935059, "learning_rate": 6.327235494141098e-06, "loss": 0.4594, "step": 3745 }, { "epoch": 0.2247555048899022, "grad_norm": 1.39375901222229, "learning_rate": 6.326834494805904e-06, "loss": 0.4332, "step": 3746 }, { "epoch": 0.2248155036899262, "grad_norm": 1.2600581645965576, "learning_rate": 6.3264333887140485e-06, "loss": 0.4278, "step": 3747 }, { "epoch": 0.2248755024899502, "grad_norm": 1.2187036275863647, "learning_rate": 6.3260321758806835e-06, "loss": 0.4419, "step": 3748 }, { "epoch": 0.2249355012899742, "grad_norm": 1.5143334865570068, "learning_rate": 6.325630856320956e-06, "loss": 0.4731, "step": 3749 }, { "epoch": 0.2249955000899982, "grad_norm": 1.2405551671981812, "learning_rate": 6.325229430050026e-06, "loss": 0.4077, "step": 3750 }, { "epoch": 0.2250554988900222, "grad_norm": 1.3215588331222534, "learning_rate": 6.324827897083052e-06, "loss": 0.4981, "step": 3751 }, { "epoch": 0.2251154976900462, "grad_norm": 1.3276989459991455, "learning_rate": 6.324426257435198e-06, "loss": 0.4405, "step": 3752 }, { "epoch": 0.2251754964900702, "grad_norm": 1.2659906148910522, "learning_rate": 6.324024511121633e-06, "loss": 0.5213, "step": 3753 }, { "epoch": 0.2252354952900942, "grad_norm": 1.4742242097854614, "learning_rate": 6.323622658157527e-06, "loss": 0.5028, "step": 3754 }, { "epoch": 0.2252954940901182, "grad_norm": 1.1282038688659668, "learning_rate": 6.323220698558059e-06, "loss": 0.4086, "step": 3755 }, { "epoch": 0.2253554928901422, "grad_norm": 1.3692786693572998, "learning_rate": 6.322818632338407e-06, "loss": 0.5042, "step": 3756 }, { "epoch": 0.2254154916901662, "grad_norm": 1.204626441001892, "learning_rate": 6.3224164595137555e-06, "loss": 0.4801, "step": 3757 }, { "epoch": 0.2254754904901902, "grad_norm": 1.279915452003479, "learning_rate": 6.322014180099294e-06, "loss": 0.4618, "step": 3758 }, { "epoch": 0.2255354892902142, "grad_norm": 1.3008557558059692, "learning_rate": 6.321611794110214e-06, "loss": 0.4669, "step": 3759 }, { "epoch": 0.2255954880902382, "grad_norm": 1.1523696184158325, "learning_rate": 6.321209301561712e-06, "loss": 0.4406, "step": 3760 }, { "epoch": 0.22565548689026219, "grad_norm": 1.5088187456130981, "learning_rate": 6.320806702468987e-06, "loss": 0.4944, "step": 3761 }, { "epoch": 0.22571548569028618, "grad_norm": 1.2289936542510986, "learning_rate": 6.320403996847246e-06, "loss": 0.4304, "step": 3762 }, { "epoch": 0.2257754844903102, "grad_norm": 1.2776668071746826, "learning_rate": 6.3200011847116946e-06, "loss": 0.4524, "step": 3763 }, { "epoch": 0.2258354832903342, "grad_norm": 1.2227309942245483, "learning_rate": 6.319598266077547e-06, "loss": 0.4927, "step": 3764 }, { "epoch": 0.2258954820903582, "grad_norm": 1.2351293563842773, "learning_rate": 6.319195240960018e-06, "loss": 0.4818, "step": 3765 }, { "epoch": 0.2259554808903822, "grad_norm": 1.2579180002212524, "learning_rate": 6.31879210937433e-06, "loss": 0.4526, "step": 3766 }, { "epoch": 0.2260154796904062, "grad_norm": 1.5129225254058838, "learning_rate": 6.318388871335706e-06, "loss": 0.44, "step": 3767 }, { "epoch": 0.2260754784904302, "grad_norm": 1.1864733695983887, "learning_rate": 6.317985526859375e-06, "loss": 0.4328, "step": 3768 }, { "epoch": 0.2261354772904542, "grad_norm": 1.2603495121002197, "learning_rate": 6.317582075960569e-06, "loss": 0.5163, "step": 3769 }, { "epoch": 0.2261954760904782, "grad_norm": 1.1363499164581299, "learning_rate": 6.317178518654525e-06, "loss": 0.3365, "step": 3770 }, { "epoch": 0.22625547489050218, "grad_norm": 1.2876453399658203, "learning_rate": 6.316774854956484e-06, "loss": 0.4735, "step": 3771 }, { "epoch": 0.22631547369052618, "grad_norm": 1.356643557548523, "learning_rate": 6.316371084881688e-06, "loss": 0.5295, "step": 3772 }, { "epoch": 0.22637547249055018, "grad_norm": 1.294703722000122, "learning_rate": 6.315967208445389e-06, "loss": 0.4796, "step": 3773 }, { "epoch": 0.22643547129057418, "grad_norm": 1.249150037765503, "learning_rate": 6.315563225662838e-06, "loss": 0.4692, "step": 3774 }, { "epoch": 0.2264954700905982, "grad_norm": 1.3190603256225586, "learning_rate": 6.3151591365492925e-06, "loss": 0.4439, "step": 3775 }, { "epoch": 0.2265554688906222, "grad_norm": 1.3007007837295532, "learning_rate": 6.314754941120011e-06, "loss": 0.4861, "step": 3776 }, { "epoch": 0.2266154676906462, "grad_norm": 1.2770293951034546, "learning_rate": 6.31435063939026e-06, "loss": 0.4664, "step": 3777 }, { "epoch": 0.2266754664906702, "grad_norm": 1.2753627300262451, "learning_rate": 6.313946231375307e-06, "loss": 0.4299, "step": 3778 }, { "epoch": 0.2267354652906942, "grad_norm": 1.0887893438339233, "learning_rate": 6.313541717090425e-06, "loss": 0.4803, "step": 3779 }, { "epoch": 0.22679546409071819, "grad_norm": 1.2033144235610962, "learning_rate": 6.313137096550891e-06, "loss": 0.4116, "step": 3780 }, { "epoch": 0.22685546289074218, "grad_norm": 1.305129885673523, "learning_rate": 6.3127323697719855e-06, "loss": 0.4694, "step": 3781 }, { "epoch": 0.22691546169076618, "grad_norm": 1.2646037340164185, "learning_rate": 6.312327536768994e-06, "loss": 0.4602, "step": 3782 }, { "epoch": 0.22697546049079018, "grad_norm": 1.1074442863464355, "learning_rate": 6.311922597557203e-06, "loss": 0.4572, "step": 3783 }, { "epoch": 0.22703545929081417, "grad_norm": 1.2505412101745605, "learning_rate": 6.3115175521519075e-06, "loss": 0.4592, "step": 3784 }, { "epoch": 0.22709545809083817, "grad_norm": 1.2867443561553955, "learning_rate": 6.311112400568404e-06, "loss": 0.4745, "step": 3785 }, { "epoch": 0.2271554568908622, "grad_norm": 1.226894736289978, "learning_rate": 6.310707142821991e-06, "loss": 0.4601, "step": 3786 }, { "epoch": 0.2272154556908862, "grad_norm": 1.3101142644882202, "learning_rate": 6.310301778927974e-06, "loss": 0.4348, "step": 3787 }, { "epoch": 0.2272754544909102, "grad_norm": 1.2015016078948975, "learning_rate": 6.309896308901665e-06, "loss": 0.4637, "step": 3788 }, { "epoch": 0.2273354532909342, "grad_norm": 1.1110544204711914, "learning_rate": 6.3094907327583725e-06, "loss": 0.4292, "step": 3789 }, { "epoch": 0.22739545209095818, "grad_norm": 1.325100064277649, "learning_rate": 6.309085050513416e-06, "loss": 0.5035, "step": 3790 }, { "epoch": 0.22745545089098218, "grad_norm": 1.3193784952163696, "learning_rate": 6.308679262182114e-06, "loss": 0.4564, "step": 3791 }, { "epoch": 0.22751544969100618, "grad_norm": 1.137812614440918, "learning_rate": 6.3082733677797945e-06, "loss": 0.3967, "step": 3792 }, { "epoch": 0.22757544849103017, "grad_norm": 1.2077693939208984, "learning_rate": 6.307867367321784e-06, "loss": 0.4389, "step": 3793 }, { "epoch": 0.22763544729105417, "grad_norm": 1.184495210647583, "learning_rate": 6.307461260823415e-06, "loss": 0.4771, "step": 3794 }, { "epoch": 0.22769544609107817, "grad_norm": 1.2412129640579224, "learning_rate": 6.307055048300024e-06, "loss": 0.5057, "step": 3795 }, { "epoch": 0.22775544489110217, "grad_norm": 1.2083858251571655, "learning_rate": 6.3066487297669536e-06, "loss": 0.4247, "step": 3796 }, { "epoch": 0.2278154436911262, "grad_norm": 1.258762240409851, "learning_rate": 6.306242305239548e-06, "loss": 0.402, "step": 3797 }, { "epoch": 0.2278754424911502, "grad_norm": 1.0826524496078491, "learning_rate": 6.305835774733156e-06, "loss": 0.3745, "step": 3798 }, { "epoch": 0.22793544129117418, "grad_norm": 1.2211899757385254, "learning_rate": 6.30542913826313e-06, "loss": 0.4573, "step": 3799 }, { "epoch": 0.22799544009119818, "grad_norm": 1.3486956357955933, "learning_rate": 6.305022395844825e-06, "loss": 0.468, "step": 3800 }, { "epoch": 0.22805543889122218, "grad_norm": 1.5354303121566772, "learning_rate": 6.304615547493606e-06, "loss": 0.4924, "step": 3801 }, { "epoch": 0.22811543769124618, "grad_norm": 1.2153512239456177, "learning_rate": 6.3042085932248354e-06, "loss": 0.4859, "step": 3802 }, { "epoch": 0.22817543649127017, "grad_norm": 1.3307836055755615, "learning_rate": 6.303801533053882e-06, "loss": 0.4387, "step": 3803 }, { "epoch": 0.22823543529129417, "grad_norm": 1.1974068880081177, "learning_rate": 6.303394366996118e-06, "loss": 0.5414, "step": 3804 }, { "epoch": 0.22829543409131817, "grad_norm": 1.3075008392333984, "learning_rate": 6.302987095066922e-06, "loss": 0.4735, "step": 3805 }, { "epoch": 0.22835543289134216, "grad_norm": 1.280059814453125, "learning_rate": 6.3025797172816745e-06, "loss": 0.4786, "step": 3806 }, { "epoch": 0.22841543169136616, "grad_norm": 1.266695261001587, "learning_rate": 6.302172233655758e-06, "loss": 0.4626, "step": 3807 }, { "epoch": 0.22847543049139019, "grad_norm": 1.3262996673583984, "learning_rate": 6.301764644204563e-06, "loss": 0.4582, "step": 3808 }, { "epoch": 0.22853542929141418, "grad_norm": 1.3658329248428345, "learning_rate": 6.3013569489434825e-06, "loss": 0.4901, "step": 3809 }, { "epoch": 0.22859542809143818, "grad_norm": 1.2986727952957153, "learning_rate": 6.300949147887913e-06, "loss": 0.481, "step": 3810 }, { "epoch": 0.22865542689146218, "grad_norm": 1.2079051733016968, "learning_rate": 6.3005412410532556e-06, "loss": 0.4205, "step": 3811 }, { "epoch": 0.22871542569148617, "grad_norm": 1.1647406816482544, "learning_rate": 6.300133228454914e-06, "loss": 0.366, "step": 3812 }, { "epoch": 0.22877542449151017, "grad_norm": 1.1216442584991455, "learning_rate": 6.299725110108298e-06, "loss": 0.3906, "step": 3813 }, { "epoch": 0.22883542329153417, "grad_norm": 1.2388240098953247, "learning_rate": 6.299316886028821e-06, "loss": 0.4065, "step": 3814 }, { "epoch": 0.22889542209155816, "grad_norm": 1.2919042110443115, "learning_rate": 6.298908556231898e-06, "loss": 0.4483, "step": 3815 }, { "epoch": 0.22895542089158216, "grad_norm": 1.1048063039779663, "learning_rate": 6.298500120732951e-06, "loss": 0.3918, "step": 3816 }, { "epoch": 0.22901541969160616, "grad_norm": 1.2689098119735718, "learning_rate": 6.2980915795474035e-06, "loss": 0.4865, "step": 3817 }, { "epoch": 0.22907541849163016, "grad_norm": 1.3810129165649414, "learning_rate": 6.297682932690686e-06, "loss": 0.428, "step": 3818 }, { "epoch": 0.22913541729165415, "grad_norm": 1.3693702220916748, "learning_rate": 6.29727418017823e-06, "loss": 0.4562, "step": 3819 }, { "epoch": 0.22919541609167818, "grad_norm": 1.2996689081192017, "learning_rate": 6.296865322025473e-06, "loss": 0.4605, "step": 3820 }, { "epoch": 0.22925541489170218, "grad_norm": 1.2586100101470947, "learning_rate": 6.296456358247855e-06, "loss": 0.4343, "step": 3821 }, { "epoch": 0.22931541369172617, "grad_norm": 1.3168822526931763, "learning_rate": 6.296047288860821e-06, "loss": 0.5071, "step": 3822 }, { "epoch": 0.22937541249175017, "grad_norm": 1.2859807014465332, "learning_rate": 6.29563811387982e-06, "loss": 0.4981, "step": 3823 }, { "epoch": 0.22943541129177417, "grad_norm": 1.460992693901062, "learning_rate": 6.295228833320303e-06, "loss": 0.499, "step": 3824 }, { "epoch": 0.22949541009179816, "grad_norm": 1.2924145460128784, "learning_rate": 6.294819447197729e-06, "loss": 0.511, "step": 3825 }, { "epoch": 0.22955540889182216, "grad_norm": 1.1239594221115112, "learning_rate": 6.294409955527558e-06, "loss": 0.4864, "step": 3826 }, { "epoch": 0.22961540769184616, "grad_norm": 1.2700364589691162, "learning_rate": 6.294000358325254e-06, "loss": 0.4551, "step": 3827 }, { "epoch": 0.22967540649187015, "grad_norm": 1.4313520193099976, "learning_rate": 6.293590655606286e-06, "loss": 0.4507, "step": 3828 }, { "epoch": 0.22973540529189415, "grad_norm": 1.1645785570144653, "learning_rate": 6.2931808473861266e-06, "loss": 0.4216, "step": 3829 }, { "epoch": 0.22979540409191815, "grad_norm": 1.425503134727478, "learning_rate": 6.292770933680251e-06, "loss": 0.491, "step": 3830 }, { "epoch": 0.22985540289194217, "grad_norm": 1.2956370115280151, "learning_rate": 6.2923609145041426e-06, "loss": 0.4485, "step": 3831 }, { "epoch": 0.22991540169196617, "grad_norm": 1.3550385236740112, "learning_rate": 6.291950789873284e-06, "loss": 0.4416, "step": 3832 }, { "epoch": 0.22997540049199017, "grad_norm": 1.2429084777832031, "learning_rate": 6.2915405598031635e-06, "loss": 0.4764, "step": 3833 }, { "epoch": 0.23003539929201416, "grad_norm": 1.3842803239822388, "learning_rate": 6.291130224309275e-06, "loss": 0.4969, "step": 3834 }, { "epoch": 0.23009539809203816, "grad_norm": 1.2399861812591553, "learning_rate": 6.290719783407114e-06, "loss": 0.4442, "step": 3835 }, { "epoch": 0.23015539689206216, "grad_norm": 1.1673272848129272, "learning_rate": 6.290309237112181e-06, "loss": 0.3925, "step": 3836 }, { "epoch": 0.23021539569208616, "grad_norm": 1.2013803720474243, "learning_rate": 6.289898585439982e-06, "loss": 0.3939, "step": 3837 }, { "epoch": 0.23027539449211015, "grad_norm": 1.2909754514694214, "learning_rate": 6.2894878284060235e-06, "loss": 0.4648, "step": 3838 }, { "epoch": 0.23033539329213415, "grad_norm": 1.3171367645263672, "learning_rate": 6.289076966025818e-06, "loss": 0.454, "step": 3839 }, { "epoch": 0.23039539209215815, "grad_norm": 1.3155704736709595, "learning_rate": 6.288665998314883e-06, "loss": 0.4586, "step": 3840 }, { "epoch": 0.23045539089218214, "grad_norm": 1.183996319770813, "learning_rate": 6.2882549252887386e-06, "loss": 0.4885, "step": 3841 }, { "epoch": 0.23051538969220617, "grad_norm": 1.1475234031677246, "learning_rate": 6.287843746962908e-06, "loss": 0.4533, "step": 3842 }, { "epoch": 0.23057538849223017, "grad_norm": 1.2867177724838257, "learning_rate": 6.287432463352921e-06, "loss": 0.4388, "step": 3843 }, { "epoch": 0.23063538729225416, "grad_norm": 1.2564976215362549, "learning_rate": 6.287021074474309e-06, "loss": 0.4166, "step": 3844 }, { "epoch": 0.23069538609227816, "grad_norm": 1.2734253406524658, "learning_rate": 6.286609580342609e-06, "loss": 0.4369, "step": 3845 }, { "epoch": 0.23075538489230216, "grad_norm": 1.2191147804260254, "learning_rate": 6.286197980973362e-06, "loss": 0.4672, "step": 3846 }, { "epoch": 0.23081538369232615, "grad_norm": 1.1818503141403198, "learning_rate": 6.28578627638211e-06, "loss": 0.4197, "step": 3847 }, { "epoch": 0.23087538249235015, "grad_norm": 1.2679641246795654, "learning_rate": 6.285374466584402e-06, "loss": 0.4136, "step": 3848 }, { "epoch": 0.23093538129237415, "grad_norm": 1.1961239576339722, "learning_rate": 6.284962551595791e-06, "loss": 0.4815, "step": 3849 }, { "epoch": 0.23099538009239814, "grad_norm": 1.2237604856491089, "learning_rate": 6.284550531431834e-06, "loss": 0.4568, "step": 3850 }, { "epoch": 0.23105537889242214, "grad_norm": 1.212365984916687, "learning_rate": 6.284138406108088e-06, "loss": 0.4317, "step": 3851 }, { "epoch": 0.23111537769244614, "grad_norm": 1.3149014711380005, "learning_rate": 6.283726175640118e-06, "loss": 0.4719, "step": 3852 }, { "epoch": 0.23117537649247016, "grad_norm": 1.3004502058029175, "learning_rate": 6.283313840043495e-06, "loss": 0.4456, "step": 3853 }, { "epoch": 0.23123537529249416, "grad_norm": 1.4643151760101318, "learning_rate": 6.282901399333789e-06, "loss": 0.5361, "step": 3854 }, { "epoch": 0.23129537409251816, "grad_norm": 1.221537709236145, "learning_rate": 6.2824888535265755e-06, "loss": 0.4438, "step": 3855 }, { "epoch": 0.23135537289254215, "grad_norm": 1.4007015228271484, "learning_rate": 6.282076202637434e-06, "loss": 0.4695, "step": 3856 }, { "epoch": 0.23141537169256615, "grad_norm": 1.4462175369262695, "learning_rate": 6.281663446681951e-06, "loss": 0.523, "step": 3857 }, { "epoch": 0.23147537049259015, "grad_norm": 1.1937768459320068, "learning_rate": 6.281250585675711e-06, "loss": 0.4272, "step": 3858 }, { "epoch": 0.23153536929261415, "grad_norm": 1.2730138301849365, "learning_rate": 6.280837619634309e-06, "loss": 0.4604, "step": 3859 }, { "epoch": 0.23159536809263814, "grad_norm": 1.2749582529067993, "learning_rate": 6.28042454857334e-06, "loss": 0.479, "step": 3860 }, { "epoch": 0.23165536689266214, "grad_norm": 1.2469971179962158, "learning_rate": 6.280011372508403e-06, "loss": 0.4866, "step": 3861 }, { "epoch": 0.23171536569268614, "grad_norm": 1.2749067544937134, "learning_rate": 6.279598091455102e-06, "loss": 0.4708, "step": 3862 }, { "epoch": 0.23177536449271013, "grad_norm": 1.294648289680481, "learning_rate": 6.279184705429045e-06, "loss": 0.468, "step": 3863 }, { "epoch": 0.23183536329273416, "grad_norm": 1.2578500509262085, "learning_rate": 6.278771214445844e-06, "loss": 0.4282, "step": 3864 }, { "epoch": 0.23189536209275816, "grad_norm": 1.3206897974014282, "learning_rate": 6.278357618521114e-06, "loss": 0.527, "step": 3865 }, { "epoch": 0.23195536089278215, "grad_norm": 1.2235618829727173, "learning_rate": 6.277943917670474e-06, "loss": 0.3949, "step": 3866 }, { "epoch": 0.23201535969280615, "grad_norm": 1.2109965085983276, "learning_rate": 6.27753011190955e-06, "loss": 0.4554, "step": 3867 }, { "epoch": 0.23207535849283015, "grad_norm": 1.319857120513916, "learning_rate": 6.277116201253966e-06, "loss": 0.4243, "step": 3868 }, { "epoch": 0.23213535729285414, "grad_norm": 1.1927906274795532, "learning_rate": 6.276702185719357e-06, "loss": 0.4358, "step": 3869 }, { "epoch": 0.23219535609287814, "grad_norm": 1.2293483018875122, "learning_rate": 6.276288065321357e-06, "loss": 0.449, "step": 3870 }, { "epoch": 0.23225535489290214, "grad_norm": 1.2643083333969116, "learning_rate": 6.275873840075606e-06, "loss": 0.48, "step": 3871 }, { "epoch": 0.23231535369292614, "grad_norm": 1.2808780670166016, "learning_rate": 6.275459509997746e-06, "loss": 0.4662, "step": 3872 }, { "epoch": 0.23237535249295013, "grad_norm": 1.2165327072143555, "learning_rate": 6.2750450751034255e-06, "loss": 0.4872, "step": 3873 }, { "epoch": 0.23243535129297413, "grad_norm": 1.0864629745483398, "learning_rate": 6.2746305354082965e-06, "loss": 0.474, "step": 3874 }, { "epoch": 0.23249535009299813, "grad_norm": 1.2453525066375732, "learning_rate": 6.274215890928012e-06, "loss": 0.3979, "step": 3875 }, { "epoch": 0.23255534889302215, "grad_norm": 1.4186033010482788, "learning_rate": 6.2738011416782334e-06, "loss": 0.4636, "step": 3876 }, { "epoch": 0.23261534769304615, "grad_norm": 1.2367078065872192, "learning_rate": 6.2733862876746235e-06, "loss": 0.5142, "step": 3877 }, { "epoch": 0.23267534649307015, "grad_norm": 1.1733012199401855, "learning_rate": 6.272971328932848e-06, "loss": 0.4735, "step": 3878 }, { "epoch": 0.23273534529309414, "grad_norm": 1.2563925981521606, "learning_rate": 6.272556265468579e-06, "loss": 0.4591, "step": 3879 }, { "epoch": 0.23279534409311814, "grad_norm": 1.3225767612457275, "learning_rate": 6.272141097297493e-06, "loss": 0.4307, "step": 3880 }, { "epoch": 0.23285534289314214, "grad_norm": 1.3044111728668213, "learning_rate": 6.271725824435267e-06, "loss": 0.4748, "step": 3881 }, { "epoch": 0.23291534169316613, "grad_norm": 1.2088512182235718, "learning_rate": 6.271310446897584e-06, "loss": 0.4983, "step": 3882 }, { "epoch": 0.23297534049319013, "grad_norm": 1.159775733947754, "learning_rate": 6.270894964700131e-06, "loss": 0.4573, "step": 3883 }, { "epoch": 0.23303533929321413, "grad_norm": 1.3303738832473755, "learning_rate": 6.2704793778586004e-06, "loss": 0.5131, "step": 3884 }, { "epoch": 0.23309533809323812, "grad_norm": 1.269437313079834, "learning_rate": 6.270063686388685e-06, "loss": 0.4803, "step": 3885 }, { "epoch": 0.23315533689326212, "grad_norm": 1.2193098068237305, "learning_rate": 6.269647890306085e-06, "loss": 0.4398, "step": 3886 }, { "epoch": 0.23321533569328615, "grad_norm": 1.2804076671600342, "learning_rate": 6.269231989626503e-06, "loss": 0.4542, "step": 3887 }, { "epoch": 0.23327533449331014, "grad_norm": 1.4807038307189941, "learning_rate": 6.268815984365645e-06, "loss": 0.4521, "step": 3888 }, { "epoch": 0.23333533329333414, "grad_norm": 1.3456083536148071, "learning_rate": 6.268399874539222e-06, "loss": 0.4544, "step": 3889 }, { "epoch": 0.23339533209335814, "grad_norm": 1.3208606243133545, "learning_rate": 6.267983660162949e-06, "loss": 0.4749, "step": 3890 }, { "epoch": 0.23345533089338213, "grad_norm": 1.1205447912216187, "learning_rate": 6.2675673412525445e-06, "loss": 0.475, "step": 3891 }, { "epoch": 0.23351532969340613, "grad_norm": 1.2246818542480469, "learning_rate": 6.267150917823729e-06, "loss": 0.4968, "step": 3892 }, { "epoch": 0.23357532849343013, "grad_norm": 1.2935832738876343, "learning_rate": 6.266734389892231e-06, "loss": 0.4818, "step": 3893 }, { "epoch": 0.23363532729345413, "grad_norm": 1.4702725410461426, "learning_rate": 6.266317757473781e-06, "loss": 0.494, "step": 3894 }, { "epoch": 0.23369532609347812, "grad_norm": 1.356195330619812, "learning_rate": 6.265901020584112e-06, "loss": 0.4806, "step": 3895 }, { "epoch": 0.23375532489350212, "grad_norm": 1.1625124216079712, "learning_rate": 6.265484179238963e-06, "loss": 0.4892, "step": 3896 }, { "epoch": 0.23381532369352612, "grad_norm": 1.200708270072937, "learning_rate": 6.265067233454076e-06, "loss": 0.429, "step": 3897 }, { "epoch": 0.23387532249355014, "grad_norm": 1.2494527101516724, "learning_rate": 6.264650183245197e-06, "loss": 0.4718, "step": 3898 }, { "epoch": 0.23393532129357414, "grad_norm": 1.389925479888916, "learning_rate": 6.264233028628077e-06, "loss": 0.4471, "step": 3899 }, { "epoch": 0.23399532009359814, "grad_norm": 1.2035354375839233, "learning_rate": 6.263815769618469e-06, "loss": 0.4427, "step": 3900 }, { "epoch": 0.23405531889362213, "grad_norm": 1.5170392990112305, "learning_rate": 6.263398406232131e-06, "loss": 0.4933, "step": 3901 }, { "epoch": 0.23411531769364613, "grad_norm": 1.269334316253662, "learning_rate": 6.262980938484826e-06, "loss": 0.4474, "step": 3902 }, { "epoch": 0.23417531649367013, "grad_norm": 1.1700348854064941, "learning_rate": 6.262563366392318e-06, "loss": 0.4078, "step": 3903 }, { "epoch": 0.23423531529369412, "grad_norm": 1.2721339464187622, "learning_rate": 6.262145689970378e-06, "loss": 0.4526, "step": 3904 }, { "epoch": 0.23429531409371812, "grad_norm": 1.2899833917617798, "learning_rate": 6.26172790923478e-06, "loss": 0.5114, "step": 3905 }, { "epoch": 0.23435531289374212, "grad_norm": 1.2601871490478516, "learning_rate": 6.2613100242013e-06, "loss": 0.5352, "step": 3906 }, { "epoch": 0.23441531169376612, "grad_norm": 1.3444758653640747, "learning_rate": 6.260892034885723e-06, "loss": 0.4444, "step": 3907 }, { "epoch": 0.2344753104937901, "grad_norm": 1.324641466140747, "learning_rate": 6.260473941303832e-06, "loss": 0.4734, "step": 3908 }, { "epoch": 0.23453530929381414, "grad_norm": 1.3103948831558228, "learning_rate": 6.260055743471416e-06, "loss": 0.4275, "step": 3909 }, { "epoch": 0.23459530809383813, "grad_norm": 1.3242334127426147, "learning_rate": 6.25963744140427e-06, "loss": 0.4916, "step": 3910 }, { "epoch": 0.23465530689386213, "grad_norm": 1.1551414728164673, "learning_rate": 6.25921903511819e-06, "loss": 0.4005, "step": 3911 }, { "epoch": 0.23471530569388613, "grad_norm": 1.323932409286499, "learning_rate": 6.258800524628979e-06, "loss": 0.4413, "step": 3912 }, { "epoch": 0.23477530449391013, "grad_norm": 1.1697393655776978, "learning_rate": 6.258381909952441e-06, "loss": 0.4432, "step": 3913 }, { "epoch": 0.23483530329393412, "grad_norm": 1.2026491165161133, "learning_rate": 6.257963191104384e-06, "loss": 0.4539, "step": 3914 }, { "epoch": 0.23489530209395812, "grad_norm": 1.270440936088562, "learning_rate": 6.257544368100624e-06, "loss": 0.4944, "step": 3915 }, { "epoch": 0.23495530089398212, "grad_norm": 1.278679609298706, "learning_rate": 6.257125440956977e-06, "loss": 0.4402, "step": 3916 }, { "epoch": 0.2350152996940061, "grad_norm": 1.2910319566726685, "learning_rate": 6.256706409689262e-06, "loss": 0.4644, "step": 3917 }, { "epoch": 0.2350752984940301, "grad_norm": 1.3322151899337769, "learning_rate": 6.256287274313306e-06, "loss": 0.5169, "step": 3918 }, { "epoch": 0.2351352972940541, "grad_norm": 1.2721481323242188, "learning_rate": 6.255868034844938e-06, "loss": 0.4465, "step": 3919 }, { "epoch": 0.23519529609407813, "grad_norm": 1.1985023021697998, "learning_rate": 6.255448691299989e-06, "loss": 0.45, "step": 3920 }, { "epoch": 0.23525529489410213, "grad_norm": 1.2935878038406372, "learning_rate": 6.255029243694298e-06, "loss": 0.4454, "step": 3921 }, { "epoch": 0.23531529369412613, "grad_norm": 1.248757004737854, "learning_rate": 6.254609692043703e-06, "loss": 0.4957, "step": 3922 }, { "epoch": 0.23537529249415012, "grad_norm": 1.2386184930801392, "learning_rate": 6.2541900363640506e-06, "loss": 0.4449, "step": 3923 }, { "epoch": 0.23543529129417412, "grad_norm": 1.352911114692688, "learning_rate": 6.253770276671188e-06, "loss": 0.4966, "step": 3924 }, { "epoch": 0.23549529009419812, "grad_norm": 1.2358508110046387, "learning_rate": 6.2533504129809694e-06, "loss": 0.4476, "step": 3925 }, { "epoch": 0.23555528889422211, "grad_norm": 1.2555983066558838, "learning_rate": 6.252930445309249e-06, "loss": 0.4115, "step": 3926 }, { "epoch": 0.2356152876942461, "grad_norm": 1.3427704572677612, "learning_rate": 6.252510373671889e-06, "loss": 0.4219, "step": 3927 }, { "epoch": 0.2356752864942701, "grad_norm": 1.2334262132644653, "learning_rate": 6.252090198084751e-06, "loss": 0.5107, "step": 3928 }, { "epoch": 0.2357352852942941, "grad_norm": 1.2127774953842163, "learning_rate": 6.251669918563706e-06, "loss": 0.4477, "step": 3929 }, { "epoch": 0.2357952840943181, "grad_norm": 1.3055216073989868, "learning_rate": 6.251249535124623e-06, "loss": 0.4515, "step": 3930 }, { "epoch": 0.2358552828943421, "grad_norm": 1.2371647357940674, "learning_rate": 6.250829047783381e-06, "loss": 0.5368, "step": 3931 }, { "epoch": 0.23591528169436612, "grad_norm": 1.1606234312057495, "learning_rate": 6.250408456555858e-06, "loss": 0.4704, "step": 3932 }, { "epoch": 0.23597528049439012, "grad_norm": 1.2693595886230469, "learning_rate": 6.24998776145794e-06, "loss": 0.4634, "step": 3933 }, { "epoch": 0.23603527929441412, "grad_norm": 1.199256420135498, "learning_rate": 6.2495669625055114e-06, "loss": 0.4282, "step": 3934 }, { "epoch": 0.23609527809443812, "grad_norm": 1.3924527168273926, "learning_rate": 6.2491460597144675e-06, "loss": 0.5077, "step": 3935 }, { "epoch": 0.2361552768944621, "grad_norm": 1.270361304283142, "learning_rate": 6.2487250531007e-06, "loss": 0.4562, "step": 3936 }, { "epoch": 0.2362152756944861, "grad_norm": 1.3811389207839966, "learning_rate": 6.248303942680112e-06, "loss": 0.4673, "step": 3937 }, { "epoch": 0.2362752744945101, "grad_norm": 1.2577329874038696, "learning_rate": 6.247882728468604e-06, "loss": 0.4665, "step": 3938 }, { "epoch": 0.2363352732945341, "grad_norm": 1.2579056024551392, "learning_rate": 6.247461410482085e-06, "loss": 0.4175, "step": 3939 }, { "epoch": 0.2363952720945581, "grad_norm": 1.4297386407852173, "learning_rate": 6.247039988736467e-06, "loss": 0.4652, "step": 3940 }, { "epoch": 0.2364552708945821, "grad_norm": 1.329157829284668, "learning_rate": 6.2466184632476625e-06, "loss": 0.4465, "step": 3941 }, { "epoch": 0.2365152696946061, "grad_norm": 1.1692579984664917, "learning_rate": 6.246196834031593e-06, "loss": 0.4027, "step": 3942 }, { "epoch": 0.23657526849463012, "grad_norm": 1.2134220600128174, "learning_rate": 6.245775101104181e-06, "loss": 0.4165, "step": 3943 }, { "epoch": 0.23663526729465412, "grad_norm": 1.2566026449203491, "learning_rate": 6.245353264481353e-06, "loss": 0.4608, "step": 3944 }, { "epoch": 0.23669526609467811, "grad_norm": 1.3675899505615234, "learning_rate": 6.24493132417904e-06, "loss": 0.492, "step": 3945 }, { "epoch": 0.2367552648947021, "grad_norm": 1.6820236444473267, "learning_rate": 6.244509280213177e-06, "loss": 0.4806, "step": 3946 }, { "epoch": 0.2368152636947261, "grad_norm": 1.1834995746612549, "learning_rate": 6.244087132599701e-06, "loss": 0.4324, "step": 3947 }, { "epoch": 0.2368752624947501, "grad_norm": 1.1776785850524902, "learning_rate": 6.243664881354558e-06, "loss": 0.438, "step": 3948 }, { "epoch": 0.2369352612947741, "grad_norm": 1.142322063446045, "learning_rate": 6.2432425264936916e-06, "loss": 0.432, "step": 3949 }, { "epoch": 0.2369952600947981, "grad_norm": 1.146523118019104, "learning_rate": 6.242820068033053e-06, "loss": 0.3941, "step": 3950 }, { "epoch": 0.2370552588948221, "grad_norm": 1.1325457096099854, "learning_rate": 6.242397505988597e-06, "loss": 0.4051, "step": 3951 }, { "epoch": 0.2371152576948461, "grad_norm": 1.2269309759140015, "learning_rate": 6.241974840376282e-06, "loss": 0.4245, "step": 3952 }, { "epoch": 0.2371752564948701, "grad_norm": 1.2635537385940552, "learning_rate": 6.241552071212068e-06, "loss": 0.4795, "step": 3953 }, { "epoch": 0.23723525529489412, "grad_norm": 1.2665326595306396, "learning_rate": 6.241129198511924e-06, "loss": 0.4727, "step": 3954 }, { "epoch": 0.2372952540949181, "grad_norm": 1.2527128458023071, "learning_rate": 6.240706222291818e-06, "loss": 0.4782, "step": 3955 }, { "epoch": 0.2373552528949421, "grad_norm": 1.198751449584961, "learning_rate": 6.240283142567727e-06, "loss": 0.4186, "step": 3956 }, { "epoch": 0.2374152516949661, "grad_norm": 1.345041275024414, "learning_rate": 6.239859959355624e-06, "loss": 0.4692, "step": 3957 }, { "epoch": 0.2374752504949901, "grad_norm": 1.2825130224227905, "learning_rate": 6.239436672671494e-06, "loss": 0.4736, "step": 3958 }, { "epoch": 0.2375352492950141, "grad_norm": 1.26047682762146, "learning_rate": 6.2390132825313205e-06, "loss": 0.5176, "step": 3959 }, { "epoch": 0.2375952480950381, "grad_norm": 1.3261786699295044, "learning_rate": 6.238589788951096e-06, "loss": 0.4249, "step": 3960 }, { "epoch": 0.2376552468950621, "grad_norm": 1.1312751770019531, "learning_rate": 6.2381661919468114e-06, "loss": 0.4572, "step": 3961 }, { "epoch": 0.2377152456950861, "grad_norm": 1.2099188566207886, "learning_rate": 6.237742491534464e-06, "loss": 0.4699, "step": 3962 }, { "epoch": 0.2377752444951101, "grad_norm": 1.2038257122039795, "learning_rate": 6.237318687730056e-06, "loss": 0.4202, "step": 3963 }, { "epoch": 0.23783524329513409, "grad_norm": 1.2114027738571167, "learning_rate": 6.2368947805495944e-06, "loss": 0.4446, "step": 3964 }, { "epoch": 0.2378952420951581, "grad_norm": 1.2367240190505981, "learning_rate": 6.236470770009084e-06, "loss": 0.4117, "step": 3965 }, { "epoch": 0.2379552408951821, "grad_norm": 1.1791342496871948, "learning_rate": 6.236046656124541e-06, "loss": 0.4525, "step": 3966 }, { "epoch": 0.2380152396952061, "grad_norm": 1.368454933166504, "learning_rate": 6.23562243891198e-06, "loss": 0.4715, "step": 3967 }, { "epoch": 0.2380752384952301, "grad_norm": 1.1408865451812744, "learning_rate": 6.235198118387425e-06, "loss": 0.4287, "step": 3968 }, { "epoch": 0.2381352372952541, "grad_norm": 1.1625581979751587, "learning_rate": 6.234773694566897e-06, "loss": 0.403, "step": 3969 }, { "epoch": 0.2381952360952781, "grad_norm": 1.2307450771331787, "learning_rate": 6.234349167466426e-06, "loss": 0.4595, "step": 3970 }, { "epoch": 0.2382552348953021, "grad_norm": 1.1444168090820312, "learning_rate": 6.2339245371020456e-06, "loss": 0.4046, "step": 3971 }, { "epoch": 0.2383152336953261, "grad_norm": 1.308617115020752, "learning_rate": 6.233499803489791e-06, "loss": 0.4404, "step": 3972 }, { "epoch": 0.2383752324953501, "grad_norm": 1.2473691701889038, "learning_rate": 6.233074966645701e-06, "loss": 0.431, "step": 3973 }, { "epoch": 0.23843523129537408, "grad_norm": 1.345826506614685, "learning_rate": 6.232650026585824e-06, "loss": 0.5064, "step": 3974 }, { "epoch": 0.23849523009539808, "grad_norm": 1.225189447402954, "learning_rate": 6.232224983326204e-06, "loss": 0.436, "step": 3975 }, { "epoch": 0.23855522889542208, "grad_norm": 1.2431349754333496, "learning_rate": 6.231799836882894e-06, "loss": 0.4659, "step": 3976 }, { "epoch": 0.2386152276954461, "grad_norm": 1.310357928276062, "learning_rate": 6.2313745872719505e-06, "loss": 0.4663, "step": 3977 }, { "epoch": 0.2386752264954701, "grad_norm": 1.1906307935714722, "learning_rate": 6.230949234509433e-06, "loss": 0.4116, "step": 3978 }, { "epoch": 0.2387352252954941, "grad_norm": 1.2221652269363403, "learning_rate": 6.230523778611406e-06, "loss": 0.4314, "step": 3979 }, { "epoch": 0.2387952240955181, "grad_norm": 1.3149324655532837, "learning_rate": 6.230098219593935e-06, "loss": 0.4744, "step": 3980 }, { "epoch": 0.2388552228955421, "grad_norm": 1.2294214963912964, "learning_rate": 6.2296725574730934e-06, "loss": 0.4533, "step": 3981 }, { "epoch": 0.2389152216955661, "grad_norm": 1.337965488433838, "learning_rate": 6.229246792264955e-06, "loss": 0.496, "step": 3982 }, { "epoch": 0.23897522049559008, "grad_norm": 1.2441383600234985, "learning_rate": 6.2288209239856e-06, "loss": 0.4606, "step": 3983 }, { "epoch": 0.23903521929561408, "grad_norm": 1.4992339611053467, "learning_rate": 6.228394952651111e-06, "loss": 0.5456, "step": 3984 }, { "epoch": 0.23909521809563808, "grad_norm": 1.1006211042404175, "learning_rate": 6.227968878277575e-06, "loss": 0.4285, "step": 3985 }, { "epoch": 0.23915521689566208, "grad_norm": 1.2355432510375977, "learning_rate": 6.227542700881082e-06, "loss": 0.4473, "step": 3986 }, { "epoch": 0.23921521569568607, "grad_norm": 1.1789345741271973, "learning_rate": 6.22711642047773e-06, "loss": 0.394, "step": 3987 }, { "epoch": 0.2392752144957101, "grad_norm": 1.1899569034576416, "learning_rate": 6.226690037083614e-06, "loss": 0.4357, "step": 3988 }, { "epoch": 0.2393352132957341, "grad_norm": 1.4595941305160522, "learning_rate": 6.226263550714838e-06, "loss": 0.4836, "step": 3989 }, { "epoch": 0.2393952120957581, "grad_norm": 1.367331624031067, "learning_rate": 6.22583696138751e-06, "loss": 0.4789, "step": 3990 }, { "epoch": 0.2394552108957821, "grad_norm": 1.2654290199279785, "learning_rate": 6.225410269117738e-06, "loss": 0.4695, "step": 3991 }, { "epoch": 0.23951520969580609, "grad_norm": 1.2341692447662354, "learning_rate": 6.224983473921637e-06, "loss": 0.4288, "step": 3992 }, { "epoch": 0.23957520849583008, "grad_norm": 1.2527910470962524, "learning_rate": 6.224556575815325e-06, "loss": 0.528, "step": 3993 }, { "epoch": 0.23963520729585408, "grad_norm": 1.1934411525726318, "learning_rate": 6.224129574814925e-06, "loss": 0.4792, "step": 3994 }, { "epoch": 0.23969520609587808, "grad_norm": 1.1687804460525513, "learning_rate": 6.223702470936562e-06, "loss": 0.4743, "step": 3995 }, { "epoch": 0.23975520489590207, "grad_norm": 1.2111365795135498, "learning_rate": 6.223275264196365e-06, "loss": 0.4282, "step": 3996 }, { "epoch": 0.23981520369592607, "grad_norm": 1.241452932357788, "learning_rate": 6.2228479546104686e-06, "loss": 0.4157, "step": 3997 }, { "epoch": 0.23987520249595007, "grad_norm": 1.3047959804534912, "learning_rate": 6.22242054219501e-06, "loss": 0.432, "step": 3998 }, { "epoch": 0.2399352012959741, "grad_norm": 1.362937569618225, "learning_rate": 6.221993026966132e-06, "loss": 0.4795, "step": 3999 }, { "epoch": 0.2399952000959981, "grad_norm": 1.257739543914795, "learning_rate": 6.221565408939978e-06, "loss": 0.4349, "step": 4000 }, { "epoch": 0.2400551988960221, "grad_norm": 1.333024501800537, "learning_rate": 6.221137688132698e-06, "loss": 0.4656, "step": 4001 }, { "epoch": 0.24011519769604608, "grad_norm": 1.238416075706482, "learning_rate": 6.220709864560445e-06, "loss": 0.5034, "step": 4002 }, { "epoch": 0.24017519649607008, "grad_norm": 1.1398074626922607, "learning_rate": 6.220281938239376e-06, "loss": 0.4514, "step": 4003 }, { "epoch": 0.24023519529609408, "grad_norm": 1.305513620376587, "learning_rate": 6.219853909185651e-06, "loss": 0.4524, "step": 4004 }, { "epoch": 0.24029519409611808, "grad_norm": 1.2514002323150635, "learning_rate": 6.2194257774154365e-06, "loss": 0.4563, "step": 4005 }, { "epoch": 0.24035519289614207, "grad_norm": 1.1839241981506348, "learning_rate": 6.218997542944899e-06, "loss": 0.4422, "step": 4006 }, { "epoch": 0.24041519169616607, "grad_norm": 1.144951581954956, "learning_rate": 6.218569205790213e-06, "loss": 0.412, "step": 4007 }, { "epoch": 0.24047519049619007, "grad_norm": 1.219608187675476, "learning_rate": 6.218140765967554e-06, "loss": 0.4807, "step": 4008 }, { "epoch": 0.24053518929621406, "grad_norm": 1.1467417478561401, "learning_rate": 6.217712223493102e-06, "loss": 0.4655, "step": 4009 }, { "epoch": 0.2405951880962381, "grad_norm": 1.434380292892456, "learning_rate": 6.21728357838304e-06, "loss": 0.4943, "step": 4010 }, { "epoch": 0.24065518689626209, "grad_norm": 1.2207040786743164, "learning_rate": 6.216854830653558e-06, "loss": 0.4201, "step": 4011 }, { "epoch": 0.24071518569628608, "grad_norm": 1.4452341794967651, "learning_rate": 6.216425980320848e-06, "loss": 0.4233, "step": 4012 }, { "epoch": 0.24077518449631008, "grad_norm": 1.3710768222808838, "learning_rate": 6.215997027401103e-06, "loss": 0.4846, "step": 4013 }, { "epoch": 0.24083518329633408, "grad_norm": 1.3294414281845093, "learning_rate": 6.215567971910526e-06, "loss": 0.4949, "step": 4014 }, { "epoch": 0.24089518209635807, "grad_norm": 1.4912775754928589, "learning_rate": 6.215138813865317e-06, "loss": 0.4809, "step": 4015 }, { "epoch": 0.24095518089638207, "grad_norm": 1.3616673946380615, "learning_rate": 6.214709553281687e-06, "loss": 0.4724, "step": 4016 }, { "epoch": 0.24101517969640607, "grad_norm": 1.3217477798461914, "learning_rate": 6.2142801901758435e-06, "loss": 0.4932, "step": 4017 }, { "epoch": 0.24107517849643006, "grad_norm": 1.143399715423584, "learning_rate": 6.213850724564005e-06, "loss": 0.4953, "step": 4018 }, { "epoch": 0.24113517729645406, "grad_norm": 1.2103554010391235, "learning_rate": 6.213421156462388e-06, "loss": 0.46, "step": 4019 }, { "epoch": 0.24119517609647806, "grad_norm": 1.3192752599716187, "learning_rate": 6.212991485887216e-06, "loss": 0.514, "step": 4020 }, { "epoch": 0.24125517489650208, "grad_norm": 1.2234822511672974, "learning_rate": 6.212561712854717e-06, "loss": 0.4092, "step": 4021 }, { "epoch": 0.24131517369652608, "grad_norm": 1.2388767004013062, "learning_rate": 6.2121318373811195e-06, "loss": 0.4267, "step": 4022 }, { "epoch": 0.24137517249655008, "grad_norm": 1.1863614320755005, "learning_rate": 6.21170185948266e-06, "loss": 0.4755, "step": 4023 }, { "epoch": 0.24143517129657407, "grad_norm": 1.0594367980957031, "learning_rate": 6.211271779175574e-06, "loss": 0.3955, "step": 4024 }, { "epoch": 0.24149517009659807, "grad_norm": 1.2267423868179321, "learning_rate": 6.2108415964761075e-06, "loss": 0.4709, "step": 4025 }, { "epoch": 0.24155516889662207, "grad_norm": 1.1891857385635376, "learning_rate": 6.210411311400503e-06, "loss": 0.4566, "step": 4026 }, { "epoch": 0.24161516769664607, "grad_norm": 1.3433291912078857, "learning_rate": 6.209980923965013e-06, "loss": 0.4421, "step": 4027 }, { "epoch": 0.24167516649667006, "grad_norm": 1.2392934560775757, "learning_rate": 6.209550434185889e-06, "loss": 0.4757, "step": 4028 }, { "epoch": 0.24173516529669406, "grad_norm": 1.1524938344955444, "learning_rate": 6.209119842079391e-06, "loss": 0.4611, "step": 4029 }, { "epoch": 0.24179516409671806, "grad_norm": 1.3435837030410767, "learning_rate": 6.2086891476617784e-06, "loss": 0.4514, "step": 4030 }, { "epoch": 0.24185516289674205, "grad_norm": 1.2883063554763794, "learning_rate": 6.2082583509493175e-06, "loss": 0.5207, "step": 4031 }, { "epoch": 0.24191516169676605, "grad_norm": 1.2103033065795898, "learning_rate": 6.207827451958277e-06, "loss": 0.5337, "step": 4032 }, { "epoch": 0.24197516049679008, "grad_norm": 1.203825831413269, "learning_rate": 6.207396450704932e-06, "loss": 0.4041, "step": 4033 }, { "epoch": 0.24203515929681407, "grad_norm": 1.1991223096847534, "learning_rate": 6.206965347205556e-06, "loss": 0.4351, "step": 4034 }, { "epoch": 0.24209515809683807, "grad_norm": 1.2523189783096313, "learning_rate": 6.206534141476433e-06, "loss": 0.4466, "step": 4035 }, { "epoch": 0.24215515689686207, "grad_norm": 1.3831206560134888, "learning_rate": 6.206102833533847e-06, "loss": 0.5666, "step": 4036 }, { "epoch": 0.24221515569688606, "grad_norm": 1.3572431802749634, "learning_rate": 6.2056714233940845e-06, "loss": 0.4487, "step": 4037 }, { "epoch": 0.24227515449691006, "grad_norm": 1.3058894872665405, "learning_rate": 6.20523991107344e-06, "loss": 0.4606, "step": 4038 }, { "epoch": 0.24233515329693406, "grad_norm": 1.309530258178711, "learning_rate": 6.204808296588209e-06, "loss": 0.45, "step": 4039 }, { "epoch": 0.24239515209695806, "grad_norm": 1.232407808303833, "learning_rate": 6.204376579954692e-06, "loss": 0.4666, "step": 4040 }, { "epoch": 0.24245515089698205, "grad_norm": 1.3424851894378662, "learning_rate": 6.2039447611891925e-06, "loss": 0.4241, "step": 4041 }, { "epoch": 0.24251514969700605, "grad_norm": 1.4516968727111816, "learning_rate": 6.203512840308018e-06, "loss": 0.4021, "step": 4042 }, { "epoch": 0.24257514849703005, "grad_norm": 1.3618876934051514, "learning_rate": 6.203080817327482e-06, "loss": 0.4245, "step": 4043 }, { "epoch": 0.24263514729705407, "grad_norm": 1.2370139360427856, "learning_rate": 6.202648692263898e-06, "loss": 0.4598, "step": 4044 }, { "epoch": 0.24269514609707807, "grad_norm": 1.2269471883773804, "learning_rate": 6.202216465133585e-06, "loss": 0.4978, "step": 4045 }, { "epoch": 0.24275514489710207, "grad_norm": 1.2402790784835815, "learning_rate": 6.201784135952869e-06, "loss": 0.4723, "step": 4046 }, { "epoch": 0.24281514369712606, "grad_norm": 1.216552972793579, "learning_rate": 6.201351704738075e-06, "loss": 0.5119, "step": 4047 }, { "epoch": 0.24287514249715006, "grad_norm": 1.2763184309005737, "learning_rate": 6.200919171505534e-06, "loss": 0.4618, "step": 4048 }, { "epoch": 0.24293514129717406, "grad_norm": 1.3868576288223267, "learning_rate": 6.200486536271582e-06, "loss": 0.497, "step": 4049 }, { "epoch": 0.24299514009719805, "grad_norm": 1.1418870687484741, "learning_rate": 6.200053799052556e-06, "loss": 0.4566, "step": 4050 }, { "epoch": 0.24305513889722205, "grad_norm": 1.2445532083511353, "learning_rate": 6.1996209598648e-06, "loss": 0.486, "step": 4051 }, { "epoch": 0.24311513769724605, "grad_norm": 1.2314352989196777, "learning_rate": 6.19918801872466e-06, "loss": 0.4545, "step": 4052 }, { "epoch": 0.24317513649727004, "grad_norm": 1.2672369480133057, "learning_rate": 6.198754975648485e-06, "loss": 0.4601, "step": 4053 }, { "epoch": 0.24323513529729404, "grad_norm": 1.3583807945251465, "learning_rate": 6.1983218306526315e-06, "loss": 0.461, "step": 4054 }, { "epoch": 0.24329513409731807, "grad_norm": 1.2236731052398682, "learning_rate": 6.197888583753455e-06, "loss": 0.4245, "step": 4055 }, { "epoch": 0.24335513289734206, "grad_norm": 1.2977415323257446, "learning_rate": 6.197455234967318e-06, "loss": 0.4781, "step": 4056 }, { "epoch": 0.24341513169736606, "grad_norm": 1.2849998474121094, "learning_rate": 6.197021784310588e-06, "loss": 0.4447, "step": 4057 }, { "epoch": 0.24347513049739006, "grad_norm": 1.2206591367721558, "learning_rate": 6.196588231799631e-06, "loss": 0.4324, "step": 4058 }, { "epoch": 0.24353512929741405, "grad_norm": 1.2240740060806274, "learning_rate": 6.196154577450823e-06, "loss": 0.4273, "step": 4059 }, { "epoch": 0.24359512809743805, "grad_norm": 1.2388802766799927, "learning_rate": 6.19572082128054e-06, "loss": 0.4606, "step": 4060 }, { "epoch": 0.24365512689746205, "grad_norm": 1.2277644872665405, "learning_rate": 6.195286963305164e-06, "loss": 0.4396, "step": 4061 }, { "epoch": 0.24371512569748605, "grad_norm": 1.1177183389663696, "learning_rate": 6.1948530035410775e-06, "loss": 0.4443, "step": 4062 }, { "epoch": 0.24377512449751004, "grad_norm": 1.4286013841629028, "learning_rate": 6.194418942004671e-06, "loss": 0.4778, "step": 4063 }, { "epoch": 0.24383512329753404, "grad_norm": 1.2580657005310059, "learning_rate": 6.1939847787123386e-06, "loss": 0.5082, "step": 4064 }, { "epoch": 0.24389512209755804, "grad_norm": 1.2768220901489258, "learning_rate": 6.193550513680474e-06, "loss": 0.4552, "step": 4065 }, { "epoch": 0.24395512089758206, "grad_norm": 1.0908068418502808, "learning_rate": 6.1931161469254775e-06, "loss": 0.4183, "step": 4066 }, { "epoch": 0.24401511969760606, "grad_norm": 1.363520622253418, "learning_rate": 6.192681678463755e-06, "loss": 0.4736, "step": 4067 }, { "epoch": 0.24407511849763006, "grad_norm": 1.3479026556015015, "learning_rate": 6.192247108311712e-06, "loss": 0.4876, "step": 4068 }, { "epoch": 0.24413511729765405, "grad_norm": 1.4475727081298828, "learning_rate": 6.191812436485763e-06, "loss": 0.4325, "step": 4069 }, { "epoch": 0.24419511609767805, "grad_norm": 1.2408992052078247, "learning_rate": 6.1913776630023205e-06, "loss": 0.4738, "step": 4070 }, { "epoch": 0.24425511489770205, "grad_norm": 1.2960047721862793, "learning_rate": 6.190942787877807e-06, "loss": 0.4165, "step": 4071 }, { "epoch": 0.24431511369772604, "grad_norm": 1.2231049537658691, "learning_rate": 6.190507811128645e-06, "loss": 0.4392, "step": 4072 }, { "epoch": 0.24437511249775004, "grad_norm": 1.1110624074935913, "learning_rate": 6.190072732771259e-06, "loss": 0.4802, "step": 4073 }, { "epoch": 0.24443511129777404, "grad_norm": 1.3175793886184692, "learning_rate": 6.189637552822082e-06, "loss": 0.4476, "step": 4074 }, { "epoch": 0.24449511009779804, "grad_norm": 1.1627967357635498, "learning_rate": 6.189202271297549e-06, "loss": 0.3857, "step": 4075 }, { "epoch": 0.24455510889782203, "grad_norm": 1.3369872570037842, "learning_rate": 6.188766888214099e-06, "loss": 0.4954, "step": 4076 }, { "epoch": 0.24461510769784603, "grad_norm": 1.2313408851623535, "learning_rate": 6.188331403588173e-06, "loss": 0.4265, "step": 4077 }, { "epoch": 0.24467510649787005, "grad_norm": 1.2350575923919678, "learning_rate": 6.187895817436219e-06, "loss": 0.4073, "step": 4078 }, { "epoch": 0.24473510529789405, "grad_norm": 1.2329258918762207, "learning_rate": 6.187460129774683e-06, "loss": 0.4884, "step": 4079 }, { "epoch": 0.24479510409791805, "grad_norm": 1.2315888404846191, "learning_rate": 6.187024340620024e-06, "loss": 0.4289, "step": 4080 }, { "epoch": 0.24485510289794205, "grad_norm": 1.3508926630020142, "learning_rate": 6.1865884499886985e-06, "loss": 0.4503, "step": 4081 }, { "epoch": 0.24491510169796604, "grad_norm": 1.1941496133804321, "learning_rate": 6.186152457897165e-06, "loss": 0.4529, "step": 4082 }, { "epoch": 0.24497510049799004, "grad_norm": 1.1641185283660889, "learning_rate": 6.1857163643618935e-06, "loss": 0.4325, "step": 4083 }, { "epoch": 0.24503509929801404, "grad_norm": 1.2397289276123047, "learning_rate": 6.18528016939935e-06, "loss": 0.4935, "step": 4084 }, { "epoch": 0.24509509809803803, "grad_norm": 1.1963427066802979, "learning_rate": 6.184843873026008e-06, "loss": 0.4388, "step": 4085 }, { "epoch": 0.24515509689806203, "grad_norm": 1.15805983543396, "learning_rate": 6.1844074752583465e-06, "loss": 0.4383, "step": 4086 }, { "epoch": 0.24521509569808603, "grad_norm": 1.2814440727233887, "learning_rate": 6.1839709761128444e-06, "loss": 0.4878, "step": 4087 }, { "epoch": 0.24527509449811002, "grad_norm": 1.264719843864441, "learning_rate": 6.183534375605986e-06, "loss": 0.4808, "step": 4088 }, { "epoch": 0.24533509329813405, "grad_norm": 1.1579481363296509, "learning_rate": 6.1830976737542605e-06, "loss": 0.4075, "step": 4089 }, { "epoch": 0.24539509209815805, "grad_norm": 1.201308012008667, "learning_rate": 6.18266087057416e-06, "loss": 0.4355, "step": 4090 }, { "epoch": 0.24545509089818204, "grad_norm": 1.2164400815963745, "learning_rate": 6.1822239660821814e-06, "loss": 0.4606, "step": 4091 }, { "epoch": 0.24551508969820604, "grad_norm": 1.3126952648162842, "learning_rate": 6.1817869602948225e-06, "loss": 0.5399, "step": 4092 }, { "epoch": 0.24557508849823004, "grad_norm": 1.2363511323928833, "learning_rate": 6.181349853228589e-06, "loss": 0.4603, "step": 4093 }, { "epoch": 0.24563508729825403, "grad_norm": 1.2522393465042114, "learning_rate": 6.180912644899988e-06, "loss": 0.4415, "step": 4094 }, { "epoch": 0.24569508609827803, "grad_norm": 1.148925542831421, "learning_rate": 6.18047533532553e-06, "loss": 0.4214, "step": 4095 }, { "epoch": 0.24575508489830203, "grad_norm": 1.343514323234558, "learning_rate": 6.180037924521732e-06, "loss": 0.4343, "step": 4096 }, { "epoch": 0.24581508369832603, "grad_norm": 1.1725056171417236, "learning_rate": 6.17960041250511e-06, "loss": 0.4674, "step": 4097 }, { "epoch": 0.24587508249835002, "grad_norm": 1.1820074319839478, "learning_rate": 6.179162799292189e-06, "loss": 0.4055, "step": 4098 }, { "epoch": 0.24593508129837402, "grad_norm": 1.3576899766921997, "learning_rate": 6.178725084899497e-06, "loss": 0.4446, "step": 4099 }, { "epoch": 0.24599508009839804, "grad_norm": 1.3473671674728394, "learning_rate": 6.178287269343562e-06, "loss": 0.4712, "step": 4100 }, { "epoch": 0.24605507889842204, "grad_norm": 1.1790677309036255, "learning_rate": 6.177849352640919e-06, "loss": 0.446, "step": 4101 }, { "epoch": 0.24611507769844604, "grad_norm": 1.2790579795837402, "learning_rate": 6.177411334808105e-06, "loss": 0.4121, "step": 4102 }, { "epoch": 0.24617507649847004, "grad_norm": 1.381894588470459, "learning_rate": 6.176973215861664e-06, "loss": 0.4768, "step": 4103 }, { "epoch": 0.24623507529849403, "grad_norm": 1.3566895723342896, "learning_rate": 6.1765349958181416e-06, "loss": 0.4842, "step": 4104 }, { "epoch": 0.24629507409851803, "grad_norm": 1.386802315711975, "learning_rate": 6.1760966746940855e-06, "loss": 0.4217, "step": 4105 }, { "epoch": 0.24635507289854203, "grad_norm": 1.2832484245300293, "learning_rate": 6.175658252506051e-06, "loss": 0.4603, "step": 4106 }, { "epoch": 0.24641507169856602, "grad_norm": 1.3075140714645386, "learning_rate": 6.175219729270595e-06, "loss": 0.4463, "step": 4107 }, { "epoch": 0.24647507049859002, "grad_norm": 1.2385737895965576, "learning_rate": 6.174781105004277e-06, "loss": 0.4496, "step": 4108 }, { "epoch": 0.24653506929861402, "grad_norm": 1.2973114252090454, "learning_rate": 6.1743423797236635e-06, "loss": 0.3963, "step": 4109 }, { "epoch": 0.24659506809863802, "grad_norm": 1.4372767210006714, "learning_rate": 6.1739035534453224e-06, "loss": 0.5037, "step": 4110 }, { "epoch": 0.24665506689866204, "grad_norm": 1.1732888221740723, "learning_rate": 6.173464626185828e-06, "loss": 0.4275, "step": 4111 }, { "epoch": 0.24671506569868604, "grad_norm": 1.1722347736358643, "learning_rate": 6.173025597961752e-06, "loss": 0.3805, "step": 4112 }, { "epoch": 0.24677506449871003, "grad_norm": 1.1621899604797363, "learning_rate": 6.172586468789679e-06, "loss": 0.4297, "step": 4113 }, { "epoch": 0.24683506329873403, "grad_norm": 1.2377601861953735, "learning_rate": 6.172147238686191e-06, "loss": 0.403, "step": 4114 }, { "epoch": 0.24689506209875803, "grad_norm": 1.3751962184906006, "learning_rate": 6.171707907667877e-06, "loss": 0.4827, "step": 4115 }, { "epoch": 0.24695506089878203, "grad_norm": 1.3209525346755981, "learning_rate": 6.171268475751327e-06, "loss": 0.4395, "step": 4116 }, { "epoch": 0.24701505969880602, "grad_norm": 1.1651707887649536, "learning_rate": 6.170828942953137e-06, "loss": 0.445, "step": 4117 }, { "epoch": 0.24707505849883002, "grad_norm": 1.3347030878067017, "learning_rate": 6.170389309289905e-06, "loss": 0.4466, "step": 4118 }, { "epoch": 0.24713505729885402, "grad_norm": 1.2391830682754517, "learning_rate": 6.1699495747782366e-06, "loss": 0.3971, "step": 4119 }, { "epoch": 0.247195056098878, "grad_norm": 1.3574447631835938, "learning_rate": 6.169509739434736e-06, "loss": 0.4874, "step": 4120 }, { "epoch": 0.247255054898902, "grad_norm": 1.1665023565292358, "learning_rate": 6.169069803276016e-06, "loss": 0.367, "step": 4121 }, { "epoch": 0.24731505369892604, "grad_norm": 1.3353025913238525, "learning_rate": 6.168629766318688e-06, "loss": 0.4434, "step": 4122 }, { "epoch": 0.24737505249895003, "grad_norm": 1.1680147647857666, "learning_rate": 6.168189628579374e-06, "loss": 0.4429, "step": 4123 }, { "epoch": 0.24743505129897403, "grad_norm": 1.2171510457992554, "learning_rate": 6.167749390074692e-06, "loss": 0.4315, "step": 4124 }, { "epoch": 0.24749505009899803, "grad_norm": 1.3242278099060059, "learning_rate": 6.167309050821271e-06, "loss": 0.4458, "step": 4125 }, { "epoch": 0.24755504889902202, "grad_norm": 1.3517179489135742, "learning_rate": 6.166868610835739e-06, "loss": 0.509, "step": 4126 }, { "epoch": 0.24761504769904602, "grad_norm": 1.4560097455978394, "learning_rate": 6.16642807013473e-06, "loss": 0.4777, "step": 4127 }, { "epoch": 0.24767504649907002, "grad_norm": 1.2439861297607422, "learning_rate": 6.165987428734881e-06, "loss": 0.4825, "step": 4128 }, { "epoch": 0.24773504529909401, "grad_norm": 1.280843734741211, "learning_rate": 6.165546686652833e-06, "loss": 0.4513, "step": 4129 }, { "epoch": 0.247795044099118, "grad_norm": 1.1994459629058838, "learning_rate": 6.165105843905231e-06, "loss": 0.4234, "step": 4130 }, { "epoch": 0.247855042899142, "grad_norm": 1.3618005514144897, "learning_rate": 6.164664900508723e-06, "loss": 0.4712, "step": 4131 }, { "epoch": 0.247915041699166, "grad_norm": 1.3780187368392944, "learning_rate": 6.164223856479961e-06, "loss": 0.4628, "step": 4132 }, { "epoch": 0.24797504049919, "grad_norm": 1.2432448863983154, "learning_rate": 6.163782711835605e-06, "loss": 0.4916, "step": 4133 }, { "epoch": 0.24803503929921403, "grad_norm": 1.125359296798706, "learning_rate": 6.16334146659231e-06, "loss": 0.426, "step": 4134 }, { "epoch": 0.24809503809923802, "grad_norm": 1.3148747682571411, "learning_rate": 6.162900120766742e-06, "loss": 0.4619, "step": 4135 }, { "epoch": 0.24815503689926202, "grad_norm": 1.2488282918930054, "learning_rate": 6.162458674375569e-06, "loss": 0.4189, "step": 4136 }, { "epoch": 0.24821503569928602, "grad_norm": 1.1525217294692993, "learning_rate": 6.162017127435462e-06, "loss": 0.4558, "step": 4137 }, { "epoch": 0.24827503449931002, "grad_norm": 1.2102683782577515, "learning_rate": 6.161575479963096e-06, "loss": 0.4424, "step": 4138 }, { "epoch": 0.248335033299334, "grad_norm": 1.2808136940002441, "learning_rate": 6.161133731975151e-06, "loss": 0.4429, "step": 4139 }, { "epoch": 0.248395032099358, "grad_norm": 1.1922259330749512, "learning_rate": 6.160691883488307e-06, "loss": 0.4145, "step": 4140 }, { "epoch": 0.248455030899382, "grad_norm": 1.2697834968566895, "learning_rate": 6.160249934519255e-06, "loss": 0.4311, "step": 4141 }, { "epoch": 0.248515029699406, "grad_norm": 1.3167027235031128, "learning_rate": 6.159807885084682e-06, "loss": 0.461, "step": 4142 }, { "epoch": 0.24857502849943, "grad_norm": 1.3207032680511475, "learning_rate": 6.159365735201283e-06, "loss": 0.5313, "step": 4143 }, { "epoch": 0.248635027299454, "grad_norm": 1.157066822052002, "learning_rate": 6.158923484885756e-06, "loss": 0.4329, "step": 4144 }, { "epoch": 0.24869502609947802, "grad_norm": 1.418747901916504, "learning_rate": 6.1584811341548045e-06, "loss": 0.4985, "step": 4145 }, { "epoch": 0.24875502489950202, "grad_norm": 1.078922986984253, "learning_rate": 6.158038683025131e-06, "loss": 0.4141, "step": 4146 }, { "epoch": 0.24881502369952602, "grad_norm": 1.343811273574829, "learning_rate": 6.157596131513447e-06, "loss": 0.4582, "step": 4147 }, { "epoch": 0.24887502249955, "grad_norm": 1.503296136856079, "learning_rate": 6.157153479636466e-06, "loss": 0.4451, "step": 4148 }, { "epoch": 0.248935021299574, "grad_norm": 1.2434184551239014, "learning_rate": 6.156710727410902e-06, "loss": 0.43, "step": 4149 }, { "epoch": 0.248995020099598, "grad_norm": 1.376444697380066, "learning_rate": 6.15626787485348e-06, "loss": 0.4199, "step": 4150 }, { "epoch": 0.249055018899622, "grad_norm": 1.2103294134140015, "learning_rate": 6.1558249219809206e-06, "loss": 0.4765, "step": 4151 }, { "epoch": 0.249115017699646, "grad_norm": 1.2399176359176636, "learning_rate": 6.155381868809955e-06, "loss": 0.4627, "step": 4152 }, { "epoch": 0.24917501649967, "grad_norm": 1.2955174446105957, "learning_rate": 6.154938715357313e-06, "loss": 0.4142, "step": 4153 }, { "epoch": 0.249235015299694, "grad_norm": 1.2862772941589355, "learning_rate": 6.154495461639732e-06, "loss": 0.4404, "step": 4154 }, { "epoch": 0.249295014099718, "grad_norm": 1.3392349481582642, "learning_rate": 6.154052107673952e-06, "loss": 0.4607, "step": 4155 }, { "epoch": 0.24935501289974202, "grad_norm": 1.2180299758911133, "learning_rate": 6.153608653476715e-06, "loss": 0.443, "step": 4156 }, { "epoch": 0.24941501169976601, "grad_norm": 1.231119990348816, "learning_rate": 6.15316509906477e-06, "loss": 0.4704, "step": 4157 }, { "epoch": 0.24947501049979, "grad_norm": 1.1869959831237793, "learning_rate": 6.152721444454865e-06, "loss": 0.4434, "step": 4158 }, { "epoch": 0.249535009299814, "grad_norm": 1.330407738685608, "learning_rate": 6.152277689663758e-06, "loss": 0.4411, "step": 4159 }, { "epoch": 0.249595008099838, "grad_norm": 1.3281968832015991, "learning_rate": 6.151833834708207e-06, "loss": 0.4478, "step": 4160 }, { "epoch": 0.249655006899862, "grad_norm": 1.321290135383606, "learning_rate": 6.1513898796049724e-06, "loss": 0.4667, "step": 4161 }, { "epoch": 0.249715005699886, "grad_norm": 1.3372769355773926, "learning_rate": 6.150945824370823e-06, "loss": 0.4816, "step": 4162 }, { "epoch": 0.24977500449991, "grad_norm": 1.1616390943527222, "learning_rate": 6.150501669022527e-06, "loss": 0.4533, "step": 4163 }, { "epoch": 0.249835003299934, "grad_norm": 1.2127200365066528, "learning_rate": 6.1500574135768584e-06, "loss": 0.4224, "step": 4164 }, { "epoch": 0.249895002099958, "grad_norm": 1.25346040725708, "learning_rate": 6.149613058050595e-06, "loss": 0.3972, "step": 4165 }, { "epoch": 0.249955000899982, "grad_norm": 1.255661129951477, "learning_rate": 6.149168602460518e-06, "loss": 0.5209, "step": 4166 }, { "epoch": 0.250014999700006, "grad_norm": 1.3742437362670898, "learning_rate": 6.148724046823412e-06, "loss": 0.4648, "step": 4167 }, { "epoch": 0.25007499850003, "grad_norm": 1.3107562065124512, "learning_rate": 6.148279391156068e-06, "loss": 0.4804, "step": 4168 }, { "epoch": 0.250134997300054, "grad_norm": 1.2235411405563354, "learning_rate": 6.1478346354752755e-06, "loss": 0.4632, "step": 4169 }, { "epoch": 0.250194996100078, "grad_norm": 1.3534711599349976, "learning_rate": 6.147389779797832e-06, "loss": 0.4305, "step": 4170 }, { "epoch": 0.250254994900102, "grad_norm": 1.3575507402420044, "learning_rate": 6.146944824140538e-06, "loss": 0.5262, "step": 4171 }, { "epoch": 0.250314993700126, "grad_norm": 1.2719626426696777, "learning_rate": 6.146499768520199e-06, "loss": 0.4247, "step": 4172 }, { "epoch": 0.25037499250015, "grad_norm": 1.2902421951293945, "learning_rate": 6.146054612953621e-06, "loss": 0.4816, "step": 4173 }, { "epoch": 0.250434991300174, "grad_norm": 1.3830797672271729, "learning_rate": 6.145609357457614e-06, "loss": 0.4512, "step": 4174 }, { "epoch": 0.250494990100198, "grad_norm": 1.300775408744812, "learning_rate": 6.145164002048996e-06, "loss": 0.4412, "step": 4175 }, { "epoch": 0.250554988900222, "grad_norm": 1.268896222114563, "learning_rate": 6.144718546744585e-06, "loss": 0.4703, "step": 4176 }, { "epoch": 0.250614987700246, "grad_norm": 1.2968978881835938, "learning_rate": 6.144272991561203e-06, "loss": 0.5016, "step": 4177 }, { "epoch": 0.25067498650027, "grad_norm": 1.2555416822433472, "learning_rate": 6.143827336515678e-06, "loss": 0.4344, "step": 4178 }, { "epoch": 0.250734985300294, "grad_norm": 1.341938853263855, "learning_rate": 6.1433815816248404e-06, "loss": 0.4069, "step": 4179 }, { "epoch": 0.250794984100318, "grad_norm": 1.2816499471664429, "learning_rate": 6.1429357269055226e-06, "loss": 0.4166, "step": 4180 }, { "epoch": 0.25085498290034197, "grad_norm": 1.4516549110412598, "learning_rate": 6.142489772374564e-06, "loss": 0.4671, "step": 4181 }, { "epoch": 0.25091498170036597, "grad_norm": 1.3777588605880737, "learning_rate": 6.142043718048806e-06, "loss": 0.4082, "step": 4182 }, { "epoch": 0.25097498050038997, "grad_norm": 1.053219199180603, "learning_rate": 6.1415975639450935e-06, "loss": 0.4349, "step": 4183 }, { "epoch": 0.251034979300414, "grad_norm": 1.406553030014038, "learning_rate": 6.141151310080276e-06, "loss": 0.4579, "step": 4184 }, { "epoch": 0.251094978100438, "grad_norm": 1.2643169164657593, "learning_rate": 6.140704956471208e-06, "loss": 0.463, "step": 4185 }, { "epoch": 0.251154976900462, "grad_norm": 1.2707756757736206, "learning_rate": 6.140258503134744e-06, "loss": 0.4425, "step": 4186 }, { "epoch": 0.251214975700486, "grad_norm": 1.3191032409667969, "learning_rate": 6.1398119500877446e-06, "loss": 0.5542, "step": 4187 }, { "epoch": 0.25127497450051, "grad_norm": 1.1553739309310913, "learning_rate": 6.139365297347075e-06, "loss": 0.4424, "step": 4188 }, { "epoch": 0.251334973300534, "grad_norm": 1.1823604106903076, "learning_rate": 6.1389185449296036e-06, "loss": 0.4288, "step": 4189 }, { "epoch": 0.251394972100558, "grad_norm": 1.3797950744628906, "learning_rate": 6.1384716928522015e-06, "loss": 0.4638, "step": 4190 }, { "epoch": 0.251454970900582, "grad_norm": 1.2690109014511108, "learning_rate": 6.138024741131744e-06, "loss": 0.4505, "step": 4191 }, { "epoch": 0.251514969700606, "grad_norm": 1.1951645612716675, "learning_rate": 6.137577689785112e-06, "loss": 0.4351, "step": 4192 }, { "epoch": 0.25157496850063, "grad_norm": 1.2319811582565308, "learning_rate": 6.137130538829186e-06, "loss": 0.463, "step": 4193 }, { "epoch": 0.251634967300654, "grad_norm": 1.2974865436553955, "learning_rate": 6.1366832882808545e-06, "loss": 0.4713, "step": 4194 }, { "epoch": 0.251694966100678, "grad_norm": 1.2927945852279663, "learning_rate": 6.136235938157007e-06, "loss": 0.481, "step": 4195 }, { "epoch": 0.251754964900702, "grad_norm": 1.1383471488952637, "learning_rate": 6.135788488474541e-06, "loss": 0.4645, "step": 4196 }, { "epoch": 0.251814963700726, "grad_norm": 1.3178423643112183, "learning_rate": 6.1353409392503515e-06, "loss": 0.4511, "step": 4197 }, { "epoch": 0.25187496250075, "grad_norm": 1.2665070295333862, "learning_rate": 6.1348932905013405e-06, "loss": 0.5053, "step": 4198 }, { "epoch": 0.251934961300774, "grad_norm": 1.24454927444458, "learning_rate": 6.134445542244416e-06, "loss": 0.4218, "step": 4199 }, { "epoch": 0.25199496010079797, "grad_norm": 1.2363617420196533, "learning_rate": 6.1339976944964845e-06, "loss": 0.4804, "step": 4200 }, { "epoch": 0.25205495890082197, "grad_norm": 1.235734462738037, "learning_rate": 6.1335497472744615e-06, "loss": 0.4189, "step": 4201 }, { "epoch": 0.25211495770084597, "grad_norm": 1.2865893840789795, "learning_rate": 6.133101700595262e-06, "loss": 0.4079, "step": 4202 }, { "epoch": 0.25217495650086996, "grad_norm": 1.4268577098846436, "learning_rate": 6.1326535544758085e-06, "loss": 0.4793, "step": 4203 }, { "epoch": 0.25223495530089396, "grad_norm": 1.1924546957015991, "learning_rate": 6.132205308933025e-06, "loss": 0.4368, "step": 4204 }, { "epoch": 0.25229495410091796, "grad_norm": 1.2823328971862793, "learning_rate": 6.131756963983838e-06, "loss": 0.4306, "step": 4205 }, { "epoch": 0.25235495290094195, "grad_norm": 1.3226135969161987, "learning_rate": 6.131308519645181e-06, "loss": 0.4392, "step": 4206 }, { "epoch": 0.252414951700966, "grad_norm": 1.3761423826217651, "learning_rate": 6.1308599759339896e-06, "loss": 0.4705, "step": 4207 }, { "epoch": 0.25247495050099, "grad_norm": 1.2235236167907715, "learning_rate": 6.130411332867203e-06, "loss": 0.4246, "step": 4208 }, { "epoch": 0.252534949301014, "grad_norm": 1.3137099742889404, "learning_rate": 6.129962590461766e-06, "loss": 0.4702, "step": 4209 }, { "epoch": 0.252594948101038, "grad_norm": 1.1893537044525146, "learning_rate": 6.129513748734622e-06, "loss": 0.4155, "step": 4210 }, { "epoch": 0.252654946901062, "grad_norm": 1.194118618965149, "learning_rate": 6.129064807702724e-06, "loss": 0.4334, "step": 4211 }, { "epoch": 0.252714945701086, "grad_norm": 1.2903285026550293, "learning_rate": 6.128615767383028e-06, "loss": 0.453, "step": 4212 }, { "epoch": 0.25277494450111, "grad_norm": 1.1772369146347046, "learning_rate": 6.1281666277924885e-06, "loss": 0.4598, "step": 4213 }, { "epoch": 0.252834943301134, "grad_norm": 1.3033510446548462, "learning_rate": 6.127717388948069e-06, "loss": 0.4529, "step": 4214 }, { "epoch": 0.252894942101158, "grad_norm": 1.263437032699585, "learning_rate": 6.127268050866737e-06, "loss": 0.436, "step": 4215 }, { "epoch": 0.252954940901182, "grad_norm": 1.1824666261672974, "learning_rate": 6.1268186135654606e-06, "loss": 0.4176, "step": 4216 }, { "epoch": 0.253014939701206, "grad_norm": 1.2924429178237915, "learning_rate": 6.1263690770612115e-06, "loss": 0.4328, "step": 4217 }, { "epoch": 0.25307493850123, "grad_norm": 1.2717738151550293, "learning_rate": 6.12591944137097e-06, "loss": 0.4807, "step": 4218 }, { "epoch": 0.25313493730125397, "grad_norm": 1.2922724485397339, "learning_rate": 6.125469706511713e-06, "loss": 0.4522, "step": 4219 }, { "epoch": 0.25319493610127797, "grad_norm": 1.3624857664108276, "learning_rate": 6.125019872500428e-06, "loss": 0.4959, "step": 4220 }, { "epoch": 0.25325493490130196, "grad_norm": 1.1931120157241821, "learning_rate": 6.124569939354101e-06, "loss": 0.4661, "step": 4221 }, { "epoch": 0.25331493370132596, "grad_norm": 1.2591134309768677, "learning_rate": 6.124119907089725e-06, "loss": 0.4547, "step": 4222 }, { "epoch": 0.25337493250134996, "grad_norm": 1.4259121417999268, "learning_rate": 6.123669775724297e-06, "loss": 0.4855, "step": 4223 }, { "epoch": 0.25343493130137396, "grad_norm": 1.3993555307388306, "learning_rate": 6.123219545274814e-06, "loss": 0.3962, "step": 4224 }, { "epoch": 0.25349493010139795, "grad_norm": 1.231750249862671, "learning_rate": 6.12276921575828e-06, "loss": 0.4162, "step": 4225 }, { "epoch": 0.25355492890142195, "grad_norm": 1.285537600517273, "learning_rate": 6.1223187871917025e-06, "loss": 0.4361, "step": 4226 }, { "epoch": 0.25361492770144595, "grad_norm": 1.3860474824905396, "learning_rate": 6.121868259592091e-06, "loss": 0.4971, "step": 4227 }, { "epoch": 0.25367492650146994, "grad_norm": 1.33649480342865, "learning_rate": 6.121417632976461e-06, "loss": 0.4897, "step": 4228 }, { "epoch": 0.253734925301494, "grad_norm": 1.386539340019226, "learning_rate": 6.1209669073618294e-06, "loss": 0.4771, "step": 4229 }, { "epoch": 0.253794924101518, "grad_norm": 1.198505163192749, "learning_rate": 6.12051608276522e-06, "loss": 0.3917, "step": 4230 }, { "epoch": 0.253854922901542, "grad_norm": 1.3675642013549805, "learning_rate": 6.120065159203656e-06, "loss": 0.4932, "step": 4231 }, { "epoch": 0.253914921701566, "grad_norm": 1.3441197872161865, "learning_rate": 6.119614136694169e-06, "loss": 0.5096, "step": 4232 }, { "epoch": 0.25397492050159, "grad_norm": 1.2783207893371582, "learning_rate": 6.119163015253791e-06, "loss": 0.462, "step": 4233 }, { "epoch": 0.254034919301614, "grad_norm": 1.3607178926467896, "learning_rate": 6.118711794899558e-06, "loss": 0.4485, "step": 4234 }, { "epoch": 0.254094918101638, "grad_norm": 1.167261004447937, "learning_rate": 6.118260475648512e-06, "loss": 0.4203, "step": 4235 }, { "epoch": 0.254154916901662, "grad_norm": 1.205588936805725, "learning_rate": 6.117809057517696e-06, "loss": 0.4185, "step": 4236 }, { "epoch": 0.254214915701686, "grad_norm": 1.1436936855316162, "learning_rate": 6.117357540524158e-06, "loss": 0.4319, "step": 4237 }, { "epoch": 0.25427491450170997, "grad_norm": 1.366320252418518, "learning_rate": 6.116905924684952e-06, "loss": 0.5129, "step": 4238 }, { "epoch": 0.25433491330173397, "grad_norm": 1.2953282594680786, "learning_rate": 6.116454210017131e-06, "loss": 0.4083, "step": 4239 }, { "epoch": 0.25439491210175796, "grad_norm": 1.2620718479156494, "learning_rate": 6.116002396537755e-06, "loss": 0.448, "step": 4240 }, { "epoch": 0.25445491090178196, "grad_norm": 1.1897146701812744, "learning_rate": 6.115550484263887e-06, "loss": 0.4488, "step": 4241 }, { "epoch": 0.25451490970180596, "grad_norm": 1.309072732925415, "learning_rate": 6.115098473212593e-06, "loss": 0.4478, "step": 4242 }, { "epoch": 0.25457490850182996, "grad_norm": 1.251484751701355, "learning_rate": 6.114646363400945e-06, "loss": 0.4729, "step": 4243 }, { "epoch": 0.25463490730185395, "grad_norm": 1.255150556564331, "learning_rate": 6.114194154846016e-06, "loss": 0.4852, "step": 4244 }, { "epoch": 0.25469490610187795, "grad_norm": 1.2259823083877563, "learning_rate": 6.113741847564883e-06, "loss": 0.4186, "step": 4245 }, { "epoch": 0.25475490490190195, "grad_norm": 1.473941683769226, "learning_rate": 6.113289441574629e-06, "loss": 0.447, "step": 4246 }, { "epoch": 0.25481490370192594, "grad_norm": 1.1416407823562622, "learning_rate": 6.11283693689234e-06, "loss": 0.4365, "step": 4247 }, { "epoch": 0.25487490250194994, "grad_norm": 1.394693374633789, "learning_rate": 6.112384333535104e-06, "loss": 0.4541, "step": 4248 }, { "epoch": 0.25493490130197394, "grad_norm": 1.2826402187347412, "learning_rate": 6.111931631520013e-06, "loss": 0.4502, "step": 4249 }, { "epoch": 0.25499490010199793, "grad_norm": 1.3255316019058228, "learning_rate": 6.1114788308641645e-06, "loss": 0.464, "step": 4250 }, { "epoch": 0.255054898902022, "grad_norm": 1.3416807651519775, "learning_rate": 6.1110259315846575e-06, "loss": 0.4965, "step": 4251 }, { "epoch": 0.255114897702046, "grad_norm": 1.3076525926589966, "learning_rate": 6.110572933698598e-06, "loss": 0.4259, "step": 4252 }, { "epoch": 0.25517489650207, "grad_norm": 1.3220901489257812, "learning_rate": 6.110119837223092e-06, "loss": 0.4577, "step": 4253 }, { "epoch": 0.255234895302094, "grad_norm": 1.2925200462341309, "learning_rate": 6.109666642175251e-06, "loss": 0.4402, "step": 4254 }, { "epoch": 0.255294894102118, "grad_norm": 1.3254770040512085, "learning_rate": 6.1092133485721915e-06, "loss": 0.4561, "step": 4255 }, { "epoch": 0.25535489290214197, "grad_norm": 1.1932326555252075, "learning_rate": 6.108759956431031e-06, "loss": 0.4132, "step": 4256 }, { "epoch": 0.25541489170216597, "grad_norm": 1.2694616317749023, "learning_rate": 6.108306465768892e-06, "loss": 0.468, "step": 4257 }, { "epoch": 0.25547489050218997, "grad_norm": 1.2747538089752197, "learning_rate": 6.107852876602901e-06, "loss": 0.4856, "step": 4258 }, { "epoch": 0.25553488930221396, "grad_norm": 1.2233868837356567, "learning_rate": 6.10739918895019e-06, "loss": 0.3978, "step": 4259 }, { "epoch": 0.25559488810223796, "grad_norm": 1.3506218194961548, "learning_rate": 6.106945402827889e-06, "loss": 0.4533, "step": 4260 }, { "epoch": 0.25565488690226196, "grad_norm": 1.2617257833480835, "learning_rate": 6.106491518253137e-06, "loss": 0.4452, "step": 4261 }, { "epoch": 0.25571488570228595, "grad_norm": 1.3324036598205566, "learning_rate": 6.106037535243077e-06, "loss": 0.4517, "step": 4262 }, { "epoch": 0.25577488450230995, "grad_norm": 1.2720017433166504, "learning_rate": 6.105583453814852e-06, "loss": 0.4313, "step": 4263 }, { "epoch": 0.25583488330233395, "grad_norm": 1.4663856029510498, "learning_rate": 6.105129273985611e-06, "loss": 0.4478, "step": 4264 }, { "epoch": 0.25589488210235795, "grad_norm": 1.2719571590423584, "learning_rate": 6.104674995772506e-06, "loss": 0.4098, "step": 4265 }, { "epoch": 0.25595488090238194, "grad_norm": 1.2154842615127563, "learning_rate": 6.104220619192693e-06, "loss": 0.4542, "step": 4266 }, { "epoch": 0.25601487970240594, "grad_norm": 1.3341386318206787, "learning_rate": 6.103766144263332e-06, "loss": 0.5017, "step": 4267 }, { "epoch": 0.25607487850242994, "grad_norm": 1.434821605682373, "learning_rate": 6.1033115710015876e-06, "loss": 0.4388, "step": 4268 }, { "epoch": 0.25613487730245393, "grad_norm": 1.2128164768218994, "learning_rate": 6.1028568994246245e-06, "loss": 0.4364, "step": 4269 }, { "epoch": 0.25619487610247793, "grad_norm": 1.2635304927825928, "learning_rate": 6.102402129549616e-06, "loss": 0.415, "step": 4270 }, { "epoch": 0.25625487490250193, "grad_norm": 1.2431386709213257, "learning_rate": 6.101947261393735e-06, "loss": 0.4382, "step": 4271 }, { "epoch": 0.2563148737025259, "grad_norm": 1.274288296699524, "learning_rate": 6.101492294974159e-06, "loss": 0.3911, "step": 4272 }, { "epoch": 0.2563748725025499, "grad_norm": 1.3090285062789917, "learning_rate": 6.101037230308074e-06, "loss": 0.4676, "step": 4273 }, { "epoch": 0.256434871302574, "grad_norm": 1.31680428981781, "learning_rate": 6.100582067412662e-06, "loss": 0.4885, "step": 4274 }, { "epoch": 0.25649487010259797, "grad_norm": 1.2943751811981201, "learning_rate": 6.100126806305113e-06, "loss": 0.436, "step": 4275 }, { "epoch": 0.25655486890262197, "grad_norm": 1.3347463607788086, "learning_rate": 6.099671447002621e-06, "loss": 0.4496, "step": 4276 }, { "epoch": 0.25661486770264597, "grad_norm": 1.1777232885360718, "learning_rate": 6.0992159895223824e-06, "loss": 0.4142, "step": 4277 }, { "epoch": 0.25667486650266996, "grad_norm": 1.3435285091400146, "learning_rate": 6.0987604338815986e-06, "loss": 0.4498, "step": 4278 }, { "epoch": 0.25673486530269396, "grad_norm": 1.3823277950286865, "learning_rate": 6.098304780097474e-06, "loss": 0.4983, "step": 4279 }, { "epoch": 0.25679486410271796, "grad_norm": 1.3314791917800903, "learning_rate": 6.097849028187213e-06, "loss": 0.4944, "step": 4280 }, { "epoch": 0.25685486290274195, "grad_norm": 1.3278083801269531, "learning_rate": 6.097393178168031e-06, "loss": 0.5017, "step": 4281 }, { "epoch": 0.25691486170276595, "grad_norm": 1.203558325767517, "learning_rate": 6.096937230057142e-06, "loss": 0.4722, "step": 4282 }, { "epoch": 0.25697486050278995, "grad_norm": 1.2160340547561646, "learning_rate": 6.096481183871767e-06, "loss": 0.4494, "step": 4283 }, { "epoch": 0.25703485930281395, "grad_norm": 1.5047687292099, "learning_rate": 6.0960250396291266e-06, "loss": 0.4524, "step": 4284 }, { "epoch": 0.25709485810283794, "grad_norm": 1.29521644115448, "learning_rate": 6.095568797346448e-06, "loss": 0.462, "step": 4285 }, { "epoch": 0.25715485690286194, "grad_norm": 1.294965386390686, "learning_rate": 6.09511245704096e-06, "loss": 0.483, "step": 4286 }, { "epoch": 0.25721485570288594, "grad_norm": 1.329624056816101, "learning_rate": 6.094656018729899e-06, "loss": 0.4909, "step": 4287 }, { "epoch": 0.25727485450290993, "grad_norm": 1.3755632638931274, "learning_rate": 6.094199482430502e-06, "loss": 0.4383, "step": 4288 }, { "epoch": 0.25733485330293393, "grad_norm": 1.142562985420227, "learning_rate": 6.093742848160009e-06, "loss": 0.4443, "step": 4289 }, { "epoch": 0.2573948521029579, "grad_norm": 1.3742852210998535, "learning_rate": 6.093286115935664e-06, "loss": 0.44, "step": 4290 }, { "epoch": 0.2574548509029819, "grad_norm": 1.2210150957107544, "learning_rate": 6.09282928577472e-06, "loss": 0.4728, "step": 4291 }, { "epoch": 0.2575148497030059, "grad_norm": 1.2947267293930054, "learning_rate": 6.092372357694425e-06, "loss": 0.4499, "step": 4292 }, { "epoch": 0.2575748485030299, "grad_norm": 1.1817516088485718, "learning_rate": 6.091915331712038e-06, "loss": 0.4474, "step": 4293 }, { "epoch": 0.2576348473030539, "grad_norm": 1.3521230220794678, "learning_rate": 6.091458207844817e-06, "loss": 0.4909, "step": 4294 }, { "epoch": 0.2576948461030779, "grad_norm": 1.227303385734558, "learning_rate": 6.091000986110026e-06, "loss": 0.4463, "step": 4295 }, { "epoch": 0.25775484490310197, "grad_norm": 1.3122999668121338, "learning_rate": 6.090543666524933e-06, "loss": 0.4575, "step": 4296 }, { "epoch": 0.25781484370312596, "grad_norm": 1.255448341369629, "learning_rate": 6.090086249106807e-06, "loss": 0.4488, "step": 4297 }, { "epoch": 0.25787484250314996, "grad_norm": 1.203818678855896, "learning_rate": 6.089628733872925e-06, "loss": 0.4465, "step": 4298 }, { "epoch": 0.25793484130317396, "grad_norm": 1.1823663711547852, "learning_rate": 6.0891711208405645e-06, "loss": 0.5439, "step": 4299 }, { "epoch": 0.25799484010319795, "grad_norm": 1.1785911321640015, "learning_rate": 6.088713410027006e-06, "loss": 0.4564, "step": 4300 }, { "epoch": 0.25805483890322195, "grad_norm": 1.363882303237915, "learning_rate": 6.0882556014495365e-06, "loss": 0.4085, "step": 4301 }, { "epoch": 0.25811483770324595, "grad_norm": 1.1626094579696655, "learning_rate": 6.087797695125445e-06, "loss": 0.4005, "step": 4302 }, { "epoch": 0.25817483650326994, "grad_norm": 1.3641297817230225, "learning_rate": 6.087339691072024e-06, "loss": 0.5008, "step": 4303 }, { "epoch": 0.25823483530329394, "grad_norm": 1.2461316585540771, "learning_rate": 6.086881589306571e-06, "loss": 0.4577, "step": 4304 }, { "epoch": 0.25829483410331794, "grad_norm": 1.2595891952514648, "learning_rate": 6.0864233898463866e-06, "loss": 0.4673, "step": 4305 }, { "epoch": 0.25835483290334194, "grad_norm": 1.2438429594039917, "learning_rate": 6.085965092708775e-06, "loss": 0.4724, "step": 4306 }, { "epoch": 0.25841483170336593, "grad_norm": 1.1358891725540161, "learning_rate": 6.085506697911042e-06, "loss": 0.4182, "step": 4307 }, { "epoch": 0.25847483050338993, "grad_norm": 1.1799856424331665, "learning_rate": 6.085048205470501e-06, "loss": 0.441, "step": 4308 }, { "epoch": 0.2585348293034139, "grad_norm": 1.3184679746627808, "learning_rate": 6.084589615404468e-06, "loss": 0.4791, "step": 4309 }, { "epoch": 0.2585948281034379, "grad_norm": 1.2226510047912598, "learning_rate": 6.084130927730258e-06, "loss": 0.4439, "step": 4310 }, { "epoch": 0.2586548269034619, "grad_norm": 1.3132294416427612, "learning_rate": 6.083672142465198e-06, "loss": 0.4728, "step": 4311 }, { "epoch": 0.2587148257034859, "grad_norm": 1.3134737014770508, "learning_rate": 6.0832132596266125e-06, "loss": 0.495, "step": 4312 }, { "epoch": 0.2587748245035099, "grad_norm": 1.2419322729110718, "learning_rate": 6.0827542792318315e-06, "loss": 0.5087, "step": 4313 }, { "epoch": 0.2588348233035339, "grad_norm": 1.2023398876190186, "learning_rate": 6.082295201298187e-06, "loss": 0.4128, "step": 4314 }, { "epoch": 0.2588948221035579, "grad_norm": 1.2333234548568726, "learning_rate": 6.0818360258430185e-06, "loss": 0.4356, "step": 4315 }, { "epoch": 0.2589548209035819, "grad_norm": 1.2106724977493286, "learning_rate": 6.081376752883666e-06, "loss": 0.4498, "step": 4316 }, { "epoch": 0.2590148197036059, "grad_norm": 1.143176794052124, "learning_rate": 6.080917382437473e-06, "loss": 0.4535, "step": 4317 }, { "epoch": 0.2590748185036299, "grad_norm": 1.1856412887573242, "learning_rate": 6.080457914521791e-06, "loss": 0.4628, "step": 4318 }, { "epoch": 0.25913481730365395, "grad_norm": 1.2734044790267944, "learning_rate": 6.0799983491539695e-06, "loss": 0.4261, "step": 4319 }, { "epoch": 0.25919481610367795, "grad_norm": 1.2396503686904907, "learning_rate": 6.0795386863513644e-06, "loss": 0.4366, "step": 4320 }, { "epoch": 0.25925481490370195, "grad_norm": 1.277608871459961, "learning_rate": 6.079078926131337e-06, "loss": 0.4925, "step": 4321 }, { "epoch": 0.25931481370372594, "grad_norm": 1.1987664699554443, "learning_rate": 6.078619068511247e-06, "loss": 0.4335, "step": 4322 }, { "epoch": 0.25937481250374994, "grad_norm": 1.3094078302383423, "learning_rate": 6.078159113508464e-06, "loss": 0.4974, "step": 4323 }, { "epoch": 0.25943481130377394, "grad_norm": 1.2375965118408203, "learning_rate": 6.077699061140356e-06, "loss": 0.4723, "step": 4324 }, { "epoch": 0.25949481010379793, "grad_norm": 1.3254797458648682, "learning_rate": 6.077238911424299e-06, "loss": 0.509, "step": 4325 }, { "epoch": 0.25955480890382193, "grad_norm": 1.280867576599121, "learning_rate": 6.076778664377671e-06, "loss": 0.4019, "step": 4326 }, { "epoch": 0.25961480770384593, "grad_norm": 1.3062520027160645, "learning_rate": 6.076318320017852e-06, "loss": 0.4257, "step": 4327 }, { "epoch": 0.2596748065038699, "grad_norm": 1.3356413841247559, "learning_rate": 6.075857878362228e-06, "loss": 0.5236, "step": 4328 }, { "epoch": 0.2597348053038939, "grad_norm": 1.2446866035461426, "learning_rate": 6.075397339428188e-06, "loss": 0.4055, "step": 4329 }, { "epoch": 0.2597948041039179, "grad_norm": 1.340210199356079, "learning_rate": 6.074936703233124e-06, "loss": 0.5066, "step": 4330 }, { "epoch": 0.2598548029039419, "grad_norm": 1.2355153560638428, "learning_rate": 6.074475969794431e-06, "loss": 0.4589, "step": 4331 }, { "epoch": 0.2599148017039659, "grad_norm": 1.2083663940429688, "learning_rate": 6.07401513912951e-06, "loss": 0.4824, "step": 4332 }, { "epoch": 0.2599748005039899, "grad_norm": 1.235817551612854, "learning_rate": 6.073554211255765e-06, "loss": 0.4664, "step": 4333 }, { "epoch": 0.2600347993040139, "grad_norm": 1.1523741483688354, "learning_rate": 6.073093186190602e-06, "loss": 0.4253, "step": 4334 }, { "epoch": 0.2600947981040379, "grad_norm": 1.429412603378296, "learning_rate": 6.072632063951432e-06, "loss": 0.4634, "step": 4335 }, { "epoch": 0.2601547969040619, "grad_norm": 1.4360066652297974, "learning_rate": 6.07217084455567e-06, "loss": 0.4431, "step": 4336 }, { "epoch": 0.2602147957040859, "grad_norm": 1.2020281553268433, "learning_rate": 6.071709528020735e-06, "loss": 0.4531, "step": 4337 }, { "epoch": 0.2602747945041099, "grad_norm": 1.2485229969024658, "learning_rate": 6.071248114364047e-06, "loss": 0.4728, "step": 4338 }, { "epoch": 0.2603347933041339, "grad_norm": 1.2915157079696655, "learning_rate": 6.0707866036030334e-06, "loss": 0.4332, "step": 4339 }, { "epoch": 0.2603947921041579, "grad_norm": 1.2474726438522339, "learning_rate": 6.070324995755121e-06, "loss": 0.4254, "step": 4340 }, { "epoch": 0.26045479090418194, "grad_norm": 1.3750859498977661, "learning_rate": 6.069863290837744e-06, "loss": 0.4741, "step": 4341 }, { "epoch": 0.26051478970420594, "grad_norm": 1.2806379795074463, "learning_rate": 6.069401488868339e-06, "loss": 0.4273, "step": 4342 }, { "epoch": 0.26057478850422994, "grad_norm": 1.4726911783218384, "learning_rate": 6.068939589864346e-06, "loss": 0.5052, "step": 4343 }, { "epoch": 0.26063478730425393, "grad_norm": 1.2313520908355713, "learning_rate": 6.0684775938432085e-06, "loss": 0.4383, "step": 4344 }, { "epoch": 0.26069478610427793, "grad_norm": 1.3223369121551514, "learning_rate": 6.068015500822375e-06, "loss": 0.4899, "step": 4345 }, { "epoch": 0.26075478490430193, "grad_norm": 1.1884760856628418, "learning_rate": 6.0675533108192956e-06, "loss": 0.4162, "step": 4346 }, { "epoch": 0.2608147837043259, "grad_norm": 1.1492711305618286, "learning_rate": 6.067091023851426e-06, "loss": 0.4246, "step": 4347 }, { "epoch": 0.2608747825043499, "grad_norm": 1.4744971990585327, "learning_rate": 6.066628639936222e-06, "loss": 0.4698, "step": 4348 }, { "epoch": 0.2609347813043739, "grad_norm": 1.2684807777404785, "learning_rate": 6.0661661590911494e-06, "loss": 0.4245, "step": 4349 }, { "epoch": 0.2609947801043979, "grad_norm": 1.1166448593139648, "learning_rate": 6.065703581333672e-06, "loss": 0.4036, "step": 4350 }, { "epoch": 0.2610547789044219, "grad_norm": 1.2630586624145508, "learning_rate": 6.06524090668126e-06, "loss": 0.4609, "step": 4351 }, { "epoch": 0.2611147777044459, "grad_norm": 1.1133967638015747, "learning_rate": 6.064778135151385e-06, "loss": 0.4306, "step": 4352 }, { "epoch": 0.2611747765044699, "grad_norm": 1.301109790802002, "learning_rate": 6.0643152667615265e-06, "loss": 0.5401, "step": 4353 }, { "epoch": 0.2612347753044939, "grad_norm": 1.0891034603118896, "learning_rate": 6.063852301529162e-06, "loss": 0.4548, "step": 4354 }, { "epoch": 0.2612947741045179, "grad_norm": 1.239608883857727, "learning_rate": 6.063389239471778e-06, "loss": 0.4507, "step": 4355 }, { "epoch": 0.2613547729045419, "grad_norm": 1.2396851778030396, "learning_rate": 6.062926080606861e-06, "loss": 0.4819, "step": 4356 }, { "epoch": 0.2614147717045659, "grad_norm": 1.1192493438720703, "learning_rate": 6.062462824951902e-06, "loss": 0.4155, "step": 4357 }, { "epoch": 0.2614747705045899, "grad_norm": 1.2740353345870972, "learning_rate": 6.061999472524396e-06, "loss": 0.4571, "step": 4358 }, { "epoch": 0.2615347693046139, "grad_norm": 1.2293773889541626, "learning_rate": 6.061536023341844e-06, "loss": 0.4136, "step": 4359 }, { "epoch": 0.2615947681046379, "grad_norm": 1.1068639755249023, "learning_rate": 6.0610724774217465e-06, "loss": 0.4362, "step": 4360 }, { "epoch": 0.2616547669046619, "grad_norm": 1.300658106803894, "learning_rate": 6.060608834781609e-06, "loss": 0.4561, "step": 4361 }, { "epoch": 0.2617147657046859, "grad_norm": 1.2465338706970215, "learning_rate": 6.060145095438942e-06, "loss": 0.4215, "step": 4362 }, { "epoch": 0.2617747645047099, "grad_norm": 1.1576014757156372, "learning_rate": 6.0596812594112596e-06, "loss": 0.4145, "step": 4363 }, { "epoch": 0.26183476330473393, "grad_norm": 1.2332329750061035, "learning_rate": 6.059217326716077e-06, "loss": 0.4306, "step": 4364 }, { "epoch": 0.2618947621047579, "grad_norm": 1.2275636196136475, "learning_rate": 6.058753297370916e-06, "loss": 0.436, "step": 4365 }, { "epoch": 0.2619547609047819, "grad_norm": 1.183271884918213, "learning_rate": 6.058289171393301e-06, "loss": 0.4355, "step": 4366 }, { "epoch": 0.2620147597048059, "grad_norm": 1.2494367361068726, "learning_rate": 6.05782494880076e-06, "loss": 0.4348, "step": 4367 }, { "epoch": 0.2620747585048299, "grad_norm": 1.3794963359832764, "learning_rate": 6.057360629610823e-06, "loss": 0.4624, "step": 4368 }, { "epoch": 0.2621347573048539, "grad_norm": 1.3388944864273071, "learning_rate": 6.056896213841026e-06, "loss": 0.4502, "step": 4369 }, { "epoch": 0.2621947561048779, "grad_norm": 1.2933743000030518, "learning_rate": 6.056431701508908e-06, "loss": 0.4242, "step": 4370 }, { "epoch": 0.2622547549049019, "grad_norm": 1.2404751777648926, "learning_rate": 6.0559670926320125e-06, "loss": 0.5161, "step": 4371 }, { "epoch": 0.2623147537049259, "grad_norm": 1.4533134698867798, "learning_rate": 6.055502387227885e-06, "loss": 0.454, "step": 4372 }, { "epoch": 0.2623747525049499, "grad_norm": 1.2184184789657593, "learning_rate": 6.0550375853140755e-06, "loss": 0.4808, "step": 4373 }, { "epoch": 0.2624347513049739, "grad_norm": 1.302679181098938, "learning_rate": 6.054572686908137e-06, "loss": 0.4214, "step": 4374 }, { "epoch": 0.2624947501049979, "grad_norm": 1.2049083709716797, "learning_rate": 6.0541076920276265e-06, "loss": 0.4856, "step": 4375 }, { "epoch": 0.2625547489050219, "grad_norm": 1.3269094228744507, "learning_rate": 6.053642600690104e-06, "loss": 0.4712, "step": 4376 }, { "epoch": 0.2626147477050459, "grad_norm": 1.0459048748016357, "learning_rate": 6.053177412913137e-06, "loss": 0.4391, "step": 4377 }, { "epoch": 0.2626747465050699, "grad_norm": 1.2158300876617432, "learning_rate": 6.05271212871429e-06, "loss": 0.4397, "step": 4378 }, { "epoch": 0.2627347453050939, "grad_norm": 1.191422462463379, "learning_rate": 6.052246748111136e-06, "loss": 0.4507, "step": 4379 }, { "epoch": 0.2627947441051179, "grad_norm": 1.220670223236084, "learning_rate": 6.051781271121251e-06, "loss": 0.4694, "step": 4380 }, { "epoch": 0.2628547429051419, "grad_norm": 1.3434568643569946, "learning_rate": 6.0513156977622116e-06, "loss": 0.4945, "step": 4381 }, { "epoch": 0.2629147417051659, "grad_norm": 1.4498683214187622, "learning_rate": 6.050850028051603e-06, "loss": 0.4411, "step": 4382 }, { "epoch": 0.2629747405051899, "grad_norm": 1.2629046440124512, "learning_rate": 6.050384262007011e-06, "loss": 0.4823, "step": 4383 }, { "epoch": 0.26303473930521387, "grad_norm": 1.2994128465652466, "learning_rate": 6.049918399646024e-06, "loss": 0.4104, "step": 4384 }, { "epoch": 0.26309473810523787, "grad_norm": 1.3475321531295776, "learning_rate": 6.0494524409862374e-06, "loss": 0.4633, "step": 4385 }, { "epoch": 0.2631547369052619, "grad_norm": 1.3587027788162231, "learning_rate": 6.048986386045247e-06, "loss": 0.4435, "step": 4386 }, { "epoch": 0.2632147357052859, "grad_norm": 1.2672854661941528, "learning_rate": 6.048520234840654e-06, "loss": 0.4303, "step": 4387 }, { "epoch": 0.2632747345053099, "grad_norm": 1.2800164222717285, "learning_rate": 6.0480539873900636e-06, "loss": 0.4698, "step": 4388 }, { "epoch": 0.2633347333053339, "grad_norm": 1.3575706481933594, "learning_rate": 6.047587643711081e-06, "loss": 0.5086, "step": 4389 }, { "epoch": 0.2633947321053579, "grad_norm": 1.2283743619918823, "learning_rate": 6.047121203821321e-06, "loss": 0.4593, "step": 4390 }, { "epoch": 0.2634547309053819, "grad_norm": 1.2147260904312134, "learning_rate": 6.0466546677383985e-06, "loss": 0.4324, "step": 4391 }, { "epoch": 0.2635147297054059, "grad_norm": 1.2080289125442505, "learning_rate": 6.046188035479931e-06, "loss": 0.445, "step": 4392 }, { "epoch": 0.2635747285054299, "grad_norm": 1.2939395904541016, "learning_rate": 6.045721307063542e-06, "loss": 0.4833, "step": 4393 }, { "epoch": 0.2636347273054539, "grad_norm": 1.2437376976013184, "learning_rate": 6.045254482506857e-06, "loss": 0.4593, "step": 4394 }, { "epoch": 0.2636947261054779, "grad_norm": 1.2964208126068115, "learning_rate": 6.0447875618275065e-06, "loss": 0.4857, "step": 4395 }, { "epoch": 0.2637547249055019, "grad_norm": 1.3506131172180176, "learning_rate": 6.044320545043124e-06, "loss": 0.4768, "step": 4396 }, { "epoch": 0.2638147237055259, "grad_norm": 1.2255231142044067, "learning_rate": 6.043853432171347e-06, "loss": 0.4488, "step": 4397 }, { "epoch": 0.2638747225055499, "grad_norm": 1.327652931213379, "learning_rate": 6.0433862232298146e-06, "loss": 0.4073, "step": 4398 }, { "epoch": 0.2639347213055739, "grad_norm": 1.1791948080062866, "learning_rate": 6.042918918236173e-06, "loss": 0.4269, "step": 4399 }, { "epoch": 0.2639947201055979, "grad_norm": 1.3128057718276978, "learning_rate": 6.042451517208069e-06, "loss": 0.4264, "step": 4400 }, { "epoch": 0.2640547189056219, "grad_norm": 1.2538594007492065, "learning_rate": 6.041984020163155e-06, "loss": 0.4439, "step": 4401 }, { "epoch": 0.2641147177056459, "grad_norm": 1.3962798118591309, "learning_rate": 6.041516427119085e-06, "loss": 0.5029, "step": 4402 }, { "epoch": 0.26417471650566987, "grad_norm": 1.2660225629806519, "learning_rate": 6.041048738093519e-06, "loss": 0.4413, "step": 4403 }, { "epoch": 0.26423471530569387, "grad_norm": 1.189460277557373, "learning_rate": 6.040580953104119e-06, "loss": 0.4015, "step": 4404 }, { "epoch": 0.26429471410571787, "grad_norm": 1.2145657539367676, "learning_rate": 6.040113072168552e-06, "loss": 0.4501, "step": 4405 }, { "epoch": 0.26435471290574186, "grad_norm": 1.1832733154296875, "learning_rate": 6.039645095304486e-06, "loss": 0.4113, "step": 4406 }, { "epoch": 0.26441471170576586, "grad_norm": 1.3025550842285156, "learning_rate": 6.039177022529596e-06, "loss": 0.4494, "step": 4407 }, { "epoch": 0.2644747105057899, "grad_norm": 1.1740858554840088, "learning_rate": 6.038708853861558e-06, "loss": 0.4009, "step": 4408 }, { "epoch": 0.2645347093058139, "grad_norm": 1.202390193939209, "learning_rate": 6.038240589318053e-06, "loss": 0.4583, "step": 4409 }, { "epoch": 0.2645947081058379, "grad_norm": 1.1667102575302124, "learning_rate": 6.0377722289167654e-06, "loss": 0.455, "step": 4410 }, { "epoch": 0.2646547069058619, "grad_norm": 1.1850661039352417, "learning_rate": 6.037303772675382e-06, "loss": 0.4407, "step": 4411 }, { "epoch": 0.2647147057058859, "grad_norm": 1.446686863899231, "learning_rate": 6.0368352206115966e-06, "loss": 0.4451, "step": 4412 }, { "epoch": 0.2647747045059099, "grad_norm": 1.1331537961959839, "learning_rate": 6.036366572743101e-06, "loss": 0.4192, "step": 4413 }, { "epoch": 0.2648347033059339, "grad_norm": 1.2936793565750122, "learning_rate": 6.0358978290875965e-06, "loss": 0.4917, "step": 4414 }, { "epoch": 0.2648947021059579, "grad_norm": 1.2859562635421753, "learning_rate": 6.0354289896627845e-06, "loss": 0.4394, "step": 4415 }, { "epoch": 0.2649547009059819, "grad_norm": 1.2139520645141602, "learning_rate": 6.0349600544863705e-06, "loss": 0.4153, "step": 4416 }, { "epoch": 0.2650146997060059, "grad_norm": 1.2403693199157715, "learning_rate": 6.034491023576064e-06, "loss": 0.4769, "step": 4417 }, { "epoch": 0.2650746985060299, "grad_norm": 1.2245620489120483, "learning_rate": 6.034021896949581e-06, "loss": 0.4351, "step": 4418 }, { "epoch": 0.2651346973060539, "grad_norm": 1.4661214351654053, "learning_rate": 6.033552674624633e-06, "loss": 0.5223, "step": 4419 }, { "epoch": 0.2651946961060779, "grad_norm": 1.315005898475647, "learning_rate": 6.033083356618945e-06, "loss": 0.4489, "step": 4420 }, { "epoch": 0.2652546949061019, "grad_norm": 1.3522663116455078, "learning_rate": 6.032613942950239e-06, "loss": 0.5009, "step": 4421 }, { "epoch": 0.26531469370612587, "grad_norm": 1.3669465780258179, "learning_rate": 6.032144433636243e-06, "loss": 0.5096, "step": 4422 }, { "epoch": 0.26537469250614987, "grad_norm": 1.25503671169281, "learning_rate": 6.031674828694688e-06, "loss": 0.4156, "step": 4423 }, { "epoch": 0.26543469130617386, "grad_norm": 1.1201653480529785, "learning_rate": 6.031205128143309e-06, "loss": 0.4665, "step": 4424 }, { "epoch": 0.26549469010619786, "grad_norm": 1.2671618461608887, "learning_rate": 6.030735331999845e-06, "loss": 0.5147, "step": 4425 }, { "epoch": 0.26555468890622186, "grad_norm": 1.1678345203399658, "learning_rate": 6.030265440282037e-06, "loss": 0.4721, "step": 4426 }, { "epoch": 0.26561468770624586, "grad_norm": 1.2396366596221924, "learning_rate": 6.029795453007631e-06, "loss": 0.4612, "step": 4427 }, { "epoch": 0.26567468650626985, "grad_norm": 1.3194332122802734, "learning_rate": 6.0293253701943775e-06, "loss": 0.4913, "step": 4428 }, { "epoch": 0.26573468530629385, "grad_norm": 1.307190179824829, "learning_rate": 6.028855191860027e-06, "loss": 0.4592, "step": 4429 }, { "epoch": 0.26579468410631785, "grad_norm": 1.3850610256195068, "learning_rate": 6.028384918022338e-06, "loss": 0.4584, "step": 4430 }, { "epoch": 0.2658546829063419, "grad_norm": 1.1695479154586792, "learning_rate": 6.02791454869907e-06, "loss": 0.45, "step": 4431 }, { "epoch": 0.2659146817063659, "grad_norm": 1.2545632123947144, "learning_rate": 6.027444083907986e-06, "loss": 0.4027, "step": 4432 }, { "epoch": 0.2659746805063899, "grad_norm": 1.3627631664276123, "learning_rate": 6.026973523666854e-06, "loss": 0.456, "step": 4433 }, { "epoch": 0.2660346793064139, "grad_norm": 1.4390732049942017, "learning_rate": 6.026502867993446e-06, "loss": 0.4578, "step": 4434 }, { "epoch": 0.2660946781064379, "grad_norm": 1.2173081636428833, "learning_rate": 6.0260321169055346e-06, "loss": 0.4055, "step": 4435 }, { "epoch": 0.2661546769064619, "grad_norm": 1.3625359535217285, "learning_rate": 6.025561270420898e-06, "loss": 0.4653, "step": 4436 }, { "epoch": 0.2662146757064859, "grad_norm": 1.4237048625946045, "learning_rate": 6.0250903285573195e-06, "loss": 0.463, "step": 4437 }, { "epoch": 0.2662746745065099, "grad_norm": 1.300727367401123, "learning_rate": 6.024619291332584e-06, "loss": 0.476, "step": 4438 }, { "epoch": 0.2663346733065339, "grad_norm": 1.3052890300750732, "learning_rate": 6.0241481587644795e-06, "loss": 0.4665, "step": 4439 }, { "epoch": 0.2663946721065579, "grad_norm": 1.32789146900177, "learning_rate": 6.023676930870799e-06, "loss": 0.4073, "step": 4440 }, { "epoch": 0.26645467090658187, "grad_norm": 1.2740375995635986, "learning_rate": 6.023205607669338e-06, "loss": 0.4687, "step": 4441 }, { "epoch": 0.26651466970660587, "grad_norm": 1.2511295080184937, "learning_rate": 6.022734189177898e-06, "loss": 0.4626, "step": 4442 }, { "epoch": 0.26657466850662986, "grad_norm": 1.2066633701324463, "learning_rate": 6.0222626754142814e-06, "loss": 0.3551, "step": 4443 }, { "epoch": 0.26663466730665386, "grad_norm": 1.3038007020950317, "learning_rate": 6.021791066396294e-06, "loss": 0.4538, "step": 4444 }, { "epoch": 0.26669466610667786, "grad_norm": 1.275325059890747, "learning_rate": 6.021319362141748e-06, "loss": 0.4433, "step": 4445 }, { "epoch": 0.26675466490670185, "grad_norm": 1.1176263093948364, "learning_rate": 6.020847562668456e-06, "loss": 0.3875, "step": 4446 }, { "epoch": 0.26681466370672585, "grad_norm": 1.30499267578125, "learning_rate": 6.020375667994238e-06, "loss": 0.4835, "step": 4447 }, { "epoch": 0.26687466250674985, "grad_norm": 1.308707594871521, "learning_rate": 6.0199036781369125e-06, "loss": 0.4716, "step": 4448 }, { "epoch": 0.26693466130677385, "grad_norm": 1.2586994171142578, "learning_rate": 6.019431593114306e-06, "loss": 0.4564, "step": 4449 }, { "epoch": 0.26699466010679784, "grad_norm": 1.2459808588027954, "learning_rate": 6.018959412944247e-06, "loss": 0.4085, "step": 4450 }, { "epoch": 0.26705465890682184, "grad_norm": 1.2557787895202637, "learning_rate": 6.018487137644567e-06, "loss": 0.4243, "step": 4451 }, { "epoch": 0.26711465770684584, "grad_norm": 1.2165735960006714, "learning_rate": 6.0180147672331015e-06, "loss": 0.4591, "step": 4452 }, { "epoch": 0.2671746565068699, "grad_norm": 1.371696949005127, "learning_rate": 6.0175423017276915e-06, "loss": 0.4691, "step": 4453 }, { "epoch": 0.2672346553068939, "grad_norm": 1.328778862953186, "learning_rate": 6.017069741146178e-06, "loss": 0.4652, "step": 4454 }, { "epoch": 0.2672946541069179, "grad_norm": 1.2046148777008057, "learning_rate": 6.016597085506408e-06, "loss": 0.4502, "step": 4455 }, { "epoch": 0.2673546529069419, "grad_norm": 1.2313958406448364, "learning_rate": 6.016124334826231e-06, "loss": 0.4416, "step": 4456 }, { "epoch": 0.2674146517069659, "grad_norm": 1.210768699645996, "learning_rate": 6.015651489123503e-06, "loss": 0.4414, "step": 4457 }, { "epoch": 0.2674746505069899, "grad_norm": 1.2078646421432495, "learning_rate": 6.015178548416079e-06, "loss": 0.4935, "step": 4458 }, { "epoch": 0.26753464930701387, "grad_norm": 1.1478887796401978, "learning_rate": 6.01470551272182e-06, "loss": 0.4414, "step": 4459 }, { "epoch": 0.26759464810703787, "grad_norm": 1.3145849704742432, "learning_rate": 6.01423238205859e-06, "loss": 0.4598, "step": 4460 }, { "epoch": 0.26765464690706187, "grad_norm": 1.2325104475021362, "learning_rate": 6.013759156444259e-06, "loss": 0.4145, "step": 4461 }, { "epoch": 0.26771464570708586, "grad_norm": 1.2147035598754883, "learning_rate": 6.013285835896696e-06, "loss": 0.4395, "step": 4462 }, { "epoch": 0.26777464450710986, "grad_norm": 1.161086082458496, "learning_rate": 6.012812420433779e-06, "loss": 0.4447, "step": 4463 }, { "epoch": 0.26783464330713386, "grad_norm": 1.2781388759613037, "learning_rate": 6.012338910073384e-06, "loss": 0.4676, "step": 4464 }, { "epoch": 0.26789464210715785, "grad_norm": 1.369572639465332, "learning_rate": 6.011865304833396e-06, "loss": 0.453, "step": 4465 }, { "epoch": 0.26795464090718185, "grad_norm": 1.2344757318496704, "learning_rate": 6.011391604731698e-06, "loss": 0.4175, "step": 4466 }, { "epoch": 0.26801463970720585, "grad_norm": 1.407244086265564, "learning_rate": 6.010917809786181e-06, "loss": 0.394, "step": 4467 }, { "epoch": 0.26807463850722985, "grad_norm": 1.3960398435592651, "learning_rate": 6.0104439200147385e-06, "loss": 0.394, "step": 4468 }, { "epoch": 0.26813463730725384, "grad_norm": 1.1995056867599487, "learning_rate": 6.009969935435267e-06, "loss": 0.4172, "step": 4469 }, { "epoch": 0.26819463610727784, "grad_norm": 1.2734349966049194, "learning_rate": 6.009495856065666e-06, "loss": 0.4496, "step": 4470 }, { "epoch": 0.26825463490730184, "grad_norm": 1.2576638460159302, "learning_rate": 6.009021681923841e-06, "loss": 0.4673, "step": 4471 }, { "epoch": 0.26831463370732583, "grad_norm": 1.1789339780807495, "learning_rate": 6.008547413027697e-06, "loss": 0.4037, "step": 4472 }, { "epoch": 0.26837463250734983, "grad_norm": 1.4362682104110718, "learning_rate": 6.008073049395147e-06, "loss": 0.4994, "step": 4473 }, { "epoch": 0.2684346313073738, "grad_norm": 1.343554139137268, "learning_rate": 6.007598591044103e-06, "loss": 0.4989, "step": 4474 }, { "epoch": 0.2684946301073978, "grad_norm": 1.2025154829025269, "learning_rate": 6.007124037992486e-06, "loss": 0.4168, "step": 4475 }, { "epoch": 0.2685546289074219, "grad_norm": 1.17513906955719, "learning_rate": 6.0066493902582155e-06, "loss": 0.4342, "step": 4476 }, { "epoch": 0.2686146277074459, "grad_norm": 1.1951613426208496, "learning_rate": 6.006174647859219e-06, "loss": 0.4227, "step": 4477 }, { "epoch": 0.26867462650746987, "grad_norm": 1.3571597337722778, "learning_rate": 6.005699810813423e-06, "loss": 0.4774, "step": 4478 }, { "epoch": 0.26873462530749387, "grad_norm": 1.175524353981018, "learning_rate": 6.0052248791387625e-06, "loss": 0.4335, "step": 4479 }, { "epoch": 0.26879462410751787, "grad_norm": 1.1752467155456543, "learning_rate": 6.004749852853172e-06, "loss": 0.4153, "step": 4480 }, { "epoch": 0.26885462290754186, "grad_norm": 1.2787853479385376, "learning_rate": 6.00427473197459e-06, "loss": 0.4427, "step": 4481 }, { "epoch": 0.26891462170756586, "grad_norm": 1.1914827823638916, "learning_rate": 6.003799516520961e-06, "loss": 0.4645, "step": 4482 }, { "epoch": 0.26897462050758986, "grad_norm": 1.1274105310440063, "learning_rate": 6.003324206510231e-06, "loss": 0.3933, "step": 4483 }, { "epoch": 0.26903461930761385, "grad_norm": 1.2843220233917236, "learning_rate": 6.002848801960352e-06, "loss": 0.4355, "step": 4484 }, { "epoch": 0.26909461810763785, "grad_norm": 1.252213954925537, "learning_rate": 6.002373302889277e-06, "loss": 0.4483, "step": 4485 }, { "epoch": 0.26915461690766185, "grad_norm": 1.1321250200271606, "learning_rate": 6.0018977093149626e-06, "loss": 0.3911, "step": 4486 }, { "epoch": 0.26921461570768584, "grad_norm": 1.4664666652679443, "learning_rate": 6.00142202125537e-06, "loss": 0.5097, "step": 4487 }, { "epoch": 0.26927461450770984, "grad_norm": 1.339414119720459, "learning_rate": 6.000946238728465e-06, "loss": 0.4734, "step": 4488 }, { "epoch": 0.26933461330773384, "grad_norm": 1.259731650352478, "learning_rate": 6.0004703617522135e-06, "loss": 0.4199, "step": 4489 }, { "epoch": 0.26939461210775784, "grad_norm": 1.4522099494934082, "learning_rate": 5.9999943903445894e-06, "loss": 0.4557, "step": 4490 }, { "epoch": 0.26945461090778183, "grad_norm": 1.2081654071807861, "learning_rate": 5.999518324523568e-06, "loss": 0.4991, "step": 4491 }, { "epoch": 0.26951460970780583, "grad_norm": 1.1396986246109009, "learning_rate": 5.999042164307126e-06, "loss": 0.4539, "step": 4492 }, { "epoch": 0.2695746085078298, "grad_norm": 1.1872766017913818, "learning_rate": 5.998565909713248e-06, "loss": 0.4225, "step": 4493 }, { "epoch": 0.2696346073078538, "grad_norm": 1.2926325798034668, "learning_rate": 5.998089560759918e-06, "loss": 0.4348, "step": 4494 }, { "epoch": 0.2696946061078778, "grad_norm": 1.3340330123901367, "learning_rate": 5.997613117465126e-06, "loss": 0.4452, "step": 4495 }, { "epoch": 0.2697546049079018, "grad_norm": 1.2610923051834106, "learning_rate": 5.9971365798468665e-06, "loss": 0.4365, "step": 4496 }, { "epoch": 0.2698146037079258, "grad_norm": 1.2961326837539673, "learning_rate": 5.996659947923136e-06, "loss": 0.4221, "step": 4497 }, { "epoch": 0.26987460250794987, "grad_norm": 1.2500510215759277, "learning_rate": 5.996183221711933e-06, "loss": 0.4452, "step": 4498 }, { "epoch": 0.26993460130797386, "grad_norm": 1.347856879234314, "learning_rate": 5.995706401231263e-06, "loss": 0.4534, "step": 4499 }, { "epoch": 0.26999460010799786, "grad_norm": 1.3922159671783447, "learning_rate": 5.995229486499132e-06, "loss": 0.4476, "step": 4500 }, { "epoch": 0.27005459890802186, "grad_norm": 1.2075897455215454, "learning_rate": 5.994752477533553e-06, "loss": 0.472, "step": 4501 }, { "epoch": 0.27011459770804586, "grad_norm": 1.3441208600997925, "learning_rate": 5.994275374352538e-06, "loss": 0.5031, "step": 4502 }, { "epoch": 0.27017459650806985, "grad_norm": 1.3006728887557983, "learning_rate": 5.993798176974105e-06, "loss": 0.4413, "step": 4503 }, { "epoch": 0.27023459530809385, "grad_norm": 1.2040646076202393, "learning_rate": 5.993320885416278e-06, "loss": 0.4409, "step": 4504 }, { "epoch": 0.27029459410811785, "grad_norm": 1.226088523864746, "learning_rate": 5.992843499697079e-06, "loss": 0.4463, "step": 4505 }, { "epoch": 0.27035459290814184, "grad_norm": 1.2981560230255127, "learning_rate": 5.99236601983454e-06, "loss": 0.4781, "step": 4506 }, { "epoch": 0.27041459170816584, "grad_norm": 1.374540090560913, "learning_rate": 5.991888445846691e-06, "loss": 0.4675, "step": 4507 }, { "epoch": 0.27047459050818984, "grad_norm": 1.405877947807312, "learning_rate": 5.9914107777515685e-06, "loss": 0.4623, "step": 4508 }, { "epoch": 0.27053458930821384, "grad_norm": 1.3237402439117432, "learning_rate": 5.990933015567212e-06, "loss": 0.4216, "step": 4509 }, { "epoch": 0.27059458810823783, "grad_norm": 1.2838587760925293, "learning_rate": 5.990455159311664e-06, "loss": 0.4123, "step": 4510 }, { "epoch": 0.27065458690826183, "grad_norm": 1.2939926385879517, "learning_rate": 5.9899772090029714e-06, "loss": 0.3864, "step": 4511 }, { "epoch": 0.2707145857082858, "grad_norm": 1.2802534103393555, "learning_rate": 5.989499164659184e-06, "loss": 0.4306, "step": 4512 }, { "epoch": 0.2707745845083098, "grad_norm": 1.2830708026885986, "learning_rate": 5.989021026298354e-06, "loss": 0.4215, "step": 4513 }, { "epoch": 0.2708345833083338, "grad_norm": 1.1173689365386963, "learning_rate": 5.9885427939385405e-06, "loss": 0.4052, "step": 4514 }, { "epoch": 0.2708945821083578, "grad_norm": 1.2482421398162842, "learning_rate": 5.988064467597804e-06, "loss": 0.445, "step": 4515 }, { "epoch": 0.2709545809083818, "grad_norm": 1.318345546722412, "learning_rate": 5.9875860472942075e-06, "loss": 0.4665, "step": 4516 }, { "epoch": 0.2710145797084058, "grad_norm": 1.308820128440857, "learning_rate": 5.98710753304582e-06, "loss": 0.4091, "step": 4517 }, { "epoch": 0.2710745785084298, "grad_norm": 1.1937450170516968, "learning_rate": 5.9866289248707125e-06, "loss": 0.4145, "step": 4518 }, { "epoch": 0.2711345773084538, "grad_norm": 1.1546986103057861, "learning_rate": 5.9861502227869584e-06, "loss": 0.4221, "step": 4519 }, { "epoch": 0.2711945761084778, "grad_norm": 1.1419153213500977, "learning_rate": 5.985671426812638e-06, "loss": 0.464, "step": 4520 }, { "epoch": 0.27125457490850186, "grad_norm": 1.206035852432251, "learning_rate": 5.985192536965832e-06, "loss": 0.4489, "step": 4521 }, { "epoch": 0.27131457370852585, "grad_norm": 1.348814845085144, "learning_rate": 5.9847135532646275e-06, "loss": 0.4766, "step": 4522 }, { "epoch": 0.27137457250854985, "grad_norm": 1.1293509006500244, "learning_rate": 5.984234475727112e-06, "loss": 0.4329, "step": 4523 }, { "epoch": 0.27143457130857385, "grad_norm": 1.2054057121276855, "learning_rate": 5.983755304371377e-06, "loss": 0.4317, "step": 4524 }, { "epoch": 0.27149457010859784, "grad_norm": 1.1392332315444946, "learning_rate": 5.9832760392155216e-06, "loss": 0.4587, "step": 4525 }, { "epoch": 0.27155456890862184, "grad_norm": 1.253176212310791, "learning_rate": 5.982796680277643e-06, "loss": 0.404, "step": 4526 }, { "epoch": 0.27161456770864584, "grad_norm": 1.2128503322601318, "learning_rate": 5.982317227575846e-06, "loss": 0.4459, "step": 4527 }, { "epoch": 0.27167456650866983, "grad_norm": 1.3176307678222656, "learning_rate": 5.981837681128237e-06, "loss": 0.4994, "step": 4528 }, { "epoch": 0.27173456530869383, "grad_norm": 1.2994465827941895, "learning_rate": 5.981358040952926e-06, "loss": 0.4143, "step": 4529 }, { "epoch": 0.27179456410871783, "grad_norm": 1.1993681192398071, "learning_rate": 5.980878307068027e-06, "loss": 0.4966, "step": 4530 }, { "epoch": 0.2718545629087418, "grad_norm": 1.1420221328735352, "learning_rate": 5.9803984794916565e-06, "loss": 0.4, "step": 4531 }, { "epoch": 0.2719145617087658, "grad_norm": 1.1482958793640137, "learning_rate": 5.979918558241936e-06, "loss": 0.4521, "step": 4532 }, { "epoch": 0.2719745605087898, "grad_norm": 1.2152329683303833, "learning_rate": 5.979438543336991e-06, "loss": 0.4075, "step": 4533 }, { "epoch": 0.2720345593088138, "grad_norm": 1.4707951545715332, "learning_rate": 5.978958434794948e-06, "loss": 0.4681, "step": 4534 }, { "epoch": 0.2720945581088378, "grad_norm": 1.0923737287521362, "learning_rate": 5.978478232633938e-06, "loss": 0.5001, "step": 4535 }, { "epoch": 0.2721545569088618, "grad_norm": 1.3844376802444458, "learning_rate": 5.977997936872099e-06, "loss": 0.4329, "step": 4536 }, { "epoch": 0.2722145557088858, "grad_norm": 1.1837661266326904, "learning_rate": 5.977517547527566e-06, "loss": 0.474, "step": 4537 }, { "epoch": 0.2722745545089098, "grad_norm": 1.2581514120101929, "learning_rate": 5.9770370646184834e-06, "loss": 0.4478, "step": 4538 }, { "epoch": 0.2723345533089338, "grad_norm": 1.3736320734024048, "learning_rate": 5.9765564881629965e-06, "loss": 0.4634, "step": 4539 }, { "epoch": 0.2723945521089578, "grad_norm": 1.2302682399749756, "learning_rate": 5.976075818179255e-06, "loss": 0.4243, "step": 4540 }, { "epoch": 0.2724545509089818, "grad_norm": 1.268572211265564, "learning_rate": 5.975595054685409e-06, "loss": 0.419, "step": 4541 }, { "epoch": 0.2725145497090058, "grad_norm": 1.3460218906402588, "learning_rate": 5.975114197699617e-06, "loss": 0.5074, "step": 4542 }, { "epoch": 0.27257454850902985, "grad_norm": 1.330346941947937, "learning_rate": 5.9746332472400385e-06, "loss": 0.4339, "step": 4543 }, { "epoch": 0.27263454730905384, "grad_norm": 1.3707938194274902, "learning_rate": 5.974152203324838e-06, "loss": 0.511, "step": 4544 }, { "epoch": 0.27269454610907784, "grad_norm": 1.2624881267547607, "learning_rate": 5.973671065972178e-06, "loss": 0.4787, "step": 4545 }, { "epoch": 0.27275454490910184, "grad_norm": 1.5289908647537231, "learning_rate": 5.973189835200235e-06, "loss": 0.5051, "step": 4546 }, { "epoch": 0.27281454370912583, "grad_norm": 1.2993651628494263, "learning_rate": 5.972708511027178e-06, "loss": 0.4157, "step": 4547 }, { "epoch": 0.27287454250914983, "grad_norm": 1.2670584917068481, "learning_rate": 5.972227093471188e-06, "loss": 0.3815, "step": 4548 }, { "epoch": 0.27293454130917383, "grad_norm": 1.363112211227417, "learning_rate": 5.971745582550441e-06, "loss": 0.5042, "step": 4549 }, { "epoch": 0.2729945401091978, "grad_norm": 1.3927052021026611, "learning_rate": 5.971263978283127e-06, "loss": 0.5033, "step": 4550 }, { "epoch": 0.2730545389092218, "grad_norm": 1.4029314517974854, "learning_rate": 5.970782280687431e-06, "loss": 0.4487, "step": 4551 }, { "epoch": 0.2731145377092458, "grad_norm": 1.3577831983566284, "learning_rate": 5.970300489781545e-06, "loss": 0.4543, "step": 4552 }, { "epoch": 0.2731745365092698, "grad_norm": 1.2560278177261353, "learning_rate": 5.969818605583665e-06, "loss": 0.4395, "step": 4553 }, { "epoch": 0.2732345353092938, "grad_norm": 1.3521114587783813, "learning_rate": 5.969336628111988e-06, "loss": 0.471, "step": 4554 }, { "epoch": 0.2732945341093178, "grad_norm": 1.259425401687622, "learning_rate": 5.9688545573847155e-06, "loss": 0.4899, "step": 4555 }, { "epoch": 0.2733545329093418, "grad_norm": 1.2472723722457886, "learning_rate": 5.968372393420057e-06, "loss": 0.494, "step": 4556 }, { "epoch": 0.2734145317093658, "grad_norm": 1.2062149047851562, "learning_rate": 5.967890136236216e-06, "loss": 0.4128, "step": 4557 }, { "epoch": 0.2734745305093898, "grad_norm": 1.2051602602005005, "learning_rate": 5.967407785851411e-06, "loss": 0.3907, "step": 4558 }, { "epoch": 0.2735345293094138, "grad_norm": 1.2350661754608154, "learning_rate": 5.9669253422838545e-06, "loss": 0.3821, "step": 4559 }, { "epoch": 0.2735945281094378, "grad_norm": 1.1953797340393066, "learning_rate": 5.966442805551767e-06, "loss": 0.3986, "step": 4560 }, { "epoch": 0.2736545269094618, "grad_norm": 1.1566095352172852, "learning_rate": 5.965960175673371e-06, "loss": 0.4213, "step": 4561 }, { "epoch": 0.2737145257094858, "grad_norm": 1.2161824703216553, "learning_rate": 5.965477452666896e-06, "loss": 0.4366, "step": 4562 }, { "epoch": 0.2737745245095098, "grad_norm": 1.172999382019043, "learning_rate": 5.964994636550568e-06, "loss": 0.4419, "step": 4563 }, { "epoch": 0.2738345233095338, "grad_norm": 1.2088913917541504, "learning_rate": 5.964511727342626e-06, "loss": 0.4559, "step": 4564 }, { "epoch": 0.27389452210955784, "grad_norm": 1.3668783903121948, "learning_rate": 5.964028725061301e-06, "loss": 0.4843, "step": 4565 }, { "epoch": 0.27395452090958183, "grad_norm": 1.2119356393814087, "learning_rate": 5.963545629724839e-06, "loss": 0.4518, "step": 4566 }, { "epoch": 0.27401451970960583, "grad_norm": 1.3063806295394897, "learning_rate": 5.963062441351482e-06, "loss": 0.4967, "step": 4567 }, { "epoch": 0.2740745185096298, "grad_norm": 1.3444806337356567, "learning_rate": 5.96257915995948e-06, "loss": 0.4997, "step": 4568 }, { "epoch": 0.2741345173096538, "grad_norm": 1.261093258857727, "learning_rate": 5.96209578556708e-06, "loss": 0.4181, "step": 4569 }, { "epoch": 0.2741945161096778, "grad_norm": 1.276707649230957, "learning_rate": 5.961612318192541e-06, "loss": 0.4196, "step": 4570 }, { "epoch": 0.2742545149097018, "grad_norm": 1.2208311557769775, "learning_rate": 5.961128757854119e-06, "loss": 0.4098, "step": 4571 }, { "epoch": 0.2743145137097258, "grad_norm": 1.2063027620315552, "learning_rate": 5.960645104570077e-06, "loss": 0.3961, "step": 4572 }, { "epoch": 0.2743745125097498, "grad_norm": 1.3216559886932373, "learning_rate": 5.960161358358681e-06, "loss": 0.4216, "step": 4573 }, { "epoch": 0.2744345113097738, "grad_norm": 1.3358988761901855, "learning_rate": 5.959677519238198e-06, "loss": 0.4087, "step": 4574 }, { "epoch": 0.2744945101097978, "grad_norm": 1.4620097875595093, "learning_rate": 5.959193587226902e-06, "loss": 0.4456, "step": 4575 }, { "epoch": 0.2745545089098218, "grad_norm": 1.1287853717803955, "learning_rate": 5.958709562343066e-06, "loss": 0.4187, "step": 4576 }, { "epoch": 0.2746145077098458, "grad_norm": 1.3114491701126099, "learning_rate": 5.958225444604975e-06, "loss": 0.4384, "step": 4577 }, { "epoch": 0.2746745065098698, "grad_norm": 1.3014253377914429, "learning_rate": 5.957741234030906e-06, "loss": 0.4217, "step": 4578 }, { "epoch": 0.2747345053098938, "grad_norm": 1.2501708269119263, "learning_rate": 5.957256930639149e-06, "loss": 0.4076, "step": 4579 }, { "epoch": 0.2747945041099178, "grad_norm": 1.3419201374053955, "learning_rate": 5.956772534447993e-06, "loss": 0.4509, "step": 4580 }, { "epoch": 0.2748545029099418, "grad_norm": 1.4005862474441528, "learning_rate": 5.956288045475732e-06, "loss": 0.4497, "step": 4581 }, { "epoch": 0.2749145017099658, "grad_norm": 1.3643438816070557, "learning_rate": 5.955803463740662e-06, "loss": 0.4037, "step": 4582 }, { "epoch": 0.2749745005099898, "grad_norm": 1.2537307739257812, "learning_rate": 5.955318789261083e-06, "loss": 0.4501, "step": 4583 }, { "epoch": 0.2750344993100138, "grad_norm": 1.2705367803573608, "learning_rate": 5.9548340220553e-06, "loss": 0.4514, "step": 4584 }, { "epoch": 0.2750944981100378, "grad_norm": 1.326899766921997, "learning_rate": 5.954349162141621e-06, "loss": 0.4691, "step": 4585 }, { "epoch": 0.2751544969100618, "grad_norm": 1.1939325332641602, "learning_rate": 5.9538642095383555e-06, "loss": 0.4774, "step": 4586 }, { "epoch": 0.27521449571008577, "grad_norm": 1.2141939401626587, "learning_rate": 5.953379164263819e-06, "loss": 0.4972, "step": 4587 }, { "epoch": 0.2752744945101098, "grad_norm": 1.2993366718292236, "learning_rate": 5.952894026336328e-06, "loss": 0.4492, "step": 4588 }, { "epoch": 0.2753344933101338, "grad_norm": 1.5386531352996826, "learning_rate": 5.9524087957742065e-06, "loss": 0.5041, "step": 4589 }, { "epoch": 0.2753944921101578, "grad_norm": 1.2040536403656006, "learning_rate": 5.951923472595777e-06, "loss": 0.43, "step": 4590 }, { "epoch": 0.2754544909101818, "grad_norm": 1.1722608804702759, "learning_rate": 5.951438056819368e-06, "loss": 0.4669, "step": 4591 }, { "epoch": 0.2755144897102058, "grad_norm": 1.2591407299041748, "learning_rate": 5.950952548463315e-06, "loss": 0.4282, "step": 4592 }, { "epoch": 0.2755744885102298, "grad_norm": 1.2128690481185913, "learning_rate": 5.950466947545949e-06, "loss": 0.464, "step": 4593 }, { "epoch": 0.2756344873102538, "grad_norm": 1.3370506763458252, "learning_rate": 5.949981254085611e-06, "loss": 0.4595, "step": 4594 }, { "epoch": 0.2756944861102778, "grad_norm": 1.307190179824829, "learning_rate": 5.949495468100642e-06, "loss": 0.4822, "step": 4595 }, { "epoch": 0.2757544849103018, "grad_norm": 1.2648682594299316, "learning_rate": 5.9490095896093895e-06, "loss": 0.4648, "step": 4596 }, { "epoch": 0.2758144837103258, "grad_norm": 1.233228087425232, "learning_rate": 5.948523618630203e-06, "loss": 0.4525, "step": 4597 }, { "epoch": 0.2758744825103498, "grad_norm": 1.3648122549057007, "learning_rate": 5.948037555181435e-06, "loss": 0.4847, "step": 4598 }, { "epoch": 0.2759344813103738, "grad_norm": 1.1652473211288452, "learning_rate": 5.947551399281441e-06, "loss": 0.3942, "step": 4599 }, { "epoch": 0.2759944801103978, "grad_norm": 1.284881591796875, "learning_rate": 5.947065150948583e-06, "loss": 0.4677, "step": 4600 }, { "epoch": 0.2760544789104218, "grad_norm": 1.283643126487732, "learning_rate": 5.946578810201222e-06, "loss": 0.4638, "step": 4601 }, { "epoch": 0.2761144777104458, "grad_norm": 1.2575421333312988, "learning_rate": 5.946092377057726e-06, "loss": 0.4139, "step": 4602 }, { "epoch": 0.2761744765104698, "grad_norm": 1.2194688320159912, "learning_rate": 5.945605851536465e-06, "loss": 0.4412, "step": 4603 }, { "epoch": 0.2762344753104938, "grad_norm": 1.200164794921875, "learning_rate": 5.945119233655813e-06, "loss": 0.4407, "step": 4604 }, { "epoch": 0.2762944741105178, "grad_norm": 1.375411868095398, "learning_rate": 5.9446325234341494e-06, "loss": 0.4855, "step": 4605 }, { "epoch": 0.27635447291054177, "grad_norm": 1.2475749254226685, "learning_rate": 5.944145720889852e-06, "loss": 0.4313, "step": 4606 }, { "epoch": 0.27641447171056577, "grad_norm": 1.3873213529586792, "learning_rate": 5.9436588260413054e-06, "loss": 0.4446, "step": 4607 }, { "epoch": 0.27647447051058976, "grad_norm": 1.3947564363479614, "learning_rate": 5.943171838906899e-06, "loss": 0.4221, "step": 4608 }, { "epoch": 0.27653446931061376, "grad_norm": 1.2996010780334473, "learning_rate": 5.942684759505023e-06, "loss": 0.4698, "step": 4609 }, { "epoch": 0.2765944681106378, "grad_norm": 1.1840473413467407, "learning_rate": 5.942197587854074e-06, "loss": 0.3992, "step": 4610 }, { "epoch": 0.2766544669106618, "grad_norm": 1.347748875617981, "learning_rate": 5.941710323972447e-06, "loss": 0.4362, "step": 4611 }, { "epoch": 0.2767144657106858, "grad_norm": 1.2535157203674316, "learning_rate": 5.941222967878545e-06, "loss": 0.4135, "step": 4612 }, { "epoch": 0.2767744645107098, "grad_norm": 1.3959182500839233, "learning_rate": 5.9407355195907745e-06, "loss": 0.5011, "step": 4613 }, { "epoch": 0.2768344633107338, "grad_norm": 1.2132484912872314, "learning_rate": 5.940247979127543e-06, "loss": 0.3989, "step": 4614 }, { "epoch": 0.2768944621107578, "grad_norm": 1.2648978233337402, "learning_rate": 5.939760346507264e-06, "loss": 0.4726, "step": 4615 }, { "epoch": 0.2769544609107818, "grad_norm": 1.190901517868042, "learning_rate": 5.939272621748352e-06, "loss": 0.3985, "step": 4616 }, { "epoch": 0.2770144597108058, "grad_norm": 1.2391151189804077, "learning_rate": 5.938784804869226e-06, "loss": 0.4914, "step": 4617 }, { "epoch": 0.2770744585108298, "grad_norm": 1.223819375038147, "learning_rate": 5.9382968958883095e-06, "loss": 0.4414, "step": 4618 }, { "epoch": 0.2771344573108538, "grad_norm": 1.1710972785949707, "learning_rate": 5.937808894824027e-06, "loss": 0.4557, "step": 4619 }, { "epoch": 0.2771944561108778, "grad_norm": 1.2300225496292114, "learning_rate": 5.9373208016948095e-06, "loss": 0.4353, "step": 4620 }, { "epoch": 0.2772544549109018, "grad_norm": 1.313109278678894, "learning_rate": 5.936832616519091e-06, "loss": 0.4369, "step": 4621 }, { "epoch": 0.2773144537109258, "grad_norm": 1.3520580530166626, "learning_rate": 5.936344339315306e-06, "loss": 0.4008, "step": 4622 }, { "epoch": 0.2773744525109498, "grad_norm": 1.1674346923828125, "learning_rate": 5.935855970101895e-06, "loss": 0.4784, "step": 4623 }, { "epoch": 0.2774344513109738, "grad_norm": 1.4425957202911377, "learning_rate": 5.935367508897301e-06, "loss": 0.4903, "step": 4624 }, { "epoch": 0.27749445011099777, "grad_norm": 1.32976233959198, "learning_rate": 5.934878955719973e-06, "loss": 0.4621, "step": 4625 }, { "epoch": 0.27755444891102177, "grad_norm": 1.2489243745803833, "learning_rate": 5.934390310588359e-06, "loss": 0.4521, "step": 4626 }, { "epoch": 0.27761444771104576, "grad_norm": 1.1521625518798828, "learning_rate": 5.933901573520916e-06, "loss": 0.3739, "step": 4627 }, { "epoch": 0.27767444651106976, "grad_norm": 1.2126646041870117, "learning_rate": 5.933412744536097e-06, "loss": 0.4218, "step": 4628 }, { "epoch": 0.27773444531109376, "grad_norm": 1.2147928476333618, "learning_rate": 5.932923823652367e-06, "loss": 0.4145, "step": 4629 }, { "epoch": 0.27779444411111776, "grad_norm": 1.1754475831985474, "learning_rate": 5.932434810888189e-06, "loss": 0.4407, "step": 4630 }, { "epoch": 0.27785444291114175, "grad_norm": 1.3249189853668213, "learning_rate": 5.931945706262027e-06, "loss": 0.4663, "step": 4631 }, { "epoch": 0.27791444171116575, "grad_norm": 1.296442985534668, "learning_rate": 5.931456509792359e-06, "loss": 0.4737, "step": 4632 }, { "epoch": 0.2779744405111898, "grad_norm": 1.332782506942749, "learning_rate": 5.930967221497655e-06, "loss": 0.4529, "step": 4633 }, { "epoch": 0.2780344393112138, "grad_norm": 1.3119361400604248, "learning_rate": 5.930477841396395e-06, "loss": 0.4986, "step": 4634 }, { "epoch": 0.2780944381112378, "grad_norm": 1.1797316074371338, "learning_rate": 5.92998836950706e-06, "loss": 0.4256, "step": 4635 }, { "epoch": 0.2781544369112618, "grad_norm": 1.243808388710022, "learning_rate": 5.9294988058481345e-06, "loss": 0.4646, "step": 4636 }, { "epoch": 0.2782144357112858, "grad_norm": 1.2375081777572632, "learning_rate": 5.929009150438108e-06, "loss": 0.3989, "step": 4637 }, { "epoch": 0.2782744345113098, "grad_norm": 1.3268976211547852, "learning_rate": 5.928519403295473e-06, "loss": 0.4971, "step": 4638 }, { "epoch": 0.2783344333113338, "grad_norm": 1.2337547540664673, "learning_rate": 5.928029564438725e-06, "loss": 0.4269, "step": 4639 }, { "epoch": 0.2783944321113578, "grad_norm": 1.1008535623550415, "learning_rate": 5.927539633886361e-06, "loss": 0.3957, "step": 4640 }, { "epoch": 0.2784544309113818, "grad_norm": 1.1702637672424316, "learning_rate": 5.927049611656885e-06, "loss": 0.4521, "step": 4641 }, { "epoch": 0.2785144297114058, "grad_norm": 1.1714531183242798, "learning_rate": 5.9265594977688044e-06, "loss": 0.4019, "step": 4642 }, { "epoch": 0.2785744285114298, "grad_norm": 1.2819395065307617, "learning_rate": 5.926069292240626e-06, "loss": 0.4637, "step": 4643 }, { "epoch": 0.27863442731145377, "grad_norm": 1.314932942390442, "learning_rate": 5.925578995090863e-06, "loss": 0.4536, "step": 4644 }, { "epoch": 0.27869442611147777, "grad_norm": 1.3659873008728027, "learning_rate": 5.925088606338034e-06, "loss": 0.455, "step": 4645 }, { "epoch": 0.27875442491150176, "grad_norm": 1.4471943378448486, "learning_rate": 5.924598126000655e-06, "loss": 0.4738, "step": 4646 }, { "epoch": 0.27881442371152576, "grad_norm": 1.291366457939148, "learning_rate": 5.924107554097252e-06, "loss": 0.4422, "step": 4647 }, { "epoch": 0.27887442251154976, "grad_norm": 1.2507754564285278, "learning_rate": 5.923616890646351e-06, "loss": 0.3887, "step": 4648 }, { "epoch": 0.27893442131157375, "grad_norm": 1.4990832805633545, "learning_rate": 5.923126135666482e-06, "loss": 0.5198, "step": 4649 }, { "epoch": 0.27899442011159775, "grad_norm": 1.4702950716018677, "learning_rate": 5.922635289176178e-06, "loss": 0.4761, "step": 4650 }, { "epoch": 0.27905441891162175, "grad_norm": 1.2702637910842896, "learning_rate": 5.922144351193978e-06, "loss": 0.5009, "step": 4651 }, { "epoch": 0.27911441771164575, "grad_norm": 1.2349026203155518, "learning_rate": 5.92165332173842e-06, "loss": 0.4727, "step": 4652 }, { "epoch": 0.27917441651166974, "grad_norm": 1.36714768409729, "learning_rate": 5.921162200828049e-06, "loss": 0.432, "step": 4653 }, { "epoch": 0.27923441531169374, "grad_norm": 1.2188975811004639, "learning_rate": 5.920670988481412e-06, "loss": 0.451, "step": 4654 }, { "epoch": 0.2792944141117178, "grad_norm": 1.3758078813552856, "learning_rate": 5.920179684717062e-06, "loss": 0.4977, "step": 4655 }, { "epoch": 0.2793544129117418, "grad_norm": 1.2287307977676392, "learning_rate": 5.919688289553551e-06, "loss": 0.4729, "step": 4656 }, { "epoch": 0.2794144117117658, "grad_norm": 1.3929578065872192, "learning_rate": 5.919196803009437e-06, "loss": 0.4748, "step": 4657 }, { "epoch": 0.2794744105117898, "grad_norm": 1.170156717300415, "learning_rate": 5.918705225103282e-06, "loss": 0.5193, "step": 4658 }, { "epoch": 0.2795344093118138, "grad_norm": 1.26938796043396, "learning_rate": 5.918213555853649e-06, "loss": 0.4487, "step": 4659 }, { "epoch": 0.2795944081118378, "grad_norm": 1.325885534286499, "learning_rate": 5.917721795279108e-06, "loss": 0.3862, "step": 4660 }, { "epoch": 0.2796544069118618, "grad_norm": 1.3876187801361084, "learning_rate": 5.917229943398231e-06, "loss": 0.4795, "step": 4661 }, { "epoch": 0.27971440571188577, "grad_norm": 1.232845425605774, "learning_rate": 5.916738000229591e-06, "loss": 0.4638, "step": 4662 }, { "epoch": 0.27977440451190977, "grad_norm": 1.2542959451675415, "learning_rate": 5.9162459657917666e-06, "loss": 0.5132, "step": 4663 }, { "epoch": 0.27983440331193377, "grad_norm": 1.2409003973007202, "learning_rate": 5.915753840103343e-06, "loss": 0.4202, "step": 4664 }, { "epoch": 0.27989440211195776, "grad_norm": 1.3161078691482544, "learning_rate": 5.9152616231829e-06, "loss": 0.3889, "step": 4665 }, { "epoch": 0.27995440091198176, "grad_norm": 1.2259461879730225, "learning_rate": 5.914769315049031e-06, "loss": 0.4226, "step": 4666 }, { "epoch": 0.28001439971200576, "grad_norm": 1.2658412456512451, "learning_rate": 5.914276915720327e-06, "loss": 0.4834, "step": 4667 }, { "epoch": 0.28007439851202975, "grad_norm": 1.2253811359405518, "learning_rate": 5.913784425215382e-06, "loss": 0.4776, "step": 4668 }, { "epoch": 0.28013439731205375, "grad_norm": 1.3646337985992432, "learning_rate": 5.913291843552797e-06, "loss": 0.4516, "step": 4669 }, { "epoch": 0.28019439611207775, "grad_norm": 1.1802027225494385, "learning_rate": 5.912799170751175e-06, "loss": 0.4774, "step": 4670 }, { "epoch": 0.28025439491210175, "grad_norm": 1.2996188402175903, "learning_rate": 5.91230640682912e-06, "loss": 0.4471, "step": 4671 }, { "epoch": 0.28031439371212574, "grad_norm": 1.2387652397155762, "learning_rate": 5.911813551805242e-06, "loss": 0.4577, "step": 4672 }, { "epoch": 0.28037439251214974, "grad_norm": 1.49040687084198, "learning_rate": 5.911320605698155e-06, "loss": 0.5109, "step": 4673 }, { "epoch": 0.28043439131217374, "grad_norm": 1.2459862232208252, "learning_rate": 5.910827568526474e-06, "loss": 0.46, "step": 4674 }, { "epoch": 0.28049439011219773, "grad_norm": 1.1802552938461304, "learning_rate": 5.91033444030882e-06, "loss": 0.4615, "step": 4675 }, { "epoch": 0.28055438891222173, "grad_norm": 1.2872214317321777, "learning_rate": 5.909841221063816e-06, "loss": 0.4369, "step": 4676 }, { "epoch": 0.2806143877122457, "grad_norm": 1.3465626239776611, "learning_rate": 5.909347910810089e-06, "loss": 0.469, "step": 4677 }, { "epoch": 0.2806743865122698, "grad_norm": 1.316870927810669, "learning_rate": 5.908854509566266e-06, "loss": 0.4574, "step": 4678 }, { "epoch": 0.2807343853122938, "grad_norm": 1.1474827527999878, "learning_rate": 5.908361017350985e-06, "loss": 0.4365, "step": 4679 }, { "epoch": 0.2807943841123178, "grad_norm": 1.2277095317840576, "learning_rate": 5.9078674341828795e-06, "loss": 0.4169, "step": 4680 }, { "epoch": 0.28085438291234177, "grad_norm": 1.3196316957473755, "learning_rate": 5.907373760080592e-06, "loss": 0.4481, "step": 4681 }, { "epoch": 0.28091438171236577, "grad_norm": 1.333278775215149, "learning_rate": 5.906879995062766e-06, "loss": 0.4494, "step": 4682 }, { "epoch": 0.28097438051238977, "grad_norm": 1.1773940324783325, "learning_rate": 5.906386139148047e-06, "loss": 0.3388, "step": 4683 }, { "epoch": 0.28103437931241376, "grad_norm": 1.208803653717041, "learning_rate": 5.905892192355087e-06, "loss": 0.4439, "step": 4684 }, { "epoch": 0.28109437811243776, "grad_norm": 1.2612462043762207, "learning_rate": 5.905398154702541e-06, "loss": 0.4561, "step": 4685 }, { "epoch": 0.28115437691246176, "grad_norm": 1.3319951295852661, "learning_rate": 5.9049040262090655e-06, "loss": 0.4565, "step": 4686 }, { "epoch": 0.28121437571248575, "grad_norm": 1.278273105621338, "learning_rate": 5.9044098068933216e-06, "loss": 0.4395, "step": 4687 }, { "epoch": 0.28127437451250975, "grad_norm": 1.1941627264022827, "learning_rate": 5.903915496773974e-06, "loss": 0.453, "step": 4688 }, { "epoch": 0.28133437331253375, "grad_norm": 1.2849295139312744, "learning_rate": 5.9034210958696895e-06, "loss": 0.389, "step": 4689 }, { "epoch": 0.28139437211255774, "grad_norm": 1.4347782135009766, "learning_rate": 5.902926604199141e-06, "loss": 0.4549, "step": 4690 }, { "epoch": 0.28145437091258174, "grad_norm": 1.4317193031311035, "learning_rate": 5.902432021781001e-06, "loss": 0.484, "step": 4691 }, { "epoch": 0.28151436971260574, "grad_norm": 1.1917482614517212, "learning_rate": 5.9019373486339505e-06, "loss": 0.4132, "step": 4692 }, { "epoch": 0.28157436851262974, "grad_norm": 1.2718865871429443, "learning_rate": 5.901442584776669e-06, "loss": 0.4375, "step": 4693 }, { "epoch": 0.28163436731265373, "grad_norm": 1.3110014200210571, "learning_rate": 5.900947730227842e-06, "loss": 0.4399, "step": 4694 }, { "epoch": 0.28169436611267773, "grad_norm": 1.3033512830734253, "learning_rate": 5.900452785006158e-06, "loss": 0.4501, "step": 4695 }, { "epoch": 0.2817543649127017, "grad_norm": 1.3002691268920898, "learning_rate": 5.899957749130309e-06, "loss": 0.4589, "step": 4696 }, { "epoch": 0.2818143637127257, "grad_norm": 1.4363735914230347, "learning_rate": 5.89946262261899e-06, "loss": 0.5027, "step": 4697 }, { "epoch": 0.2818743625127497, "grad_norm": 1.2421417236328125, "learning_rate": 5.898967405490899e-06, "loss": 0.3983, "step": 4698 }, { "epoch": 0.2819343613127737, "grad_norm": 1.191238284111023, "learning_rate": 5.89847209776474e-06, "loss": 0.4731, "step": 4699 }, { "epoch": 0.28199436011279777, "grad_norm": 1.4116421937942505, "learning_rate": 5.897976699459216e-06, "loss": 0.4896, "step": 4700 }, { "epoch": 0.28205435891282177, "grad_norm": 1.30008065700531, "learning_rate": 5.8974812105930375e-06, "loss": 0.4773, "step": 4701 }, { "epoch": 0.28211435771284576, "grad_norm": 1.2923481464385986, "learning_rate": 5.896985631184916e-06, "loss": 0.4322, "step": 4702 }, { "epoch": 0.28217435651286976, "grad_norm": 1.3709015846252441, "learning_rate": 5.896489961253569e-06, "loss": 0.4565, "step": 4703 }, { "epoch": 0.28223435531289376, "grad_norm": 1.2855108976364136, "learning_rate": 5.895994200817715e-06, "loss": 0.4346, "step": 4704 }, { "epoch": 0.28229435411291776, "grad_norm": 1.3940136432647705, "learning_rate": 5.895498349896075e-06, "loss": 0.4944, "step": 4705 }, { "epoch": 0.28235435291294175, "grad_norm": 1.3065840005874634, "learning_rate": 5.895002408507377e-06, "loss": 0.4354, "step": 4706 }, { "epoch": 0.28241435171296575, "grad_norm": 1.2938590049743652, "learning_rate": 5.894506376670348e-06, "loss": 0.461, "step": 4707 }, { "epoch": 0.28247435051298975, "grad_norm": 1.3618128299713135, "learning_rate": 5.894010254403724e-06, "loss": 0.4584, "step": 4708 }, { "epoch": 0.28253434931301374, "grad_norm": 1.3329495191574097, "learning_rate": 5.893514041726239e-06, "loss": 0.4424, "step": 4709 }, { "epoch": 0.28259434811303774, "grad_norm": 1.118638038635254, "learning_rate": 5.893017738656634e-06, "loss": 0.4558, "step": 4710 }, { "epoch": 0.28265434691306174, "grad_norm": 1.2814959287643433, "learning_rate": 5.892521345213652e-06, "loss": 0.4573, "step": 4711 }, { "epoch": 0.28271434571308574, "grad_norm": 1.3580539226531982, "learning_rate": 5.892024861416039e-06, "loss": 0.4257, "step": 4712 }, { "epoch": 0.28277434451310973, "grad_norm": 1.2354488372802734, "learning_rate": 5.891528287282544e-06, "loss": 0.4675, "step": 4713 }, { "epoch": 0.28283434331313373, "grad_norm": 1.3429967164993286, "learning_rate": 5.891031622831921e-06, "loss": 0.4358, "step": 4714 }, { "epoch": 0.2828943421131577, "grad_norm": 1.24846613407135, "learning_rate": 5.890534868082929e-06, "loss": 0.464, "step": 4715 }, { "epoch": 0.2829543409131817, "grad_norm": 1.2540868520736694, "learning_rate": 5.890038023054325e-06, "loss": 0.4183, "step": 4716 }, { "epoch": 0.2830143397132057, "grad_norm": 1.3666836023330688, "learning_rate": 5.889541087764875e-06, "loss": 0.5211, "step": 4717 }, { "epoch": 0.2830743385132297, "grad_norm": 1.1805592775344849, "learning_rate": 5.889044062233343e-06, "loss": 0.4546, "step": 4718 }, { "epoch": 0.2831343373132537, "grad_norm": 1.2478981018066406, "learning_rate": 5.888546946478502e-06, "loss": 0.4348, "step": 4719 }, { "epoch": 0.2831943361132777, "grad_norm": 1.2889689207077026, "learning_rate": 5.888049740519126e-06, "loss": 0.4527, "step": 4720 }, { "epoch": 0.2832543349133017, "grad_norm": 1.2323862314224243, "learning_rate": 5.8875524443739905e-06, "loss": 0.4492, "step": 4721 }, { "epoch": 0.28331433371332576, "grad_norm": 1.162513017654419, "learning_rate": 5.887055058061876e-06, "loss": 0.4522, "step": 4722 }, { "epoch": 0.28337433251334976, "grad_norm": 1.3853671550750732, "learning_rate": 5.886557581601569e-06, "loss": 0.5048, "step": 4723 }, { "epoch": 0.28343433131337376, "grad_norm": 1.243817925453186, "learning_rate": 5.886060015011854e-06, "loss": 0.4763, "step": 4724 }, { "epoch": 0.28349433011339775, "grad_norm": 1.284959077835083, "learning_rate": 5.8855623583115235e-06, "loss": 0.4961, "step": 4725 }, { "epoch": 0.28355432891342175, "grad_norm": 1.287179708480835, "learning_rate": 5.885064611519372e-06, "loss": 0.438, "step": 4726 }, { "epoch": 0.28361432771344575, "grad_norm": 1.340787649154663, "learning_rate": 5.884566774654195e-06, "loss": 0.4746, "step": 4727 }, { "epoch": 0.28367432651346974, "grad_norm": 1.1921415328979492, "learning_rate": 5.884068847734796e-06, "loss": 0.4265, "step": 4728 }, { "epoch": 0.28373432531349374, "grad_norm": 1.367008924484253, "learning_rate": 5.883570830779979e-06, "loss": 0.4739, "step": 4729 }, { "epoch": 0.28379432411351774, "grad_norm": 1.265937328338623, "learning_rate": 5.88307272380855e-06, "loss": 0.4334, "step": 4730 }, { "epoch": 0.28385432291354173, "grad_norm": 1.1403290033340454, "learning_rate": 5.882574526839321e-06, "loss": 0.3863, "step": 4731 }, { "epoch": 0.28391432171356573, "grad_norm": 1.2052433490753174, "learning_rate": 5.882076239891108e-06, "loss": 0.4522, "step": 4732 }, { "epoch": 0.28397432051358973, "grad_norm": 1.2625787258148193, "learning_rate": 5.881577862982729e-06, "loss": 0.4129, "step": 4733 }, { "epoch": 0.2840343193136137, "grad_norm": 1.1767855882644653, "learning_rate": 5.881079396133003e-06, "loss": 0.4116, "step": 4734 }, { "epoch": 0.2840943181136377, "grad_norm": 1.2333340644836426, "learning_rate": 5.880580839360758e-06, "loss": 0.4491, "step": 4735 }, { "epoch": 0.2841543169136617, "grad_norm": 1.4018774032592773, "learning_rate": 5.88008219268482e-06, "loss": 0.4753, "step": 4736 }, { "epoch": 0.2842143157136857, "grad_norm": 1.1889238357543945, "learning_rate": 5.8795834561240206e-06, "loss": 0.4314, "step": 4737 }, { "epoch": 0.2842743145137097, "grad_norm": 1.4137563705444336, "learning_rate": 5.8790846296971956e-06, "loss": 0.458, "step": 4738 }, { "epoch": 0.2843343133137337, "grad_norm": 1.3199408054351807, "learning_rate": 5.878585713423183e-06, "loss": 0.4335, "step": 4739 }, { "epoch": 0.2843943121137577, "grad_norm": 1.3184893131256104, "learning_rate": 5.8780867073208254e-06, "loss": 0.4291, "step": 4740 }, { "epoch": 0.2844543109137817, "grad_norm": 1.2542833089828491, "learning_rate": 5.877587611408967e-06, "loss": 0.4163, "step": 4741 }, { "epoch": 0.2845143097138057, "grad_norm": 1.2506974935531616, "learning_rate": 5.877088425706457e-06, "loss": 0.4252, "step": 4742 }, { "epoch": 0.2845743085138297, "grad_norm": 1.5095118284225464, "learning_rate": 5.876589150232148e-06, "loss": 0.5114, "step": 4743 }, { "epoch": 0.2846343073138537, "grad_norm": 1.264278531074524, "learning_rate": 5.876089785004893e-06, "loss": 0.4452, "step": 4744 }, { "epoch": 0.28469430611387775, "grad_norm": 1.3746501207351685, "learning_rate": 5.875590330043553e-06, "loss": 0.4259, "step": 4745 }, { "epoch": 0.28475430491390175, "grad_norm": 1.1796848773956299, "learning_rate": 5.8750907853669895e-06, "loss": 0.4555, "step": 4746 }, { "epoch": 0.28481430371392574, "grad_norm": 1.230705976486206, "learning_rate": 5.874591150994068e-06, "loss": 0.4394, "step": 4747 }, { "epoch": 0.28487430251394974, "grad_norm": 1.1464794874191284, "learning_rate": 5.874091426943656e-06, "loss": 0.4631, "step": 4748 }, { "epoch": 0.28493430131397374, "grad_norm": 1.295670747756958, "learning_rate": 5.873591613234628e-06, "loss": 0.4142, "step": 4749 }, { "epoch": 0.28499430011399773, "grad_norm": 1.243279218673706, "learning_rate": 5.87309170988586e-06, "loss": 0.4518, "step": 4750 }, { "epoch": 0.28505429891402173, "grad_norm": 1.2157760858535767, "learning_rate": 5.872591716916228e-06, "loss": 0.3944, "step": 4751 }, { "epoch": 0.28511429771404573, "grad_norm": 1.1751177310943604, "learning_rate": 5.872091634344618e-06, "loss": 0.3695, "step": 4752 }, { "epoch": 0.2851742965140697, "grad_norm": 1.2898166179656982, "learning_rate": 5.871591462189913e-06, "loss": 0.4393, "step": 4753 }, { "epoch": 0.2852342953140937, "grad_norm": 1.2933810949325562, "learning_rate": 5.8710912004710035e-06, "loss": 0.4627, "step": 4754 }, { "epoch": 0.2852942941141177, "grad_norm": 1.4240049123764038, "learning_rate": 5.870590849206783e-06, "loss": 0.4292, "step": 4755 }, { "epoch": 0.2853542929141417, "grad_norm": 1.3596738576889038, "learning_rate": 5.8700904084161454e-06, "loss": 0.4418, "step": 4756 }, { "epoch": 0.2854142917141657, "grad_norm": 1.3722256422042847, "learning_rate": 5.869589878117991e-06, "loss": 0.4215, "step": 4757 }, { "epoch": 0.2854742905141897, "grad_norm": 1.2295523881912231, "learning_rate": 5.869089258331225e-06, "loss": 0.4831, "step": 4758 }, { "epoch": 0.2855342893142137, "grad_norm": 1.232431411743164, "learning_rate": 5.868588549074749e-06, "loss": 0.4574, "step": 4759 }, { "epoch": 0.2855942881142377, "grad_norm": 1.3644022941589355, "learning_rate": 5.8680877503674755e-06, "loss": 0.4297, "step": 4760 }, { "epoch": 0.2856542869142617, "grad_norm": 1.3370193243026733, "learning_rate": 5.867586862228316e-06, "loss": 0.478, "step": 4761 }, { "epoch": 0.2857142857142857, "grad_norm": 1.2787867784500122, "learning_rate": 5.867085884676188e-06, "loss": 0.4575, "step": 4762 }, { "epoch": 0.2857742845143097, "grad_norm": 1.252649188041687, "learning_rate": 5.8665848177300104e-06, "loss": 0.4473, "step": 4763 }, { "epoch": 0.2858342833143337, "grad_norm": 1.2333611249923706, "learning_rate": 5.866083661408707e-06, "loss": 0.4356, "step": 4764 }, { "epoch": 0.2858942821143577, "grad_norm": 1.1822820901870728, "learning_rate": 5.865582415731203e-06, "loss": 0.3878, "step": 4765 }, { "epoch": 0.2859542809143817, "grad_norm": 1.2178562879562378, "learning_rate": 5.865081080716428e-06, "loss": 0.4803, "step": 4766 }, { "epoch": 0.28601427971440574, "grad_norm": 1.321691632270813, "learning_rate": 5.864579656383317e-06, "loss": 0.4419, "step": 4767 }, { "epoch": 0.28607427851442974, "grad_norm": 1.2897614240646362, "learning_rate": 5.864078142750806e-06, "loss": 0.4408, "step": 4768 }, { "epoch": 0.28613427731445373, "grad_norm": 1.2335541248321533, "learning_rate": 5.863576539837833e-06, "loss": 0.4093, "step": 4769 }, { "epoch": 0.28619427611447773, "grad_norm": 1.1236813068389893, "learning_rate": 5.863074847663343e-06, "loss": 0.4238, "step": 4770 }, { "epoch": 0.2862542749145017, "grad_norm": 1.2173019647598267, "learning_rate": 5.862573066246281e-06, "loss": 0.4733, "step": 4771 }, { "epoch": 0.2863142737145257, "grad_norm": 1.1814898252487183, "learning_rate": 5.8620711956056e-06, "loss": 0.4443, "step": 4772 }, { "epoch": 0.2863742725145497, "grad_norm": 1.1369174718856812, "learning_rate": 5.86156923576025e-06, "loss": 0.4144, "step": 4773 }, { "epoch": 0.2864342713145737, "grad_norm": 1.3269001245498657, "learning_rate": 5.86106718672919e-06, "loss": 0.4554, "step": 4774 }, { "epoch": 0.2864942701145977, "grad_norm": 1.1316217184066772, "learning_rate": 5.860565048531379e-06, "loss": 0.3887, "step": 4775 }, { "epoch": 0.2865542689146217, "grad_norm": 1.2782851457595825, "learning_rate": 5.860062821185781e-06, "loss": 0.4347, "step": 4776 }, { "epoch": 0.2866142677146457, "grad_norm": 1.331979513168335, "learning_rate": 5.859560504711362e-06, "loss": 0.4546, "step": 4777 }, { "epoch": 0.2866742665146697, "grad_norm": 1.162933588027954, "learning_rate": 5.859058099127095e-06, "loss": 0.4268, "step": 4778 }, { "epoch": 0.2867342653146937, "grad_norm": 1.240270972251892, "learning_rate": 5.858555604451949e-06, "loss": 0.4667, "step": 4779 }, { "epoch": 0.2867942641147177, "grad_norm": 1.1655150651931763, "learning_rate": 5.858053020704904e-06, "loss": 0.4131, "step": 4780 }, { "epoch": 0.2868542629147417, "grad_norm": 1.146561622619629, "learning_rate": 5.857550347904941e-06, "loss": 0.4253, "step": 4781 }, { "epoch": 0.2869142617147657, "grad_norm": 1.400290846824646, "learning_rate": 5.857047586071042e-06, "loss": 0.4896, "step": 4782 }, { "epoch": 0.2869742605147897, "grad_norm": 1.271101713180542, "learning_rate": 5.856544735222193e-06, "loss": 0.4615, "step": 4783 }, { "epoch": 0.2870342593148137, "grad_norm": 1.346023440361023, "learning_rate": 5.856041795377386e-06, "loss": 0.4669, "step": 4784 }, { "epoch": 0.2870942581148377, "grad_norm": 1.198882818222046, "learning_rate": 5.855538766555616e-06, "loss": 0.3719, "step": 4785 }, { "epoch": 0.2871542569148617, "grad_norm": 1.29096519947052, "learning_rate": 5.8550356487758775e-06, "loss": 0.4291, "step": 4786 }, { "epoch": 0.2872142557148857, "grad_norm": 1.2056419849395752, "learning_rate": 5.854532442057172e-06, "loss": 0.4351, "step": 4787 }, { "epoch": 0.2872742545149097, "grad_norm": 1.1619114875793457, "learning_rate": 5.8540291464185036e-06, "loss": 0.4006, "step": 4788 }, { "epoch": 0.2873342533149337, "grad_norm": 1.4037914276123047, "learning_rate": 5.8535257618788785e-06, "loss": 0.4181, "step": 4789 }, { "epoch": 0.2873942521149577, "grad_norm": 1.3209799528121948, "learning_rate": 5.853022288457308e-06, "loss": 0.4935, "step": 4790 }, { "epoch": 0.2874542509149817, "grad_norm": 1.2071142196655273, "learning_rate": 5.852518726172807e-06, "loss": 0.4343, "step": 4791 }, { "epoch": 0.2875142497150057, "grad_norm": 1.2337136268615723, "learning_rate": 5.852015075044392e-06, "loss": 0.4579, "step": 4792 }, { "epoch": 0.2875742485150297, "grad_norm": 1.2109800577163696, "learning_rate": 5.851511335091083e-06, "loss": 0.4757, "step": 4793 }, { "epoch": 0.2876342473150537, "grad_norm": 1.2502875328063965, "learning_rate": 5.851007506331904e-06, "loss": 0.4652, "step": 4794 }, { "epoch": 0.2876942461150777, "grad_norm": 1.211006999015808, "learning_rate": 5.850503588785883e-06, "loss": 0.4124, "step": 4795 }, { "epoch": 0.2877542449151017, "grad_norm": 1.1485044956207275, "learning_rate": 5.84999958247205e-06, "loss": 0.42, "step": 4796 }, { "epoch": 0.2878142437151257, "grad_norm": 1.260810136795044, "learning_rate": 5.8494954874094395e-06, "loss": 0.4535, "step": 4797 }, { "epoch": 0.2878742425151497, "grad_norm": 1.0893759727478027, "learning_rate": 5.848991303617089e-06, "loss": 0.406, "step": 4798 }, { "epoch": 0.2879342413151737, "grad_norm": 1.3103489875793457, "learning_rate": 5.848487031114039e-06, "loss": 0.4638, "step": 4799 }, { "epoch": 0.2879942401151977, "grad_norm": 1.2429440021514893, "learning_rate": 5.847982669919332e-06, "loss": 0.424, "step": 4800 }, { "epoch": 0.2880542389152217, "grad_norm": 1.2417981624603271, "learning_rate": 5.8474782200520185e-06, "loss": 0.4337, "step": 4801 }, { "epoch": 0.2881142377152457, "grad_norm": 1.2910770177841187, "learning_rate": 5.846973681531147e-06, "loss": 0.4641, "step": 4802 }, { "epoch": 0.2881742365152697, "grad_norm": 1.2319505214691162, "learning_rate": 5.846469054375772e-06, "loss": 0.4305, "step": 4803 }, { "epoch": 0.2882342353152937, "grad_norm": 1.2160518169403076, "learning_rate": 5.845964338604953e-06, "loss": 0.4348, "step": 4804 }, { "epoch": 0.2882942341153177, "grad_norm": 1.1792922019958496, "learning_rate": 5.845459534237747e-06, "loss": 0.4249, "step": 4805 }, { "epoch": 0.2883542329153417, "grad_norm": 1.3216878175735474, "learning_rate": 5.8449546412932225e-06, "loss": 0.4533, "step": 4806 }, { "epoch": 0.2884142317153657, "grad_norm": 1.204778790473938, "learning_rate": 5.844449659790443e-06, "loss": 0.4134, "step": 4807 }, { "epoch": 0.2884742305153897, "grad_norm": 1.220524549484253, "learning_rate": 5.843944589748482e-06, "loss": 0.4148, "step": 4808 }, { "epoch": 0.28853422931541367, "grad_norm": 1.160364031791687, "learning_rate": 5.843439431186413e-06, "loss": 0.4229, "step": 4809 }, { "epoch": 0.28859422811543767, "grad_norm": 1.1698380708694458, "learning_rate": 5.842934184123313e-06, "loss": 0.428, "step": 4810 }, { "epoch": 0.28865422691546166, "grad_norm": 1.1855511665344238, "learning_rate": 5.842428848578263e-06, "loss": 0.4602, "step": 4811 }, { "epoch": 0.2887142257154857, "grad_norm": 1.2412158250808716, "learning_rate": 5.841923424570347e-06, "loss": 0.3858, "step": 4812 }, { "epoch": 0.2887742245155097, "grad_norm": 1.2384512424468994, "learning_rate": 5.841417912118654e-06, "loss": 0.4366, "step": 4813 }, { "epoch": 0.2888342233155337, "grad_norm": 1.3246315717697144, "learning_rate": 5.840912311242274e-06, "loss": 0.3595, "step": 4814 }, { "epoch": 0.2888942221155577, "grad_norm": 1.1416070461273193, "learning_rate": 5.840406621960301e-06, "loss": 0.4438, "step": 4815 }, { "epoch": 0.2889542209155817, "grad_norm": 1.1492371559143066, "learning_rate": 5.839900844291832e-06, "loss": 0.3759, "step": 4816 }, { "epoch": 0.2890142197156057, "grad_norm": 1.2754658460617065, "learning_rate": 5.839394978255969e-06, "loss": 0.4229, "step": 4817 }, { "epoch": 0.2890742185156297, "grad_norm": 1.1110800504684448, "learning_rate": 5.838889023871816e-06, "loss": 0.4482, "step": 4818 }, { "epoch": 0.2891342173156537, "grad_norm": 1.4675666093826294, "learning_rate": 5.838382981158481e-06, "loss": 0.4398, "step": 4819 }, { "epoch": 0.2891942161156777, "grad_norm": 1.1839443445205688, "learning_rate": 5.837876850135075e-06, "loss": 0.4246, "step": 4820 }, { "epoch": 0.2892542149157017, "grad_norm": 1.2908835411071777, "learning_rate": 5.8373706308207095e-06, "loss": 0.4206, "step": 4821 }, { "epoch": 0.2893142137157257, "grad_norm": 1.1674576997756958, "learning_rate": 5.836864323234506e-06, "loss": 0.4529, "step": 4822 }, { "epoch": 0.2893742125157497, "grad_norm": 1.1502419710159302, "learning_rate": 5.836357927395584e-06, "loss": 0.4131, "step": 4823 }, { "epoch": 0.2894342113157737, "grad_norm": 1.2818294763565063, "learning_rate": 5.8358514433230665e-06, "loss": 0.4406, "step": 4824 }, { "epoch": 0.2894942101157977, "grad_norm": 1.235725998878479, "learning_rate": 5.8353448710360824e-06, "loss": 0.4419, "step": 4825 }, { "epoch": 0.2895542089158217, "grad_norm": 1.1994343996047974, "learning_rate": 5.834838210553763e-06, "loss": 0.4135, "step": 4826 }, { "epoch": 0.2896142077158457, "grad_norm": 1.3938674926757812, "learning_rate": 5.834331461895241e-06, "loss": 0.456, "step": 4827 }, { "epoch": 0.28967420651586967, "grad_norm": 1.3898533582687378, "learning_rate": 5.833824625079655e-06, "loss": 0.4085, "step": 4828 }, { "epoch": 0.28973420531589367, "grad_norm": 1.2889617681503296, "learning_rate": 5.833317700126145e-06, "loss": 0.4883, "step": 4829 }, { "epoch": 0.28979420411591766, "grad_norm": 1.3174798488616943, "learning_rate": 5.832810687053858e-06, "loss": 0.4625, "step": 4830 }, { "epoch": 0.28985420291594166, "grad_norm": 1.3081579208374023, "learning_rate": 5.832303585881938e-06, "loss": 0.3975, "step": 4831 }, { "epoch": 0.28991420171596566, "grad_norm": 1.1321027278900146, "learning_rate": 5.831796396629539e-06, "loss": 0.4812, "step": 4832 }, { "epoch": 0.28997420051598966, "grad_norm": 1.3697813749313354, "learning_rate": 5.831289119315812e-06, "loss": 0.4299, "step": 4833 }, { "epoch": 0.29003419931601365, "grad_norm": 1.227150321006775, "learning_rate": 5.830781753959918e-06, "loss": 0.3888, "step": 4834 }, { "epoch": 0.2900941981160377, "grad_norm": 1.2788821458816528, "learning_rate": 5.830274300581015e-06, "loss": 0.4468, "step": 4835 }, { "epoch": 0.2901541969160617, "grad_norm": 1.374084711074829, "learning_rate": 5.829766759198268e-06, "loss": 0.4731, "step": 4836 }, { "epoch": 0.2902141957160857, "grad_norm": 1.2393772602081299, "learning_rate": 5.829259129830847e-06, "loss": 0.4302, "step": 4837 }, { "epoch": 0.2902741945161097, "grad_norm": 1.266872525215149, "learning_rate": 5.8287514124979185e-06, "loss": 0.4551, "step": 4838 }, { "epoch": 0.2903341933161337, "grad_norm": 1.2758443355560303, "learning_rate": 5.82824360721866e-06, "loss": 0.47, "step": 4839 }, { "epoch": 0.2903941921161577, "grad_norm": 1.3347581624984741, "learning_rate": 5.827735714012247e-06, "loss": 0.4274, "step": 4840 }, { "epoch": 0.2904541909161817, "grad_norm": 1.2119184732437134, "learning_rate": 5.827227732897863e-06, "loss": 0.4204, "step": 4841 }, { "epoch": 0.2905141897162057, "grad_norm": 1.1168224811553955, "learning_rate": 5.826719663894688e-06, "loss": 0.4015, "step": 4842 }, { "epoch": 0.2905741885162297, "grad_norm": 1.2236477136611938, "learning_rate": 5.826211507021913e-06, "loss": 0.4141, "step": 4843 }, { "epoch": 0.2906341873162537, "grad_norm": 1.3111393451690674, "learning_rate": 5.825703262298728e-06, "loss": 0.4033, "step": 4844 }, { "epoch": 0.2906941861162777, "grad_norm": 1.2685115337371826, "learning_rate": 5.825194929744326e-06, "loss": 0.4202, "step": 4845 }, { "epoch": 0.2907541849163017, "grad_norm": 1.2904306650161743, "learning_rate": 5.824686509377906e-06, "loss": 0.4094, "step": 4846 }, { "epoch": 0.29081418371632567, "grad_norm": 1.3496705293655396, "learning_rate": 5.824178001218666e-06, "loss": 0.4471, "step": 4847 }, { "epoch": 0.29087418251634967, "grad_norm": 1.1787341833114624, "learning_rate": 5.823669405285814e-06, "loss": 0.4286, "step": 4848 }, { "epoch": 0.29093418131637366, "grad_norm": 1.252634882926941, "learning_rate": 5.823160721598553e-06, "loss": 0.4538, "step": 4849 }, { "epoch": 0.29099418011639766, "grad_norm": 1.2305457592010498, "learning_rate": 5.822651950176098e-06, "loss": 0.4466, "step": 4850 }, { "epoch": 0.29105417891642166, "grad_norm": 1.3033359050750732, "learning_rate": 5.822143091037659e-06, "loss": 0.4786, "step": 4851 }, { "epoch": 0.29111417771644565, "grad_norm": 1.2677457332611084, "learning_rate": 5.821634144202456e-06, "loss": 0.4494, "step": 4852 }, { "epoch": 0.29117417651646965, "grad_norm": 1.2917464971542358, "learning_rate": 5.821125109689709e-06, "loss": 0.4352, "step": 4853 }, { "epoch": 0.29123417531649365, "grad_norm": 1.2623820304870605, "learning_rate": 5.820615987518641e-06, "loss": 0.3885, "step": 4854 }, { "epoch": 0.29129417411651765, "grad_norm": 1.3567142486572266, "learning_rate": 5.820106777708481e-06, "loss": 0.4646, "step": 4855 }, { "epoch": 0.29135417291654164, "grad_norm": 1.342551589012146, "learning_rate": 5.8195974802784565e-06, "loss": 0.4425, "step": 4856 }, { "epoch": 0.2914141717165657, "grad_norm": 1.324535846710205, "learning_rate": 5.819088095247805e-06, "loss": 0.4992, "step": 4857 }, { "epoch": 0.2914741705165897, "grad_norm": 1.3314820528030396, "learning_rate": 5.81857862263576e-06, "loss": 0.4813, "step": 4858 }, { "epoch": 0.2915341693166137, "grad_norm": 1.282073736190796, "learning_rate": 5.818069062461565e-06, "loss": 0.4442, "step": 4859 }, { "epoch": 0.2915941681166377, "grad_norm": 1.2781139612197876, "learning_rate": 5.817559414744463e-06, "loss": 0.4618, "step": 4860 }, { "epoch": 0.2916541669166617, "grad_norm": 1.141025424003601, "learning_rate": 5.8170496795037e-06, "loss": 0.4121, "step": 4861 }, { "epoch": 0.2917141657166857, "grad_norm": 1.2424036264419556, "learning_rate": 5.816539856758527e-06, "loss": 0.4474, "step": 4862 }, { "epoch": 0.2917741645167097, "grad_norm": 1.2245131731033325, "learning_rate": 5.816029946528198e-06, "loss": 0.4145, "step": 4863 }, { "epoch": 0.2918341633167337, "grad_norm": 1.2637745141983032, "learning_rate": 5.815519948831971e-06, "loss": 0.4336, "step": 4864 }, { "epoch": 0.29189416211675767, "grad_norm": 1.2765491008758545, "learning_rate": 5.815009863689103e-06, "loss": 0.4332, "step": 4865 }, { "epoch": 0.29195416091678167, "grad_norm": 1.313607096672058, "learning_rate": 5.8144996911188606e-06, "loss": 0.4289, "step": 4866 }, { "epoch": 0.29201415971680567, "grad_norm": 1.2156800031661987, "learning_rate": 5.813989431140509e-06, "loss": 0.4058, "step": 4867 }, { "epoch": 0.29207415851682966, "grad_norm": 1.4010226726531982, "learning_rate": 5.81347908377332e-06, "loss": 0.4592, "step": 4868 }, { "epoch": 0.29213415731685366, "grad_norm": 1.323175311088562, "learning_rate": 5.812968649036566e-06, "loss": 0.4723, "step": 4869 }, { "epoch": 0.29219415611687766, "grad_norm": 1.2690294981002808, "learning_rate": 5.812458126949523e-06, "loss": 0.4777, "step": 4870 }, { "epoch": 0.29225415491690165, "grad_norm": 1.236989974975586, "learning_rate": 5.811947517531472e-06, "loss": 0.376, "step": 4871 }, { "epoch": 0.29231415371692565, "grad_norm": 1.1894422769546509, "learning_rate": 5.811436820801697e-06, "loss": 0.4578, "step": 4872 }, { "epoch": 0.29237415251694965, "grad_norm": 1.5738033056259155, "learning_rate": 5.810926036779485e-06, "loss": 0.4628, "step": 4873 }, { "epoch": 0.29243415131697365, "grad_norm": 1.0640432834625244, "learning_rate": 5.810415165484124e-06, "loss": 0.3823, "step": 4874 }, { "epoch": 0.29249415011699764, "grad_norm": 1.1535427570343018, "learning_rate": 5.80990420693491e-06, "loss": 0.4138, "step": 4875 }, { "epoch": 0.29255414891702164, "grad_norm": 1.2696168422698975, "learning_rate": 5.809393161151138e-06, "loss": 0.4473, "step": 4876 }, { "epoch": 0.29261414771704564, "grad_norm": 1.2312630414962769, "learning_rate": 5.808882028152106e-06, "loss": 0.4207, "step": 4877 }, { "epoch": 0.29267414651706963, "grad_norm": 1.2696559429168701, "learning_rate": 5.80837080795712e-06, "loss": 0.4746, "step": 4878 }, { "epoch": 0.2927341453170937, "grad_norm": 1.2336437702178955, "learning_rate": 5.807859500585485e-06, "loss": 0.4644, "step": 4879 }, { "epoch": 0.2927941441171177, "grad_norm": 1.297849178314209, "learning_rate": 5.8073481060565124e-06, "loss": 0.4417, "step": 4880 }, { "epoch": 0.2928541429171417, "grad_norm": 1.2101659774780273, "learning_rate": 5.806836624389513e-06, "loss": 0.4148, "step": 4881 }, { "epoch": 0.2929141417171657, "grad_norm": 1.2577269077301025, "learning_rate": 5.806325055603804e-06, "loss": 0.4954, "step": 4882 }, { "epoch": 0.2929741405171897, "grad_norm": 1.3486309051513672, "learning_rate": 5.805813399718706e-06, "loss": 0.4577, "step": 4883 }, { "epoch": 0.29303413931721367, "grad_norm": 1.1753833293914795, "learning_rate": 5.80530165675354e-06, "loss": 0.433, "step": 4884 }, { "epoch": 0.29309413811723767, "grad_norm": 1.2538845539093018, "learning_rate": 5.804789826727635e-06, "loss": 0.4578, "step": 4885 }, { "epoch": 0.29315413691726167, "grad_norm": 1.2276122570037842, "learning_rate": 5.804277909660319e-06, "loss": 0.4268, "step": 4886 }, { "epoch": 0.29321413571728566, "grad_norm": 1.1506521701812744, "learning_rate": 5.803765905570924e-06, "loss": 0.443, "step": 4887 }, { "epoch": 0.29327413451730966, "grad_norm": 1.3774570226669312, "learning_rate": 5.803253814478785e-06, "loss": 0.4913, "step": 4888 }, { "epoch": 0.29333413331733366, "grad_norm": 1.136613130569458, "learning_rate": 5.802741636403245e-06, "loss": 0.4012, "step": 4889 }, { "epoch": 0.29339413211735765, "grad_norm": 1.290064811706543, "learning_rate": 5.802229371363644e-06, "loss": 0.4226, "step": 4890 }, { "epoch": 0.29345413091738165, "grad_norm": 1.28272545337677, "learning_rate": 5.801717019379328e-06, "loss": 0.4588, "step": 4891 }, { "epoch": 0.29351412971740565, "grad_norm": 1.2698920965194702, "learning_rate": 5.801204580469648e-06, "loss": 0.4943, "step": 4892 }, { "epoch": 0.29357412851742964, "grad_norm": 1.218355417251587, "learning_rate": 5.8006920546539556e-06, "loss": 0.4411, "step": 4893 }, { "epoch": 0.29363412731745364, "grad_norm": 1.3303260803222656, "learning_rate": 5.8001794419516045e-06, "loss": 0.4849, "step": 4894 }, { "epoch": 0.29369412611747764, "grad_norm": 1.343730092048645, "learning_rate": 5.799666742381956e-06, "loss": 0.4109, "step": 4895 }, { "epoch": 0.29375412491750164, "grad_norm": 1.267769694328308, "learning_rate": 5.7991539559643735e-06, "loss": 0.4714, "step": 4896 }, { "epoch": 0.29381412371752563, "grad_norm": 1.3420649766921997, "learning_rate": 5.798641082718218e-06, "loss": 0.4443, "step": 4897 }, { "epoch": 0.29387412251754963, "grad_norm": 1.0407496690750122, "learning_rate": 5.798128122662864e-06, "loss": 0.3709, "step": 4898 }, { "epoch": 0.2939341213175736, "grad_norm": 1.421331763267517, "learning_rate": 5.797615075817681e-06, "loss": 0.4256, "step": 4899 }, { "epoch": 0.2939941201175976, "grad_norm": 1.3521332740783691, "learning_rate": 5.797101942202043e-06, "loss": 0.47, "step": 4900 }, { "epoch": 0.2940541189176216, "grad_norm": 1.0591782331466675, "learning_rate": 5.79658872183533e-06, "loss": 0.4158, "step": 4901 }, { "epoch": 0.2941141177176457, "grad_norm": 1.23531973361969, "learning_rate": 5.796075414736926e-06, "loss": 0.4482, "step": 4902 }, { "epoch": 0.29417411651766967, "grad_norm": 1.1920233964920044, "learning_rate": 5.7955620209262135e-06, "loss": 0.4324, "step": 4903 }, { "epoch": 0.29423411531769367, "grad_norm": 1.416912317276001, "learning_rate": 5.795048540422582e-06, "loss": 0.512, "step": 4904 }, { "epoch": 0.29429411411771766, "grad_norm": 1.1869099140167236, "learning_rate": 5.7945349732454234e-06, "loss": 0.4358, "step": 4905 }, { "epoch": 0.29435411291774166, "grad_norm": 1.2891525030136108, "learning_rate": 5.794021319414132e-06, "loss": 0.4637, "step": 4906 }, { "epoch": 0.29441411171776566, "grad_norm": 1.3391625881195068, "learning_rate": 5.793507578948108e-06, "loss": 0.4758, "step": 4907 }, { "epoch": 0.29447411051778966, "grad_norm": 1.3712528944015503, "learning_rate": 5.792993751866752e-06, "loss": 0.4034, "step": 4908 }, { "epoch": 0.29453410931781365, "grad_norm": 1.2423313856124878, "learning_rate": 5.7924798381894685e-06, "loss": 0.4238, "step": 4909 }, { "epoch": 0.29459410811783765, "grad_norm": 1.3274197578430176, "learning_rate": 5.791965837935666e-06, "loss": 0.4538, "step": 4910 }, { "epoch": 0.29465410691786165, "grad_norm": 1.2627553939819336, "learning_rate": 5.791451751124756e-06, "loss": 0.4293, "step": 4911 }, { "epoch": 0.29471410571788564, "grad_norm": 1.2922611236572266, "learning_rate": 5.7909375777761525e-06, "loss": 0.431, "step": 4912 }, { "epoch": 0.29477410451790964, "grad_norm": 1.246665358543396, "learning_rate": 5.790423317909276e-06, "loss": 0.4945, "step": 4913 }, { "epoch": 0.29483410331793364, "grad_norm": 1.1618181467056274, "learning_rate": 5.789908971543544e-06, "loss": 0.4055, "step": 4914 }, { "epoch": 0.29489410211795763, "grad_norm": 1.3964951038360596, "learning_rate": 5.789394538698384e-06, "loss": 0.4448, "step": 4915 }, { "epoch": 0.29495410091798163, "grad_norm": 1.2340561151504517, "learning_rate": 5.788880019393222e-06, "loss": 0.4397, "step": 4916 }, { "epoch": 0.29501409971800563, "grad_norm": 1.2495664358139038, "learning_rate": 5.788365413647491e-06, "loss": 0.4633, "step": 4917 }, { "epoch": 0.2950740985180296, "grad_norm": 1.2715952396392822, "learning_rate": 5.787850721480622e-06, "loss": 0.489, "step": 4918 }, { "epoch": 0.2951340973180536, "grad_norm": 1.474686622619629, "learning_rate": 5.787335942912056e-06, "loss": 0.466, "step": 4919 }, { "epoch": 0.2951940961180776, "grad_norm": 1.2299206256866455, "learning_rate": 5.786821077961233e-06, "loss": 0.4431, "step": 4920 }, { "epoch": 0.2952540949181016, "grad_norm": 1.2679795026779175, "learning_rate": 5.786306126647596e-06, "loss": 0.5462, "step": 4921 }, { "epoch": 0.2953140937181256, "grad_norm": 1.3433043956756592, "learning_rate": 5.785791088990594e-06, "loss": 0.4419, "step": 4922 }, { "epoch": 0.2953740925181496, "grad_norm": 1.1423451900482178, "learning_rate": 5.785275965009674e-06, "loss": 0.395, "step": 4923 }, { "epoch": 0.29543409131817366, "grad_norm": 1.263593316078186, "learning_rate": 5.784760754724295e-06, "loss": 0.5067, "step": 4924 }, { "epoch": 0.29549409011819766, "grad_norm": 1.2386358976364136, "learning_rate": 5.78424545815391e-06, "loss": 0.4331, "step": 4925 }, { "epoch": 0.29555408891822166, "grad_norm": 1.2279064655303955, "learning_rate": 5.783730075317982e-06, "loss": 0.4497, "step": 4926 }, { "epoch": 0.29561408771824566, "grad_norm": 1.290889859199524, "learning_rate": 5.783214606235974e-06, "loss": 0.4645, "step": 4927 }, { "epoch": 0.29567408651826965, "grad_norm": 1.164631962776184, "learning_rate": 5.782699050927352e-06, "loss": 0.3894, "step": 4928 }, { "epoch": 0.29573408531829365, "grad_norm": 1.2154821157455444, "learning_rate": 5.782183409411587e-06, "loss": 0.4349, "step": 4929 }, { "epoch": 0.29579408411831765, "grad_norm": 1.3351019620895386, "learning_rate": 5.781667681708152e-06, "loss": 0.4241, "step": 4930 }, { "epoch": 0.29585408291834164, "grad_norm": 1.32025146484375, "learning_rate": 5.7811518678365245e-06, "loss": 0.4804, "step": 4931 }, { "epoch": 0.29591408171836564, "grad_norm": 1.2006251811981201, "learning_rate": 5.780635967816183e-06, "loss": 0.4342, "step": 4932 }, { "epoch": 0.29597408051838964, "grad_norm": 1.1806215047836304, "learning_rate": 5.780119981666612e-06, "loss": 0.4219, "step": 4933 }, { "epoch": 0.29603407931841363, "grad_norm": 1.20438551902771, "learning_rate": 5.779603909407298e-06, "loss": 0.4632, "step": 4934 }, { "epoch": 0.29609407811843763, "grad_norm": 1.2569459676742554, "learning_rate": 5.779087751057729e-06, "loss": 0.4431, "step": 4935 }, { "epoch": 0.29615407691846163, "grad_norm": 1.2553913593292236, "learning_rate": 5.7785715066374006e-06, "loss": 0.4273, "step": 4936 }, { "epoch": 0.2962140757184856, "grad_norm": 1.2821108102798462, "learning_rate": 5.7780551761658065e-06, "loss": 0.4549, "step": 4937 }, { "epoch": 0.2962740745185096, "grad_norm": 1.2543500661849976, "learning_rate": 5.7775387596624465e-06, "loss": 0.4494, "step": 4938 }, { "epoch": 0.2963340733185336, "grad_norm": 1.1096010208129883, "learning_rate": 5.777022257146826e-06, "loss": 0.3777, "step": 4939 }, { "epoch": 0.2963940721185576, "grad_norm": 1.3586030006408691, "learning_rate": 5.776505668638448e-06, "loss": 0.5194, "step": 4940 }, { "epoch": 0.2964540709185816, "grad_norm": 1.3387778997421265, "learning_rate": 5.775988994156822e-06, "loss": 0.4387, "step": 4941 }, { "epoch": 0.2965140697186056, "grad_norm": 1.2737782001495361, "learning_rate": 5.7754722337214616e-06, "loss": 0.4625, "step": 4942 }, { "epoch": 0.2965740685186296, "grad_norm": 1.2379791736602783, "learning_rate": 5.774955387351882e-06, "loss": 0.4304, "step": 4943 }, { "epoch": 0.2966340673186536, "grad_norm": 1.2667481899261475, "learning_rate": 5.774438455067602e-06, "loss": 0.4666, "step": 4944 }, { "epoch": 0.2966940661186776, "grad_norm": 1.2712453603744507, "learning_rate": 5.773921436888144e-06, "loss": 0.4645, "step": 4945 }, { "epoch": 0.2967540649187016, "grad_norm": 1.3061894178390503, "learning_rate": 5.773404332833033e-06, "loss": 0.5132, "step": 4946 }, { "epoch": 0.29681406371872565, "grad_norm": 1.3309282064437866, "learning_rate": 5.772887142921799e-06, "loss": 0.4516, "step": 4947 }, { "epoch": 0.29687406251874965, "grad_norm": 1.2832883596420288, "learning_rate": 5.772369867173971e-06, "loss": 0.4505, "step": 4948 }, { "epoch": 0.29693406131877365, "grad_norm": 1.2690097093582153, "learning_rate": 5.7718525056090876e-06, "loss": 0.4302, "step": 4949 }, { "epoch": 0.29699406011879764, "grad_norm": 1.2825607061386108, "learning_rate": 5.771335058246685e-06, "loss": 0.4542, "step": 4950 }, { "epoch": 0.29705405891882164, "grad_norm": 1.3088197708129883, "learning_rate": 5.770817525106305e-06, "loss": 0.4489, "step": 4951 }, { "epoch": 0.29711405771884564, "grad_norm": 1.1888201236724854, "learning_rate": 5.770299906207494e-06, "loss": 0.4571, "step": 4952 }, { "epoch": 0.29717405651886963, "grad_norm": 1.2165571451187134, "learning_rate": 5.769782201569798e-06, "loss": 0.3955, "step": 4953 }, { "epoch": 0.29723405531889363, "grad_norm": 1.291719913482666, "learning_rate": 5.769264411212769e-06, "loss": 0.4466, "step": 4954 }, { "epoch": 0.29729405411891763, "grad_norm": 1.2859950065612793, "learning_rate": 5.768746535155964e-06, "loss": 0.4116, "step": 4955 }, { "epoch": 0.2973540529189416, "grad_norm": 1.3291312456130981, "learning_rate": 5.768228573418937e-06, "loss": 0.3885, "step": 4956 }, { "epoch": 0.2974140517189656, "grad_norm": 1.2716591358184814, "learning_rate": 5.7677105260212505e-06, "loss": 0.421, "step": 4957 }, { "epoch": 0.2974740505189896, "grad_norm": 1.2802650928497314, "learning_rate": 5.76719239298247e-06, "loss": 0.4008, "step": 4958 }, { "epoch": 0.2975340493190136, "grad_norm": 1.2434757947921753, "learning_rate": 5.766674174322162e-06, "loss": 0.4144, "step": 4959 }, { "epoch": 0.2975940481190376, "grad_norm": 1.233764886856079, "learning_rate": 5.766155870059897e-06, "loss": 0.4698, "step": 4960 }, { "epoch": 0.2976540469190616, "grad_norm": 1.1605572700500488, "learning_rate": 5.76563748021525e-06, "loss": 0.4322, "step": 4961 }, { "epoch": 0.2977140457190856, "grad_norm": 1.2864155769348145, "learning_rate": 5.765119004807797e-06, "loss": 0.4833, "step": 4962 }, { "epoch": 0.2977740445191096, "grad_norm": 1.4189709424972534, "learning_rate": 5.76460044385712e-06, "loss": 0.4539, "step": 4963 }, { "epoch": 0.2978340433191336, "grad_norm": 1.4032907485961914, "learning_rate": 5.764081797382799e-06, "loss": 0.4695, "step": 4964 }, { "epoch": 0.2978940421191576, "grad_norm": 1.2890949249267578, "learning_rate": 5.763563065404426e-06, "loss": 0.4684, "step": 4965 }, { "epoch": 0.2979540409191816, "grad_norm": 1.1766276359558105, "learning_rate": 5.76304424794159e-06, "loss": 0.4177, "step": 4966 }, { "epoch": 0.2980140397192056, "grad_norm": 1.183281421661377, "learning_rate": 5.762525345013881e-06, "loss": 0.4433, "step": 4967 }, { "epoch": 0.2980740385192296, "grad_norm": 1.189733862876892, "learning_rate": 5.762006356640898e-06, "loss": 0.442, "step": 4968 }, { "epoch": 0.29813403731925364, "grad_norm": 1.2561571598052979, "learning_rate": 5.761487282842241e-06, "loss": 0.5092, "step": 4969 }, { "epoch": 0.29819403611927764, "grad_norm": 1.2422431707382202, "learning_rate": 5.760968123637513e-06, "loss": 0.4485, "step": 4970 }, { "epoch": 0.29825403491930164, "grad_norm": 1.31088387966156, "learning_rate": 5.76044887904632e-06, "loss": 0.435, "step": 4971 }, { "epoch": 0.29831403371932563, "grad_norm": 1.310156226158142, "learning_rate": 5.75992954908827e-06, "loss": 0.4449, "step": 4972 }, { "epoch": 0.29837403251934963, "grad_norm": 1.326781988143921, "learning_rate": 5.759410133782978e-06, "loss": 0.4623, "step": 4973 }, { "epoch": 0.2984340313193736, "grad_norm": 1.2847493886947632, "learning_rate": 5.758890633150059e-06, "loss": 0.4526, "step": 4974 }, { "epoch": 0.2984940301193976, "grad_norm": 1.2514666318893433, "learning_rate": 5.758371047209134e-06, "loss": 0.4056, "step": 4975 }, { "epoch": 0.2985540289194216, "grad_norm": 1.3355357646942139, "learning_rate": 5.7578513759798205e-06, "loss": 0.4632, "step": 4976 }, { "epoch": 0.2986140277194456, "grad_norm": 1.2125524282455444, "learning_rate": 5.7573316194817495e-06, "loss": 0.4123, "step": 4977 }, { "epoch": 0.2986740265194696, "grad_norm": 1.2966012954711914, "learning_rate": 5.756811777734547e-06, "loss": 0.4253, "step": 4978 }, { "epoch": 0.2987340253194936, "grad_norm": 1.2910441160202026, "learning_rate": 5.756291850757846e-06, "loss": 0.4344, "step": 4979 }, { "epoch": 0.2987940241195176, "grad_norm": 1.2098549604415894, "learning_rate": 5.755771838571281e-06, "loss": 0.4364, "step": 4980 }, { "epoch": 0.2988540229195416, "grad_norm": 1.4456521272659302, "learning_rate": 5.7552517411944906e-06, "loss": 0.494, "step": 4981 }, { "epoch": 0.2989140217195656, "grad_norm": 1.1640211343765259, "learning_rate": 5.754731558647117e-06, "loss": 0.4137, "step": 4982 }, { "epoch": 0.2989740205195896, "grad_norm": 1.491205096244812, "learning_rate": 5.754211290948805e-06, "loss": 0.4317, "step": 4983 }, { "epoch": 0.2990340193196136, "grad_norm": 1.3708326816558838, "learning_rate": 5.753690938119203e-06, "loss": 0.519, "step": 4984 }, { "epoch": 0.2990940181196376, "grad_norm": 1.2956324815750122, "learning_rate": 5.753170500177962e-06, "loss": 0.3941, "step": 4985 }, { "epoch": 0.2991540169196616, "grad_norm": 1.4085441827774048, "learning_rate": 5.752649977144737e-06, "loss": 0.457, "step": 4986 }, { "epoch": 0.2992140157196856, "grad_norm": 1.2991443872451782, "learning_rate": 5.752129369039185e-06, "loss": 0.4602, "step": 4987 }, { "epoch": 0.2992740145197096, "grad_norm": 1.32611083984375, "learning_rate": 5.751608675880967e-06, "loss": 0.442, "step": 4988 }, { "epoch": 0.2993340133197336, "grad_norm": 1.2095354795455933, "learning_rate": 5.751087897689748e-06, "loss": 0.4526, "step": 4989 }, { "epoch": 0.2993940121197576, "grad_norm": 1.2596477270126343, "learning_rate": 5.750567034485195e-06, "loss": 0.4228, "step": 4990 }, { "epoch": 0.2994540109197816, "grad_norm": 1.20638906955719, "learning_rate": 5.750046086286979e-06, "loss": 0.4758, "step": 4991 }, { "epoch": 0.29951400971980563, "grad_norm": 1.2478246688842773, "learning_rate": 5.749525053114773e-06, "loss": 0.4558, "step": 4992 }, { "epoch": 0.2995740085198296, "grad_norm": 1.2215571403503418, "learning_rate": 5.749003934988255e-06, "loss": 0.446, "step": 4993 }, { "epoch": 0.2996340073198536, "grad_norm": 1.3342517614364624, "learning_rate": 5.748482731927104e-06, "loss": 0.4453, "step": 4994 }, { "epoch": 0.2996940061198776, "grad_norm": 1.2649812698364258, "learning_rate": 5.747961443951005e-06, "loss": 0.3992, "step": 4995 }, { "epoch": 0.2997540049199016, "grad_norm": 1.3268123865127563, "learning_rate": 5.7474400710796435e-06, "loss": 0.4115, "step": 4996 }, { "epoch": 0.2998140037199256, "grad_norm": 1.2618584632873535, "learning_rate": 5.746918613332711e-06, "loss": 0.4322, "step": 4997 }, { "epoch": 0.2998740025199496, "grad_norm": 1.3545119762420654, "learning_rate": 5.746397070729898e-06, "loss": 0.4409, "step": 4998 }, { "epoch": 0.2999340013199736, "grad_norm": 1.2611013650894165, "learning_rate": 5.745875443290903e-06, "loss": 0.4693, "step": 4999 }, { "epoch": 0.2999940001199976, "grad_norm": 1.3104777336120605, "learning_rate": 5.745353731035425e-06, "loss": 0.4678, "step": 5000 }, { "epoch": 0.3000539989200216, "grad_norm": 1.295926809310913, "learning_rate": 5.744831933983166e-06, "loss": 0.467, "step": 5001 }, { "epoch": 0.3001139977200456, "grad_norm": 1.180582880973816, "learning_rate": 5.744310052153832e-06, "loss": 0.3748, "step": 5002 }, { "epoch": 0.3001739965200696, "grad_norm": 1.4001072645187378, "learning_rate": 5.7437880855671324e-06, "loss": 0.4422, "step": 5003 }, { "epoch": 0.3002339953200936, "grad_norm": 1.8040845394134521, "learning_rate": 5.74326603424278e-06, "loss": 0.4716, "step": 5004 }, { "epoch": 0.3002939941201176, "grad_norm": 1.3574548959732056, "learning_rate": 5.742743898200488e-06, "loss": 0.4962, "step": 5005 }, { "epoch": 0.3003539929201416, "grad_norm": 1.153702735900879, "learning_rate": 5.742221677459978e-06, "loss": 0.3553, "step": 5006 }, { "epoch": 0.3004139917201656, "grad_norm": 1.2048710584640503, "learning_rate": 5.741699372040972e-06, "loss": 0.4596, "step": 5007 }, { "epoch": 0.3004739905201896, "grad_norm": 1.4398515224456787, "learning_rate": 5.741176981963193e-06, "loss": 0.544, "step": 5008 }, { "epoch": 0.3005339893202136, "grad_norm": 1.3590220212936401, "learning_rate": 5.74065450724637e-06, "loss": 0.4762, "step": 5009 }, { "epoch": 0.3005939881202376, "grad_norm": 1.166369080543518, "learning_rate": 5.740131947910235e-06, "loss": 0.3799, "step": 5010 }, { "epoch": 0.3006539869202616, "grad_norm": 1.2997255325317383, "learning_rate": 5.739609303974521e-06, "loss": 0.4174, "step": 5011 }, { "epoch": 0.30071398572028557, "grad_norm": 1.1804770231246948, "learning_rate": 5.739086575458968e-06, "loss": 0.428, "step": 5012 }, { "epoch": 0.30077398452030957, "grad_norm": 1.2453982830047607, "learning_rate": 5.738563762383317e-06, "loss": 0.4299, "step": 5013 }, { "epoch": 0.3008339833203336, "grad_norm": 1.3279472589492798, "learning_rate": 5.738040864767311e-06, "loss": 0.5, "step": 5014 }, { "epoch": 0.3008939821203576, "grad_norm": 1.205245852470398, "learning_rate": 5.737517882630699e-06, "loss": 0.4707, "step": 5015 }, { "epoch": 0.3009539809203816, "grad_norm": 1.3916480541229248, "learning_rate": 5.736994815993229e-06, "loss": 0.4835, "step": 5016 }, { "epoch": 0.3010139797204056, "grad_norm": 1.343602180480957, "learning_rate": 5.736471664874658e-06, "loss": 0.4466, "step": 5017 }, { "epoch": 0.3010739785204296, "grad_norm": 1.1987059116363525, "learning_rate": 5.73594842929474e-06, "loss": 0.4921, "step": 5018 }, { "epoch": 0.3011339773204536, "grad_norm": 1.1214064359664917, "learning_rate": 5.735425109273238e-06, "loss": 0.3844, "step": 5019 }, { "epoch": 0.3011939761204776, "grad_norm": 1.2093793153762817, "learning_rate": 5.734901704829914e-06, "loss": 0.3958, "step": 5020 }, { "epoch": 0.3012539749205016, "grad_norm": 1.3320120573043823, "learning_rate": 5.734378215984534e-06, "loss": 0.4698, "step": 5021 }, { "epoch": 0.3013139737205256, "grad_norm": 1.3102341890335083, "learning_rate": 5.73385464275687e-06, "loss": 0.4214, "step": 5022 }, { "epoch": 0.3013739725205496, "grad_norm": 1.4325112104415894, "learning_rate": 5.7333309851666926e-06, "loss": 0.4804, "step": 5023 }, { "epoch": 0.3014339713205736, "grad_norm": 1.3721989393234253, "learning_rate": 5.732807243233779e-06, "loss": 0.4379, "step": 5024 }, { "epoch": 0.3014939701205976, "grad_norm": 1.2638270854949951, "learning_rate": 5.732283416977909e-06, "loss": 0.4044, "step": 5025 }, { "epoch": 0.3015539689206216, "grad_norm": 1.3288124799728394, "learning_rate": 5.731759506418865e-06, "loss": 0.5123, "step": 5026 }, { "epoch": 0.3016139677206456, "grad_norm": 1.4654325246810913, "learning_rate": 5.731235511576431e-06, "loss": 0.4939, "step": 5027 }, { "epoch": 0.3016739665206696, "grad_norm": 1.384597897529602, "learning_rate": 5.730711432470398e-06, "loss": 0.4611, "step": 5028 }, { "epoch": 0.3017339653206936, "grad_norm": 1.1505223512649536, "learning_rate": 5.730187269120557e-06, "loss": 0.4675, "step": 5029 }, { "epoch": 0.3017939641207176, "grad_norm": 1.2819008827209473, "learning_rate": 5.729663021546704e-06, "loss": 0.4579, "step": 5030 }, { "epoch": 0.30185396292074157, "grad_norm": 1.248149037361145, "learning_rate": 5.729138689768636e-06, "loss": 0.4408, "step": 5031 }, { "epoch": 0.30191396172076557, "grad_norm": 1.173696756362915, "learning_rate": 5.728614273806157e-06, "loss": 0.4637, "step": 5032 }, { "epoch": 0.30197396052078956, "grad_norm": 1.3709032535552979, "learning_rate": 5.728089773679069e-06, "loss": 0.4553, "step": 5033 }, { "epoch": 0.30203395932081356, "grad_norm": 1.0940897464752197, "learning_rate": 5.727565189407182e-06, "loss": 0.4207, "step": 5034 }, { "epoch": 0.30209395812083756, "grad_norm": 1.334747314453125, "learning_rate": 5.727040521010307e-06, "loss": 0.409, "step": 5035 }, { "epoch": 0.3021539569208616, "grad_norm": 1.242241382598877, "learning_rate": 5.726515768508257e-06, "loss": 0.4225, "step": 5036 }, { "epoch": 0.3022139557208856, "grad_norm": 1.1895326375961304, "learning_rate": 5.725990931920851e-06, "loss": 0.3932, "step": 5037 }, { "epoch": 0.3022739545209096, "grad_norm": 1.3097048997879028, "learning_rate": 5.725466011267909e-06, "loss": 0.4478, "step": 5038 }, { "epoch": 0.3023339533209336, "grad_norm": 1.167309284210205, "learning_rate": 5.724941006569255e-06, "loss": 0.4226, "step": 5039 }, { "epoch": 0.3023939521209576, "grad_norm": 1.241195797920227, "learning_rate": 5.724415917844716e-06, "loss": 0.4188, "step": 5040 }, { "epoch": 0.3024539509209816, "grad_norm": 1.1619794368743896, "learning_rate": 5.723890745114122e-06, "loss": 0.3878, "step": 5041 }, { "epoch": 0.3025139497210056, "grad_norm": 1.1821173429489136, "learning_rate": 5.723365488397306e-06, "loss": 0.4234, "step": 5042 }, { "epoch": 0.3025739485210296, "grad_norm": 1.0805859565734863, "learning_rate": 5.722840147714106e-06, "loss": 0.368, "step": 5043 }, { "epoch": 0.3026339473210536, "grad_norm": 1.4630873203277588, "learning_rate": 5.7223147230843615e-06, "loss": 0.4411, "step": 5044 }, { "epoch": 0.3026939461210776, "grad_norm": 1.3088526725769043, "learning_rate": 5.721789214527914e-06, "loss": 0.5186, "step": 5045 }, { "epoch": 0.3027539449211016, "grad_norm": 1.275364875793457, "learning_rate": 5.721263622064609e-06, "loss": 0.4297, "step": 5046 }, { "epoch": 0.3028139437211256, "grad_norm": 1.1251572370529175, "learning_rate": 5.7207379457142995e-06, "loss": 0.4316, "step": 5047 }, { "epoch": 0.3028739425211496, "grad_norm": 1.2822245359420776, "learning_rate": 5.720212185496834e-06, "loss": 0.4015, "step": 5048 }, { "epoch": 0.30293394132117357, "grad_norm": 1.1210391521453857, "learning_rate": 5.71968634143207e-06, "loss": 0.3809, "step": 5049 }, { "epoch": 0.30299394012119757, "grad_norm": 1.1201097965240479, "learning_rate": 5.7191604135398654e-06, "loss": 0.408, "step": 5050 }, { "epoch": 0.30305393892122157, "grad_norm": 1.268182396888733, "learning_rate": 5.7186344018400826e-06, "loss": 0.3988, "step": 5051 }, { "epoch": 0.30311393772124556, "grad_norm": 1.193556547164917, "learning_rate": 5.7181083063525866e-06, "loss": 0.3878, "step": 5052 }, { "epoch": 0.30317393652126956, "grad_norm": 1.1998220682144165, "learning_rate": 5.7175821270972455e-06, "loss": 0.4357, "step": 5053 }, { "epoch": 0.30323393532129356, "grad_norm": 1.2280611991882324, "learning_rate": 5.717055864093931e-06, "loss": 0.4067, "step": 5054 }, { "epoch": 0.30329393412131755, "grad_norm": 1.274827241897583, "learning_rate": 5.716529517362516e-06, "loss": 0.4345, "step": 5055 }, { "epoch": 0.30335393292134155, "grad_norm": 1.177266240119934, "learning_rate": 5.716003086922881e-06, "loss": 0.3818, "step": 5056 }, { "epoch": 0.30341393172136555, "grad_norm": 1.2753825187683105, "learning_rate": 5.7154765727949056e-06, "loss": 0.473, "step": 5057 }, { "epoch": 0.30347393052138955, "grad_norm": 1.2851659059524536, "learning_rate": 5.714949974998473e-06, "loss": 0.3692, "step": 5058 }, { "epoch": 0.3035339293214136, "grad_norm": 1.2608569860458374, "learning_rate": 5.714423293553471e-06, "loss": 0.4451, "step": 5059 }, { "epoch": 0.3035939281214376, "grad_norm": 1.614034652709961, "learning_rate": 5.713896528479791e-06, "loss": 0.4982, "step": 5060 }, { "epoch": 0.3036539269214616, "grad_norm": 1.225503921508789, "learning_rate": 5.713369679797325e-06, "loss": 0.4746, "step": 5061 }, { "epoch": 0.3037139257214856, "grad_norm": 1.1841105222702026, "learning_rate": 5.712842747525971e-06, "loss": 0.4329, "step": 5062 }, { "epoch": 0.3037739245215096, "grad_norm": 1.2893403768539429, "learning_rate": 5.712315731685627e-06, "loss": 0.4634, "step": 5063 }, { "epoch": 0.3038339233215336, "grad_norm": 1.2406644821166992, "learning_rate": 5.711788632296197e-06, "loss": 0.4221, "step": 5064 }, { "epoch": 0.3038939221215576, "grad_norm": 1.2440626621246338, "learning_rate": 5.711261449377588e-06, "loss": 0.471, "step": 5065 }, { "epoch": 0.3039539209215816, "grad_norm": 1.2641472816467285, "learning_rate": 5.710734182949709e-06, "loss": 0.4085, "step": 5066 }, { "epoch": 0.3040139197216056, "grad_norm": 1.2789894342422485, "learning_rate": 5.7102068330324715e-06, "loss": 0.4368, "step": 5067 }, { "epoch": 0.30407391852162957, "grad_norm": 1.344092845916748, "learning_rate": 5.709679399645791e-06, "loss": 0.45, "step": 5068 }, { "epoch": 0.30413391732165357, "grad_norm": 1.1867501735687256, "learning_rate": 5.709151882809588e-06, "loss": 0.4173, "step": 5069 }, { "epoch": 0.30419391612167757, "grad_norm": 1.258746862411499, "learning_rate": 5.708624282543782e-06, "loss": 0.4462, "step": 5070 }, { "epoch": 0.30425391492170156, "grad_norm": 1.1302227973937988, "learning_rate": 5.7080965988683004e-06, "loss": 0.3739, "step": 5071 }, { "epoch": 0.30431391372172556, "grad_norm": 1.4148207902908325, "learning_rate": 5.70756883180307e-06, "loss": 0.4537, "step": 5072 }, { "epoch": 0.30437391252174956, "grad_norm": 1.2696319818496704, "learning_rate": 5.7070409813680215e-06, "loss": 0.5236, "step": 5073 }, { "epoch": 0.30443391132177355, "grad_norm": 1.1252968311309814, "learning_rate": 5.706513047583092e-06, "loss": 0.4686, "step": 5074 }, { "epoch": 0.30449391012179755, "grad_norm": 1.2171087265014648, "learning_rate": 5.705985030468216e-06, "loss": 0.4211, "step": 5075 }, { "epoch": 0.30455390892182155, "grad_norm": 1.4532790184020996, "learning_rate": 5.705456930043337e-06, "loss": 0.4736, "step": 5076 }, { "epoch": 0.30461390772184554, "grad_norm": 1.351931095123291, "learning_rate": 5.7049287463283965e-06, "loss": 0.4764, "step": 5077 }, { "epoch": 0.30467390652186954, "grad_norm": 1.2626657485961914, "learning_rate": 5.7044004793433445e-06, "loss": 0.4528, "step": 5078 }, { "epoch": 0.30473390532189354, "grad_norm": 1.1255443096160889, "learning_rate": 5.703872129108129e-06, "loss": 0.3724, "step": 5079 }, { "epoch": 0.30479390412191754, "grad_norm": 1.2317569255828857, "learning_rate": 5.703343695642704e-06, "loss": 0.4401, "step": 5080 }, { "epoch": 0.3048539029219416, "grad_norm": 1.130177617073059, "learning_rate": 5.702815178967026e-06, "loss": 0.3959, "step": 5081 }, { "epoch": 0.3049139017219656, "grad_norm": 1.4201905727386475, "learning_rate": 5.7022865791010546e-06, "loss": 0.4599, "step": 5082 }, { "epoch": 0.3049739005219896, "grad_norm": 1.2748006582260132, "learning_rate": 5.701757896064752e-06, "loss": 0.4038, "step": 5083 }, { "epoch": 0.3050338993220136, "grad_norm": 1.1791456937789917, "learning_rate": 5.701229129878086e-06, "loss": 0.4322, "step": 5084 }, { "epoch": 0.3050938981220376, "grad_norm": 1.2496403455734253, "learning_rate": 5.700700280561024e-06, "loss": 0.4814, "step": 5085 }, { "epoch": 0.3051538969220616, "grad_norm": 1.3591089248657227, "learning_rate": 5.70017134813354e-06, "loss": 0.4438, "step": 5086 }, { "epoch": 0.30521389572208557, "grad_norm": 1.256174087524414, "learning_rate": 5.699642332615606e-06, "loss": 0.4196, "step": 5087 }, { "epoch": 0.30527389452210957, "grad_norm": 1.4343242645263672, "learning_rate": 5.699113234027203e-06, "loss": 0.4471, "step": 5088 }, { "epoch": 0.30533389332213356, "grad_norm": 1.3405112028121948, "learning_rate": 5.698584052388314e-06, "loss": 0.4326, "step": 5089 }, { "epoch": 0.30539389212215756, "grad_norm": 1.0625876188278198, "learning_rate": 5.69805478771892e-06, "loss": 0.3835, "step": 5090 }, { "epoch": 0.30545389092218156, "grad_norm": 1.2606918811798096, "learning_rate": 5.697525440039013e-06, "loss": 0.401, "step": 5091 }, { "epoch": 0.30551388972220556, "grad_norm": 1.1688337326049805, "learning_rate": 5.69699600936858e-06, "loss": 0.424, "step": 5092 }, { "epoch": 0.30557388852222955, "grad_norm": 1.1100473403930664, "learning_rate": 5.696466495727619e-06, "loss": 0.4289, "step": 5093 }, { "epoch": 0.30563388732225355, "grad_norm": 1.4308418035507202, "learning_rate": 5.695936899136125e-06, "loss": 0.4683, "step": 5094 }, { "epoch": 0.30569388612227755, "grad_norm": 1.3086870908737183, "learning_rate": 5.695407219614098e-06, "loss": 0.4516, "step": 5095 }, { "epoch": 0.30575388492230154, "grad_norm": 1.2631546258926392, "learning_rate": 5.694877457181544e-06, "loss": 0.4898, "step": 5096 }, { "epoch": 0.30581388372232554, "grad_norm": 1.333979606628418, "learning_rate": 5.694347611858467e-06, "loss": 0.4345, "step": 5097 }, { "epoch": 0.30587388252234954, "grad_norm": 1.2530313730239868, "learning_rate": 5.693817683664879e-06, "loss": 0.4301, "step": 5098 }, { "epoch": 0.30593388132237354, "grad_norm": 1.3850120306015015, "learning_rate": 5.693287672620792e-06, "loss": 0.4331, "step": 5099 }, { "epoch": 0.30599388012239753, "grad_norm": 1.2303404808044434, "learning_rate": 5.692757578746222e-06, "loss": 0.4473, "step": 5100 }, { "epoch": 0.30605387892242153, "grad_norm": 1.3309847116470337, "learning_rate": 5.6922274020611875e-06, "loss": 0.4563, "step": 5101 }, { "epoch": 0.3061138777224455, "grad_norm": 1.3242961168289185, "learning_rate": 5.691697142585713e-06, "loss": 0.4623, "step": 5102 }, { "epoch": 0.3061738765224695, "grad_norm": 1.1898068189620972, "learning_rate": 5.6911668003398225e-06, "loss": 0.4184, "step": 5103 }, { "epoch": 0.3062338753224936, "grad_norm": 1.3702155351638794, "learning_rate": 5.690636375343545e-06, "loss": 0.4155, "step": 5104 }, { "epoch": 0.3062938741225176, "grad_norm": 1.3595199584960938, "learning_rate": 5.690105867616912e-06, "loss": 0.4774, "step": 5105 }, { "epoch": 0.30635387292254157, "grad_norm": 1.3400541543960571, "learning_rate": 5.689575277179959e-06, "loss": 0.4605, "step": 5106 }, { "epoch": 0.30641387172256557, "grad_norm": 1.3485357761383057, "learning_rate": 5.689044604052723e-06, "loss": 0.4332, "step": 5107 }, { "epoch": 0.30647387052258956, "grad_norm": 1.4917783737182617, "learning_rate": 5.688513848255246e-06, "loss": 0.4488, "step": 5108 }, { "epoch": 0.30653386932261356, "grad_norm": 1.127943754196167, "learning_rate": 5.68798300980757e-06, "loss": 0.3784, "step": 5109 }, { "epoch": 0.30659386812263756, "grad_norm": 1.1908751726150513, "learning_rate": 5.687452088729746e-06, "loss": 0.4052, "step": 5110 }, { "epoch": 0.30665386692266156, "grad_norm": 1.3179280757904053, "learning_rate": 5.686921085041821e-06, "loss": 0.4544, "step": 5111 }, { "epoch": 0.30671386572268555, "grad_norm": 1.2457916736602783, "learning_rate": 5.6863899987638524e-06, "loss": 0.4096, "step": 5112 }, { "epoch": 0.30677386452270955, "grad_norm": 1.2868131399154663, "learning_rate": 5.685858829915893e-06, "loss": 0.4581, "step": 5113 }, { "epoch": 0.30683386332273355, "grad_norm": 1.312996506690979, "learning_rate": 5.685327578518006e-06, "loss": 0.4253, "step": 5114 }, { "epoch": 0.30689386212275754, "grad_norm": 1.3726649284362793, "learning_rate": 5.68479624459025e-06, "loss": 0.4796, "step": 5115 }, { "epoch": 0.30695386092278154, "grad_norm": 1.4540637731552124, "learning_rate": 5.684264828152695e-06, "loss": 0.4726, "step": 5116 }, { "epoch": 0.30701385972280554, "grad_norm": 1.1051840782165527, "learning_rate": 5.6837333292254094e-06, "loss": 0.395, "step": 5117 }, { "epoch": 0.30707385852282953, "grad_norm": 1.3288103342056274, "learning_rate": 5.683201747828466e-06, "loss": 0.4206, "step": 5118 }, { "epoch": 0.30713385732285353, "grad_norm": 1.2326889038085938, "learning_rate": 5.682670083981937e-06, "loss": 0.4601, "step": 5119 }, { "epoch": 0.30719385612287753, "grad_norm": 1.2419068813323975, "learning_rate": 5.682138337705905e-06, "loss": 0.4909, "step": 5120 }, { "epoch": 0.3072538549229015, "grad_norm": 1.2625337839126587, "learning_rate": 5.681606509020449e-06, "loss": 0.452, "step": 5121 }, { "epoch": 0.3073138537229255, "grad_norm": 1.161468744277954, "learning_rate": 5.6810745979456545e-06, "loss": 0.3666, "step": 5122 }, { "epoch": 0.3073738525229495, "grad_norm": 1.3817533254623413, "learning_rate": 5.68054260450161e-06, "loss": 0.4564, "step": 5123 }, { "epoch": 0.3074338513229735, "grad_norm": 1.2036983966827393, "learning_rate": 5.680010528708405e-06, "loss": 0.4362, "step": 5124 }, { "epoch": 0.3074938501229975, "grad_norm": 1.2800025939941406, "learning_rate": 5.6794783705861355e-06, "loss": 0.4107, "step": 5125 }, { "epoch": 0.30755384892302157, "grad_norm": 1.138386845588684, "learning_rate": 5.678946130154896e-06, "loss": 0.433, "step": 5126 }, { "epoch": 0.30761384772304556, "grad_norm": 1.234534502029419, "learning_rate": 5.67841380743479e-06, "loss": 0.4311, "step": 5127 }, { "epoch": 0.30767384652306956, "grad_norm": 1.273558497428894, "learning_rate": 5.6778814024459175e-06, "loss": 0.5065, "step": 5128 }, { "epoch": 0.30773384532309356, "grad_norm": 1.3128248453140259, "learning_rate": 5.677348915208389e-06, "loss": 0.412, "step": 5129 }, { "epoch": 0.30779384412311755, "grad_norm": 1.4270989894866943, "learning_rate": 5.676816345742311e-06, "loss": 0.4573, "step": 5130 }, { "epoch": 0.30785384292314155, "grad_norm": 1.1896538734436035, "learning_rate": 5.676283694067796e-06, "loss": 0.4522, "step": 5131 }, { "epoch": 0.30791384172316555, "grad_norm": 1.2306705713272095, "learning_rate": 5.675750960204962e-06, "loss": 0.4477, "step": 5132 }, { "epoch": 0.30797384052318955, "grad_norm": 1.2969484329223633, "learning_rate": 5.675218144173928e-06, "loss": 0.4197, "step": 5133 }, { "epoch": 0.30803383932321354, "grad_norm": 1.223893404006958, "learning_rate": 5.674685245994814e-06, "loss": 0.4322, "step": 5134 }, { "epoch": 0.30809383812323754, "grad_norm": 1.3792178630828857, "learning_rate": 5.674152265687746e-06, "loss": 0.4133, "step": 5135 }, { "epoch": 0.30815383692326154, "grad_norm": 1.2360265254974365, "learning_rate": 5.673619203272853e-06, "loss": 0.4196, "step": 5136 }, { "epoch": 0.30821383572328553, "grad_norm": 1.1677836179733276, "learning_rate": 5.673086058770264e-06, "loss": 0.4271, "step": 5137 }, { "epoch": 0.30827383452330953, "grad_norm": 1.3452688455581665, "learning_rate": 5.672552832200116e-06, "loss": 0.4913, "step": 5138 }, { "epoch": 0.30833383332333353, "grad_norm": 1.2682864665985107, "learning_rate": 5.672019523582547e-06, "loss": 0.4215, "step": 5139 }, { "epoch": 0.3083938321233575, "grad_norm": 1.2227153778076172, "learning_rate": 5.671486132937694e-06, "loss": 0.4229, "step": 5140 }, { "epoch": 0.3084538309233815, "grad_norm": 1.2999545335769653, "learning_rate": 5.6709526602857044e-06, "loss": 0.4094, "step": 5141 }, { "epoch": 0.3085138297234055, "grad_norm": 1.2195781469345093, "learning_rate": 5.670419105646724e-06, "loss": 0.4143, "step": 5142 }, { "epoch": 0.3085738285234295, "grad_norm": 1.267124891281128, "learning_rate": 5.669885469040902e-06, "loss": 0.4192, "step": 5143 }, { "epoch": 0.3086338273234535, "grad_norm": 1.2344269752502441, "learning_rate": 5.669351750488391e-06, "loss": 0.476, "step": 5144 }, { "epoch": 0.3086938261234775, "grad_norm": 1.244314432144165, "learning_rate": 5.668817950009349e-06, "loss": 0.4562, "step": 5145 }, { "epoch": 0.3087538249235015, "grad_norm": 1.1481666564941406, "learning_rate": 5.668284067623933e-06, "loss": 0.4467, "step": 5146 }, { "epoch": 0.3088138237235255, "grad_norm": 1.2450437545776367, "learning_rate": 5.667750103352307e-06, "loss": 0.4248, "step": 5147 }, { "epoch": 0.3088738225235495, "grad_norm": 1.1703333854675293, "learning_rate": 5.667216057214636e-06, "loss": 0.4391, "step": 5148 }, { "epoch": 0.30893382132357355, "grad_norm": 1.303200125694275, "learning_rate": 5.666681929231089e-06, "loss": 0.4282, "step": 5149 }, { "epoch": 0.30899382012359755, "grad_norm": 1.2799179553985596, "learning_rate": 5.666147719421836e-06, "loss": 0.4166, "step": 5150 }, { "epoch": 0.30905381892362155, "grad_norm": 1.3839884996414185, "learning_rate": 5.665613427807052e-06, "loss": 0.4859, "step": 5151 }, { "epoch": 0.30911381772364555, "grad_norm": 1.2973908185958862, "learning_rate": 5.665079054406916e-06, "loss": 0.428, "step": 5152 }, { "epoch": 0.30917381652366954, "grad_norm": 1.3825397491455078, "learning_rate": 5.66454459924161e-06, "loss": 0.4848, "step": 5153 }, { "epoch": 0.30923381532369354, "grad_norm": 1.1353371143341064, "learning_rate": 5.664010062331313e-06, "loss": 0.3844, "step": 5154 }, { "epoch": 0.30929381412371754, "grad_norm": 1.2818455696105957, "learning_rate": 5.663475443696218e-06, "loss": 0.4782, "step": 5155 }, { "epoch": 0.30935381292374153, "grad_norm": 1.0856050252914429, "learning_rate": 5.662940743356511e-06, "loss": 0.3797, "step": 5156 }, { "epoch": 0.30941381172376553, "grad_norm": 1.2861946821212769, "learning_rate": 5.662405961332386e-06, "loss": 0.3991, "step": 5157 }, { "epoch": 0.3094738105237895, "grad_norm": 1.2970850467681885, "learning_rate": 5.661871097644039e-06, "loss": 0.4516, "step": 5158 }, { "epoch": 0.3095338093238135, "grad_norm": 1.3758479356765747, "learning_rate": 5.661336152311672e-06, "loss": 0.4397, "step": 5159 }, { "epoch": 0.3095938081238375, "grad_norm": 1.16097891330719, "learning_rate": 5.660801125355484e-06, "loss": 0.4151, "step": 5160 }, { "epoch": 0.3096538069238615, "grad_norm": 1.2089080810546875, "learning_rate": 5.660266016795683e-06, "loss": 0.4031, "step": 5161 }, { "epoch": 0.3097138057238855, "grad_norm": 1.2957525253295898, "learning_rate": 5.659730826652477e-06, "loss": 0.4339, "step": 5162 }, { "epoch": 0.3097738045239095, "grad_norm": 1.4767109155654907, "learning_rate": 5.659195554946076e-06, "loss": 0.4563, "step": 5163 }, { "epoch": 0.3098338033239335, "grad_norm": 1.3470582962036133, "learning_rate": 5.658660201696698e-06, "loss": 0.3913, "step": 5164 }, { "epoch": 0.3098938021239575, "grad_norm": 1.3034478425979614, "learning_rate": 5.658124766924558e-06, "loss": 0.4306, "step": 5165 }, { "epoch": 0.3099538009239815, "grad_norm": 1.4039981365203857, "learning_rate": 5.657589250649878e-06, "loss": 0.4598, "step": 5166 }, { "epoch": 0.3100137997240055, "grad_norm": 1.2868444919586182, "learning_rate": 5.657053652892882e-06, "loss": 0.4079, "step": 5167 }, { "epoch": 0.3100737985240295, "grad_norm": 1.2947313785552979, "learning_rate": 5.656517973673798e-06, "loss": 0.4419, "step": 5168 }, { "epoch": 0.3101337973240535, "grad_norm": 1.3419348001480103, "learning_rate": 5.655982213012853e-06, "loss": 0.4577, "step": 5169 }, { "epoch": 0.3101937961240775, "grad_norm": 1.1493914127349854, "learning_rate": 5.655446370930284e-06, "loss": 0.3798, "step": 5170 }, { "epoch": 0.31025379492410154, "grad_norm": 1.282894492149353, "learning_rate": 5.654910447446325e-06, "loss": 0.4172, "step": 5171 }, { "epoch": 0.31031379372412554, "grad_norm": 1.322131872177124, "learning_rate": 5.654374442581216e-06, "loss": 0.435, "step": 5172 }, { "epoch": 0.31037379252414954, "grad_norm": 1.225011944770813, "learning_rate": 5.653838356355201e-06, "loss": 0.3833, "step": 5173 }, { "epoch": 0.31043379132417354, "grad_norm": 1.283442497253418, "learning_rate": 5.653302188788523e-06, "loss": 0.3717, "step": 5174 }, { "epoch": 0.31049379012419753, "grad_norm": 1.2191084623336792, "learning_rate": 5.652765939901432e-06, "loss": 0.4139, "step": 5175 }, { "epoch": 0.31055378892422153, "grad_norm": 1.2720921039581299, "learning_rate": 5.652229609714179e-06, "loss": 0.4419, "step": 5176 }, { "epoch": 0.3106137877242455, "grad_norm": 1.4076975584030151, "learning_rate": 5.65169319824702e-06, "loss": 0.4419, "step": 5177 }, { "epoch": 0.3106737865242695, "grad_norm": 1.400454044342041, "learning_rate": 5.651156705520212e-06, "loss": 0.4162, "step": 5178 }, { "epoch": 0.3107337853242935, "grad_norm": 1.3042571544647217, "learning_rate": 5.6506201315540155e-06, "loss": 0.4534, "step": 5179 }, { "epoch": 0.3107937841243175, "grad_norm": 1.2198762893676758, "learning_rate": 5.650083476368695e-06, "loss": 0.4343, "step": 5180 }, { "epoch": 0.3108537829243415, "grad_norm": 1.2005717754364014, "learning_rate": 5.649546739984517e-06, "loss": 0.4113, "step": 5181 }, { "epoch": 0.3109137817243655, "grad_norm": 1.1328980922698975, "learning_rate": 5.649009922421752e-06, "loss": 0.4415, "step": 5182 }, { "epoch": 0.3109737805243895, "grad_norm": 1.2065259218215942, "learning_rate": 5.6484730237006736e-06, "loss": 0.4049, "step": 5183 }, { "epoch": 0.3110337793244135, "grad_norm": 1.3273504972457886, "learning_rate": 5.647936043841557e-06, "loss": 0.4556, "step": 5184 }, { "epoch": 0.3110937781244375, "grad_norm": 1.2986958026885986, "learning_rate": 5.647398982864682e-06, "loss": 0.4072, "step": 5185 }, { "epoch": 0.3111537769244615, "grad_norm": 1.2796775102615356, "learning_rate": 5.6468618407903314e-06, "loss": 0.4554, "step": 5186 }, { "epoch": 0.3112137757244855, "grad_norm": 1.1866528987884521, "learning_rate": 5.64632461763879e-06, "loss": 0.4722, "step": 5187 }, { "epoch": 0.3112737745245095, "grad_norm": 1.3893301486968994, "learning_rate": 5.6457873134303455e-06, "loss": 0.464, "step": 5188 }, { "epoch": 0.3113337733245335, "grad_norm": 1.177424669265747, "learning_rate": 5.645249928185292e-06, "loss": 0.396, "step": 5189 }, { "epoch": 0.3113937721245575, "grad_norm": 1.239358901977539, "learning_rate": 5.644712461923921e-06, "loss": 0.4583, "step": 5190 }, { "epoch": 0.3114537709245815, "grad_norm": 1.3824892044067383, "learning_rate": 5.644174914666532e-06, "loss": 0.4656, "step": 5191 }, { "epoch": 0.3115137697246055, "grad_norm": 1.2783048152923584, "learning_rate": 5.643637286433426e-06, "loss": 0.4949, "step": 5192 }, { "epoch": 0.3115737685246295, "grad_norm": 1.237291693687439, "learning_rate": 5.643099577244906e-06, "loss": 0.4279, "step": 5193 }, { "epoch": 0.31163376732465353, "grad_norm": 1.2439172267913818, "learning_rate": 5.642561787121278e-06, "loss": 0.4414, "step": 5194 }, { "epoch": 0.31169376612467753, "grad_norm": 1.2530272006988525, "learning_rate": 5.642023916082854e-06, "loss": 0.4271, "step": 5195 }, { "epoch": 0.3117537649247015, "grad_norm": 1.292866826057434, "learning_rate": 5.641485964149944e-06, "loss": 0.4393, "step": 5196 }, { "epoch": 0.3118137637247255, "grad_norm": 1.2232191562652588, "learning_rate": 5.640947931342868e-06, "loss": 0.4438, "step": 5197 }, { "epoch": 0.3118737625247495, "grad_norm": 1.1764254570007324, "learning_rate": 5.640409817681941e-06, "loss": 0.4588, "step": 5198 }, { "epoch": 0.3119337613247735, "grad_norm": 1.4931166172027588, "learning_rate": 5.639871623187487e-06, "loss": 0.431, "step": 5199 }, { "epoch": 0.3119937601247975, "grad_norm": 1.367020845413208, "learning_rate": 5.639333347879831e-06, "loss": 0.4674, "step": 5200 }, { "epoch": 0.3120537589248215, "grad_norm": 1.2407292127609253, "learning_rate": 5.638794991779302e-06, "loss": 0.4699, "step": 5201 }, { "epoch": 0.3121137577248455, "grad_norm": 1.2665035724639893, "learning_rate": 5.638256554906229e-06, "loss": 0.4288, "step": 5202 }, { "epoch": 0.3121737565248695, "grad_norm": 1.2780787944793701, "learning_rate": 5.63771803728095e-06, "loss": 0.4319, "step": 5203 }, { "epoch": 0.3122337553248935, "grad_norm": 1.2383277416229248, "learning_rate": 5.637179438923798e-06, "loss": 0.4517, "step": 5204 }, { "epoch": 0.3122937541249175, "grad_norm": 1.0980497598648071, "learning_rate": 5.6366407598551176e-06, "loss": 0.4198, "step": 5205 }, { "epoch": 0.3123537529249415, "grad_norm": 1.2085158824920654, "learning_rate": 5.636102000095249e-06, "loss": 0.4106, "step": 5206 }, { "epoch": 0.3124137517249655, "grad_norm": 1.2974441051483154, "learning_rate": 5.63556315966454e-06, "loss": 0.4862, "step": 5207 }, { "epoch": 0.3124737505249895, "grad_norm": 1.2696776390075684, "learning_rate": 5.63502423858334e-06, "loss": 0.4746, "step": 5208 }, { "epoch": 0.3125337493250135, "grad_norm": 1.3513712882995605, "learning_rate": 5.634485236872003e-06, "loss": 0.5063, "step": 5209 }, { "epoch": 0.3125937481250375, "grad_norm": 1.3606711626052856, "learning_rate": 5.633946154550881e-06, "loss": 0.4322, "step": 5210 }, { "epoch": 0.3126537469250615, "grad_norm": 1.2922837734222412, "learning_rate": 5.633406991640336e-06, "loss": 0.4299, "step": 5211 }, { "epoch": 0.3127137457250855, "grad_norm": 1.155092477798462, "learning_rate": 5.632867748160729e-06, "loss": 0.4287, "step": 5212 }, { "epoch": 0.3127737445251095, "grad_norm": 1.1582717895507812, "learning_rate": 5.632328424132425e-06, "loss": 0.4449, "step": 5213 }, { "epoch": 0.3128337433251335, "grad_norm": 1.2581524848937988, "learning_rate": 5.63178901957579e-06, "loss": 0.4629, "step": 5214 }, { "epoch": 0.31289374212515747, "grad_norm": 1.2230918407440186, "learning_rate": 5.631249534511197e-06, "loss": 0.4168, "step": 5215 }, { "epoch": 0.3129537409251815, "grad_norm": 1.3083457946777344, "learning_rate": 5.630709968959018e-06, "loss": 0.4248, "step": 5216 }, { "epoch": 0.3130137397252055, "grad_norm": 1.219032883644104, "learning_rate": 5.630170322939632e-06, "loss": 0.4492, "step": 5217 }, { "epoch": 0.3130737385252295, "grad_norm": 1.2542243003845215, "learning_rate": 5.629630596473418e-06, "loss": 0.4831, "step": 5218 }, { "epoch": 0.3131337373252535, "grad_norm": 1.20008385181427, "learning_rate": 5.629090789580758e-06, "loss": 0.4515, "step": 5219 }, { "epoch": 0.3131937361252775, "grad_norm": 1.3765455484390259, "learning_rate": 5.62855090228204e-06, "loss": 0.4552, "step": 5220 }, { "epoch": 0.3132537349253015, "grad_norm": 1.1404938697814941, "learning_rate": 5.628010934597652e-06, "loss": 0.4232, "step": 5221 }, { "epoch": 0.3133137337253255, "grad_norm": 1.3112457990646362, "learning_rate": 5.627470886547985e-06, "loss": 0.4497, "step": 5222 }, { "epoch": 0.3133737325253495, "grad_norm": 1.288455843925476, "learning_rate": 5.626930758153437e-06, "loss": 0.4564, "step": 5223 }, { "epoch": 0.3134337313253735, "grad_norm": 1.2350467443466187, "learning_rate": 5.626390549434404e-06, "loss": 0.4225, "step": 5224 }, { "epoch": 0.3134937301253975, "grad_norm": 1.1624417304992676, "learning_rate": 5.625850260411287e-06, "loss": 0.4386, "step": 5225 }, { "epoch": 0.3135537289254215, "grad_norm": 1.2714866399765015, "learning_rate": 5.625309891104491e-06, "loss": 0.4036, "step": 5226 }, { "epoch": 0.3136137277254455, "grad_norm": 1.294650673866272, "learning_rate": 5.624769441534425e-06, "loss": 0.4729, "step": 5227 }, { "epoch": 0.3136737265254695, "grad_norm": 1.245314359664917, "learning_rate": 5.624228911721496e-06, "loss": 0.4389, "step": 5228 }, { "epoch": 0.3137337253254935, "grad_norm": 1.2931973934173584, "learning_rate": 5.623688301686119e-06, "loss": 0.4912, "step": 5229 }, { "epoch": 0.3137937241255175, "grad_norm": 1.1827051639556885, "learning_rate": 5.62314761144871e-06, "loss": 0.4488, "step": 5230 }, { "epoch": 0.3138537229255415, "grad_norm": 1.1972029209136963, "learning_rate": 5.622606841029691e-06, "loss": 0.4406, "step": 5231 }, { "epoch": 0.3139137217255655, "grad_norm": 1.3856655359268188, "learning_rate": 5.62206599044948e-06, "loss": 0.4149, "step": 5232 }, { "epoch": 0.3139737205255895, "grad_norm": 1.1712452173233032, "learning_rate": 5.621525059728506e-06, "loss": 0.4091, "step": 5233 }, { "epoch": 0.31403371932561347, "grad_norm": 1.3487087488174438, "learning_rate": 5.620984048887195e-06, "loss": 0.463, "step": 5234 }, { "epoch": 0.31409371812563747, "grad_norm": 1.129441738128662, "learning_rate": 5.62044295794598e-06, "loss": 0.4331, "step": 5235 }, { "epoch": 0.31415371692566146, "grad_norm": 1.3310832977294922, "learning_rate": 5.619901786925295e-06, "loss": 0.4654, "step": 5236 }, { "epoch": 0.31421371572568546, "grad_norm": 1.4185545444488525, "learning_rate": 5.619360535845578e-06, "loss": 0.4661, "step": 5237 }, { "epoch": 0.3142737145257095, "grad_norm": 1.2411444187164307, "learning_rate": 5.61881920472727e-06, "loss": 0.4313, "step": 5238 }, { "epoch": 0.3143337133257335, "grad_norm": 1.222981572151184, "learning_rate": 5.6182777935908125e-06, "loss": 0.4687, "step": 5239 }, { "epoch": 0.3143937121257575, "grad_norm": 1.1904054880142212, "learning_rate": 5.617736302456655e-06, "loss": 0.461, "step": 5240 }, { "epoch": 0.3144537109257815, "grad_norm": 1.1817790269851685, "learning_rate": 5.617194731345245e-06, "loss": 0.3901, "step": 5241 }, { "epoch": 0.3145137097258055, "grad_norm": 1.3502984046936035, "learning_rate": 5.6166530802770365e-06, "loss": 0.4974, "step": 5242 }, { "epoch": 0.3145737085258295, "grad_norm": 1.3922431468963623, "learning_rate": 5.616111349272484e-06, "loss": 0.4251, "step": 5243 }, { "epoch": 0.3146337073258535, "grad_norm": 1.3185213804244995, "learning_rate": 5.615569538352047e-06, "loss": 0.4148, "step": 5244 }, { "epoch": 0.3146937061258775, "grad_norm": 1.213729977607727, "learning_rate": 5.6150276475361876e-06, "loss": 0.3999, "step": 5245 }, { "epoch": 0.3147537049259015, "grad_norm": 1.231115698814392, "learning_rate": 5.614485676845369e-06, "loss": 0.4284, "step": 5246 }, { "epoch": 0.3148137037259255, "grad_norm": 1.2062232494354248, "learning_rate": 5.613943626300061e-06, "loss": 0.372, "step": 5247 }, { "epoch": 0.3148737025259495, "grad_norm": 1.0584022998809814, "learning_rate": 5.613401495920732e-06, "loss": 0.393, "step": 5248 }, { "epoch": 0.3149337013259735, "grad_norm": 1.2760874032974243, "learning_rate": 5.6128592857278585e-06, "loss": 0.4299, "step": 5249 }, { "epoch": 0.3149937001259975, "grad_norm": 1.241952657699585, "learning_rate": 5.612316995741915e-06, "loss": 0.4201, "step": 5250 }, { "epoch": 0.3150536989260215, "grad_norm": 1.3277175426483154, "learning_rate": 5.611774625983382e-06, "loss": 0.4601, "step": 5251 }, { "epoch": 0.31511369772604547, "grad_norm": 1.2004222869873047, "learning_rate": 5.611232176472743e-06, "loss": 0.4354, "step": 5252 }, { "epoch": 0.31517369652606947, "grad_norm": 1.2025569677352905, "learning_rate": 5.610689647230483e-06, "loss": 0.4554, "step": 5253 }, { "epoch": 0.31523369532609347, "grad_norm": 1.2781606912612915, "learning_rate": 5.610147038277093e-06, "loss": 0.3801, "step": 5254 }, { "epoch": 0.31529369412611746, "grad_norm": 1.2549020051956177, "learning_rate": 5.609604349633062e-06, "loss": 0.4083, "step": 5255 }, { "epoch": 0.31535369292614146, "grad_norm": 1.30403733253479, "learning_rate": 5.609061581318886e-06, "loss": 0.4013, "step": 5256 }, { "epoch": 0.31541369172616546, "grad_norm": 1.337548851966858, "learning_rate": 5.608518733355063e-06, "loss": 0.482, "step": 5257 }, { "epoch": 0.31547369052618945, "grad_norm": 1.2730978727340698, "learning_rate": 5.607975805762094e-06, "loss": 0.4408, "step": 5258 }, { "epoch": 0.31553368932621345, "grad_norm": 1.2335952520370483, "learning_rate": 5.607432798560483e-06, "loss": 0.4154, "step": 5259 }, { "epoch": 0.31559368812623745, "grad_norm": 1.1197514533996582, "learning_rate": 5.6068897117707366e-06, "loss": 0.4023, "step": 5260 }, { "epoch": 0.3156536869262615, "grad_norm": 1.2729029655456543, "learning_rate": 5.606346545413364e-06, "loss": 0.4415, "step": 5261 }, { "epoch": 0.3157136857262855, "grad_norm": 1.129098892211914, "learning_rate": 5.60580329950888e-06, "loss": 0.4107, "step": 5262 }, { "epoch": 0.3157736845263095, "grad_norm": 1.404755711555481, "learning_rate": 5.6052599740777985e-06, "loss": 0.4336, "step": 5263 }, { "epoch": 0.3158336833263335, "grad_norm": 1.2678098678588867, "learning_rate": 5.60471656914064e-06, "loss": 0.4608, "step": 5264 }, { "epoch": 0.3158936821263575, "grad_norm": 1.2441471815109253, "learning_rate": 5.6041730847179255e-06, "loss": 0.4345, "step": 5265 }, { "epoch": 0.3159536809263815, "grad_norm": 1.2755746841430664, "learning_rate": 5.603629520830179e-06, "loss": 0.4571, "step": 5266 }, { "epoch": 0.3160136797264055, "grad_norm": 1.170042872428894, "learning_rate": 5.603085877497931e-06, "loss": 0.4321, "step": 5267 }, { "epoch": 0.3160736785264295, "grad_norm": 1.3470958471298218, "learning_rate": 5.602542154741711e-06, "loss": 0.431, "step": 5268 }, { "epoch": 0.3161336773264535, "grad_norm": 1.1779234409332275, "learning_rate": 5.601998352582052e-06, "loss": 0.4653, "step": 5269 }, { "epoch": 0.3161936761264775, "grad_norm": 1.3394166231155396, "learning_rate": 5.601454471039492e-06, "loss": 0.4351, "step": 5270 }, { "epoch": 0.31625367492650147, "grad_norm": 1.2271456718444824, "learning_rate": 5.600910510134572e-06, "loss": 0.4461, "step": 5271 }, { "epoch": 0.31631367372652547, "grad_norm": 1.3071517944335938, "learning_rate": 5.600366469887833e-06, "loss": 0.4195, "step": 5272 }, { "epoch": 0.31637367252654947, "grad_norm": 1.2747607231140137, "learning_rate": 5.599822350319822e-06, "loss": 0.4142, "step": 5273 }, { "epoch": 0.31643367132657346, "grad_norm": 1.3044219017028809, "learning_rate": 5.599278151451088e-06, "loss": 0.42, "step": 5274 }, { "epoch": 0.31649367012659746, "grad_norm": 1.3768796920776367, "learning_rate": 5.5987338733021805e-06, "loss": 0.4546, "step": 5275 }, { "epoch": 0.31655366892662146, "grad_norm": 1.0798131227493286, "learning_rate": 5.598189515893658e-06, "loss": 0.4095, "step": 5276 }, { "epoch": 0.31661366772664545, "grad_norm": 1.340955376625061, "learning_rate": 5.597645079246077e-06, "loss": 0.4623, "step": 5277 }, { "epoch": 0.31667366652666945, "grad_norm": 1.2199032306671143, "learning_rate": 5.597100563379998e-06, "loss": 0.4265, "step": 5278 }, { "epoch": 0.31673366532669345, "grad_norm": 1.1562790870666504, "learning_rate": 5.596555968315984e-06, "loss": 0.4083, "step": 5279 }, { "epoch": 0.31679366412671744, "grad_norm": 1.2223018407821655, "learning_rate": 5.596011294074603e-06, "loss": 0.4456, "step": 5280 }, { "epoch": 0.31685366292674144, "grad_norm": 1.2740944623947144, "learning_rate": 5.595466540676427e-06, "loss": 0.4564, "step": 5281 }, { "epoch": 0.31691366172676544, "grad_norm": 1.2400840520858765, "learning_rate": 5.594921708142025e-06, "loss": 0.375, "step": 5282 }, { "epoch": 0.3169736605267895, "grad_norm": 1.331276535987854, "learning_rate": 5.594376796491976e-06, "loss": 0.4354, "step": 5283 }, { "epoch": 0.3170336593268135, "grad_norm": 1.283244013786316, "learning_rate": 5.593831805746855e-06, "loss": 0.4283, "step": 5284 }, { "epoch": 0.3170936581268375, "grad_norm": 1.27194344997406, "learning_rate": 5.593286735927248e-06, "loss": 0.4251, "step": 5285 }, { "epoch": 0.3171536569268615, "grad_norm": 1.0696275234222412, "learning_rate": 5.5927415870537375e-06, "loss": 0.4246, "step": 5286 }, { "epoch": 0.3172136557268855, "grad_norm": 1.2692779302597046, "learning_rate": 5.592196359146911e-06, "loss": 0.4767, "step": 5287 }, { "epoch": 0.3172736545269095, "grad_norm": 1.1786102056503296, "learning_rate": 5.591651052227361e-06, "loss": 0.4227, "step": 5288 }, { "epoch": 0.3173336533269335, "grad_norm": 1.3735812902450562, "learning_rate": 5.59110566631568e-06, "loss": 0.4575, "step": 5289 }, { "epoch": 0.31739365212695747, "grad_norm": 1.1962158679962158, "learning_rate": 5.590560201432465e-06, "loss": 0.425, "step": 5290 }, { "epoch": 0.31745365092698147, "grad_norm": 1.343642234802246, "learning_rate": 5.590014657598316e-06, "loss": 0.5019, "step": 5291 }, { "epoch": 0.31751364972700546, "grad_norm": 1.392289400100708, "learning_rate": 5.589469034833835e-06, "loss": 0.4475, "step": 5292 }, { "epoch": 0.31757364852702946, "grad_norm": 1.327473759651184, "learning_rate": 5.588923333159628e-06, "loss": 0.446, "step": 5293 }, { "epoch": 0.31763364732705346, "grad_norm": 1.2446184158325195, "learning_rate": 5.588377552596304e-06, "loss": 0.4119, "step": 5294 }, { "epoch": 0.31769364612707746, "grad_norm": 1.4323121309280396, "learning_rate": 5.587831693164476e-06, "loss": 0.4589, "step": 5295 }, { "epoch": 0.31775364492710145, "grad_norm": 1.2903552055358887, "learning_rate": 5.587285754884755e-06, "loss": 0.4599, "step": 5296 }, { "epoch": 0.31781364372712545, "grad_norm": 1.2375963926315308, "learning_rate": 5.586739737777761e-06, "loss": 0.4251, "step": 5297 }, { "epoch": 0.31787364252714945, "grad_norm": 1.4511659145355225, "learning_rate": 5.586193641864115e-06, "loss": 0.4396, "step": 5298 }, { "epoch": 0.31793364132717344, "grad_norm": 1.2844082117080688, "learning_rate": 5.5856474671644394e-06, "loss": 0.4012, "step": 5299 }, { "epoch": 0.31799364012719744, "grad_norm": 1.5013492107391357, "learning_rate": 5.585101213699361e-06, "loss": 0.4696, "step": 5300 }, { "epoch": 0.31805363892722144, "grad_norm": 1.3120077848434448, "learning_rate": 5.5845548814895105e-06, "loss": 0.4093, "step": 5301 }, { "epoch": 0.31811363772724544, "grad_norm": 1.169507622718811, "learning_rate": 5.584008470555519e-06, "loss": 0.3971, "step": 5302 }, { "epoch": 0.31817363652726943, "grad_norm": 1.3572074174880981, "learning_rate": 5.583461980918022e-06, "loss": 0.4678, "step": 5303 }, { "epoch": 0.31823363532729343, "grad_norm": 1.3470218181610107, "learning_rate": 5.582915412597657e-06, "loss": 0.427, "step": 5304 }, { "epoch": 0.3182936341273174, "grad_norm": 1.349531888961792, "learning_rate": 5.582368765615068e-06, "loss": 0.4948, "step": 5305 }, { "epoch": 0.3183536329273415, "grad_norm": 1.1539570093154907, "learning_rate": 5.581822039990898e-06, "loss": 0.4284, "step": 5306 }, { "epoch": 0.3184136317273655, "grad_norm": 1.1573407649993896, "learning_rate": 5.581275235745793e-06, "loss": 0.4032, "step": 5307 }, { "epoch": 0.3184736305273895, "grad_norm": 1.2423595190048218, "learning_rate": 5.5807283529004055e-06, "loss": 0.4569, "step": 5308 }, { "epoch": 0.31853362932741347, "grad_norm": 1.2636953592300415, "learning_rate": 5.580181391475388e-06, "loss": 0.4741, "step": 5309 }, { "epoch": 0.31859362812743747, "grad_norm": 1.2720909118652344, "learning_rate": 5.579634351491397e-06, "loss": 0.4848, "step": 5310 }, { "epoch": 0.31865362692746146, "grad_norm": 1.2792755365371704, "learning_rate": 5.579087232969091e-06, "loss": 0.4804, "step": 5311 }, { "epoch": 0.31871362572748546, "grad_norm": 1.2772105932235718, "learning_rate": 5.578540035929132e-06, "loss": 0.4438, "step": 5312 }, { "epoch": 0.31877362452750946, "grad_norm": 1.2512824535369873, "learning_rate": 5.577992760392187e-06, "loss": 0.3999, "step": 5313 }, { "epoch": 0.31883362332753346, "grad_norm": 1.2977678775787354, "learning_rate": 5.577445406378922e-06, "loss": 0.4321, "step": 5314 }, { "epoch": 0.31889362212755745, "grad_norm": 1.433233618736267, "learning_rate": 5.576897973910009e-06, "loss": 0.4781, "step": 5315 }, { "epoch": 0.31895362092758145, "grad_norm": 1.3768025636672974, "learning_rate": 5.576350463006121e-06, "loss": 0.4578, "step": 5316 }, { "epoch": 0.31901361972760545, "grad_norm": 1.1405558586120605, "learning_rate": 5.575802873687935e-06, "loss": 0.3706, "step": 5317 }, { "epoch": 0.31907361852762944, "grad_norm": 1.259690761566162, "learning_rate": 5.575255205976134e-06, "loss": 0.3631, "step": 5318 }, { "epoch": 0.31913361732765344, "grad_norm": 1.2890353202819824, "learning_rate": 5.574707459891398e-06, "loss": 0.442, "step": 5319 }, { "epoch": 0.31919361612767744, "grad_norm": 1.1085067987442017, "learning_rate": 5.574159635454414e-06, "loss": 0.4144, "step": 5320 }, { "epoch": 0.31925361492770143, "grad_norm": 1.2507061958312988, "learning_rate": 5.573611732685869e-06, "loss": 0.4746, "step": 5321 }, { "epoch": 0.31931361372772543, "grad_norm": 1.2572083473205566, "learning_rate": 5.573063751606457e-06, "loss": 0.4609, "step": 5322 }, { "epoch": 0.31937361252774943, "grad_norm": 1.2675206661224365, "learning_rate": 5.572515692236872e-06, "loss": 0.4412, "step": 5323 }, { "epoch": 0.3194336113277734, "grad_norm": 1.2080862522125244, "learning_rate": 5.571967554597812e-06, "loss": 0.436, "step": 5324 }, { "epoch": 0.3194936101277974, "grad_norm": 1.1282482147216797, "learning_rate": 5.571419338709977e-06, "loss": 0.4123, "step": 5325 }, { "epoch": 0.3195536089278214, "grad_norm": 1.239605188369751, "learning_rate": 5.570871044594071e-06, "loss": 0.4612, "step": 5326 }, { "epoch": 0.3196136077278454, "grad_norm": 1.1826673746109009, "learning_rate": 5.570322672270801e-06, "loss": 0.4405, "step": 5327 }, { "epoch": 0.31967360652786947, "grad_norm": 1.2653430700302124, "learning_rate": 5.5697742217608744e-06, "loss": 0.4642, "step": 5328 }, { "epoch": 0.31973360532789347, "grad_norm": 1.4219111204147339, "learning_rate": 5.5692256930850064e-06, "loss": 0.5009, "step": 5329 }, { "epoch": 0.31979360412791746, "grad_norm": 1.5154746770858765, "learning_rate": 5.5686770862639116e-06, "loss": 0.5126, "step": 5330 }, { "epoch": 0.31985360292794146, "grad_norm": 1.3249074220657349, "learning_rate": 5.568128401318308e-06, "loss": 0.4959, "step": 5331 }, { "epoch": 0.31991360172796546, "grad_norm": 1.3593562841415405, "learning_rate": 5.567579638268918e-06, "loss": 0.4744, "step": 5332 }, { "epoch": 0.31997360052798945, "grad_norm": 1.2592518329620361, "learning_rate": 5.567030797136462e-06, "loss": 0.3862, "step": 5333 }, { "epoch": 0.32003359932801345, "grad_norm": 1.3477561473846436, "learning_rate": 5.566481877941672e-06, "loss": 0.4968, "step": 5334 }, { "epoch": 0.32009359812803745, "grad_norm": 1.3243436813354492, "learning_rate": 5.565932880705277e-06, "loss": 0.4206, "step": 5335 }, { "epoch": 0.32015359692806145, "grad_norm": 1.3306578397750854, "learning_rate": 5.565383805448009e-06, "loss": 0.4544, "step": 5336 }, { "epoch": 0.32021359572808544, "grad_norm": 1.2636977434158325, "learning_rate": 5.564834652190604e-06, "loss": 0.3974, "step": 5337 }, { "epoch": 0.32027359452810944, "grad_norm": 1.3940532207489014, "learning_rate": 5.564285420953803e-06, "loss": 0.4518, "step": 5338 }, { "epoch": 0.32033359332813344, "grad_norm": 1.19734787940979, "learning_rate": 5.563736111758344e-06, "loss": 0.4386, "step": 5339 }, { "epoch": 0.32039359212815743, "grad_norm": 1.3365538120269775, "learning_rate": 5.5631867246249766e-06, "loss": 0.4448, "step": 5340 }, { "epoch": 0.32045359092818143, "grad_norm": 1.3831002712249756, "learning_rate": 5.5626372595744455e-06, "loss": 0.4154, "step": 5341 }, { "epoch": 0.32051358972820543, "grad_norm": 1.2807111740112305, "learning_rate": 5.562087716627503e-06, "loss": 0.4217, "step": 5342 }, { "epoch": 0.3205735885282294, "grad_norm": 1.1555466651916504, "learning_rate": 5.561538095804902e-06, "loss": 0.4216, "step": 5343 }, { "epoch": 0.3206335873282534, "grad_norm": 1.2837297916412354, "learning_rate": 5.5609883971274e-06, "loss": 0.4493, "step": 5344 }, { "epoch": 0.3206935861282774, "grad_norm": 1.0558708906173706, "learning_rate": 5.560438620615754e-06, "loss": 0.4117, "step": 5345 }, { "epoch": 0.3207535849283014, "grad_norm": 1.267927646636963, "learning_rate": 5.55988876629073e-06, "loss": 0.4706, "step": 5346 }, { "epoch": 0.3208135837283254, "grad_norm": 1.2272082567214966, "learning_rate": 5.559338834173093e-06, "loss": 0.4538, "step": 5347 }, { "epoch": 0.3208735825283494, "grad_norm": 1.292445421218872, "learning_rate": 5.558788824283609e-06, "loss": 0.4681, "step": 5348 }, { "epoch": 0.3209335813283734, "grad_norm": 1.1014466285705566, "learning_rate": 5.558238736643052e-06, "loss": 0.3944, "step": 5349 }, { "epoch": 0.3209935801283974, "grad_norm": 1.2804009914398193, "learning_rate": 5.557688571272195e-06, "loss": 0.4234, "step": 5350 }, { "epoch": 0.32105357892842146, "grad_norm": 1.2395775318145752, "learning_rate": 5.5571383281918155e-06, "loss": 0.443, "step": 5351 }, { "epoch": 0.32111357772844545, "grad_norm": 1.2674081325531006, "learning_rate": 5.556588007422693e-06, "loss": 0.4379, "step": 5352 }, { "epoch": 0.32117357652846945, "grad_norm": 1.3648685216903687, "learning_rate": 5.5560376089856125e-06, "loss": 0.4603, "step": 5353 }, { "epoch": 0.32123357532849345, "grad_norm": 1.122757911682129, "learning_rate": 5.555487132901358e-06, "loss": 0.434, "step": 5354 }, { "epoch": 0.32129357412851745, "grad_norm": 1.2398799657821655, "learning_rate": 5.554936579190719e-06, "loss": 0.4393, "step": 5355 }, { "epoch": 0.32135357292854144, "grad_norm": 1.2644017934799194, "learning_rate": 5.554385947874488e-06, "loss": 0.4361, "step": 5356 }, { "epoch": 0.32141357172856544, "grad_norm": 1.3210030794143677, "learning_rate": 5.55383523897346e-06, "loss": 0.4597, "step": 5357 }, { "epoch": 0.32147357052858944, "grad_norm": 1.3355700969696045, "learning_rate": 5.553284452508432e-06, "loss": 0.4652, "step": 5358 }, { "epoch": 0.32153356932861343, "grad_norm": 1.3971707820892334, "learning_rate": 5.552733588500205e-06, "loss": 0.5078, "step": 5359 }, { "epoch": 0.32159356812863743, "grad_norm": 1.3726271390914917, "learning_rate": 5.552182646969583e-06, "loss": 0.4902, "step": 5360 }, { "epoch": 0.3216535669286614, "grad_norm": 1.28843355178833, "learning_rate": 5.551631627937371e-06, "loss": 0.4291, "step": 5361 }, { "epoch": 0.3217135657286854, "grad_norm": 1.340735912322998, "learning_rate": 5.5510805314243795e-06, "loss": 0.4395, "step": 5362 }, { "epoch": 0.3217735645287094, "grad_norm": 1.140268087387085, "learning_rate": 5.550529357451422e-06, "loss": 0.4403, "step": 5363 }, { "epoch": 0.3218335633287334, "grad_norm": 1.2039549350738525, "learning_rate": 5.549978106039313e-06, "loss": 0.4293, "step": 5364 }, { "epoch": 0.3218935621287574, "grad_norm": 1.1872165203094482, "learning_rate": 5.54942677720887e-06, "loss": 0.4539, "step": 5365 }, { "epoch": 0.3219535609287814, "grad_norm": 1.2542915344238281, "learning_rate": 5.548875370980915e-06, "loss": 0.4087, "step": 5366 }, { "epoch": 0.3220135597288054, "grad_norm": 1.2420097589492798, "learning_rate": 5.548323887376271e-06, "loss": 0.4671, "step": 5367 }, { "epoch": 0.3220735585288294, "grad_norm": 1.215990424156189, "learning_rate": 5.547772326415766e-06, "loss": 0.4622, "step": 5368 }, { "epoch": 0.3221335573288534, "grad_norm": 1.2632046937942505, "learning_rate": 5.547220688120232e-06, "loss": 0.4572, "step": 5369 }, { "epoch": 0.3221935561288774, "grad_norm": 1.3104546070098877, "learning_rate": 5.546668972510498e-06, "loss": 0.4479, "step": 5370 }, { "epoch": 0.3222535549289014, "grad_norm": 1.15961492061615, "learning_rate": 5.546117179607402e-06, "loss": 0.397, "step": 5371 }, { "epoch": 0.3223135537289254, "grad_norm": 1.2310774326324463, "learning_rate": 5.545565309431782e-06, "loss": 0.3965, "step": 5372 }, { "epoch": 0.32237355252894945, "grad_norm": 1.3592826128005981, "learning_rate": 5.54501336200448e-06, "loss": 0.4451, "step": 5373 }, { "epoch": 0.32243355132897344, "grad_norm": 1.405921459197998, "learning_rate": 5.544461337346341e-06, "loss": 0.5242, "step": 5374 }, { "epoch": 0.32249355012899744, "grad_norm": 1.2463308572769165, "learning_rate": 5.54390923547821e-06, "loss": 0.4788, "step": 5375 }, { "epoch": 0.32255354892902144, "grad_norm": 1.2726835012435913, "learning_rate": 5.54335705642094e-06, "loss": 0.4304, "step": 5376 }, { "epoch": 0.32261354772904544, "grad_norm": 1.3644402027130127, "learning_rate": 5.542804800195384e-06, "loss": 0.4334, "step": 5377 }, { "epoch": 0.32267354652906943, "grad_norm": 1.4042669534683228, "learning_rate": 5.5422524668223974e-06, "loss": 0.5164, "step": 5378 }, { "epoch": 0.32273354532909343, "grad_norm": 1.2737029790878296, "learning_rate": 5.541700056322841e-06, "loss": 0.4512, "step": 5379 }, { "epoch": 0.3227935441291174, "grad_norm": 1.3352932929992676, "learning_rate": 5.541147568717573e-06, "loss": 0.4313, "step": 5380 }, { "epoch": 0.3228535429291414, "grad_norm": 1.2747021913528442, "learning_rate": 5.5405950040274625e-06, "loss": 0.4511, "step": 5381 }, { "epoch": 0.3229135417291654, "grad_norm": 1.2601358890533447, "learning_rate": 5.540042362273375e-06, "loss": 0.4622, "step": 5382 }, { "epoch": 0.3229735405291894, "grad_norm": 1.4501160383224487, "learning_rate": 5.539489643476182e-06, "loss": 0.4753, "step": 5383 }, { "epoch": 0.3230335393292134, "grad_norm": 1.1867798566818237, "learning_rate": 5.538936847656757e-06, "loss": 0.461, "step": 5384 }, { "epoch": 0.3230935381292374, "grad_norm": 1.2122594118118286, "learning_rate": 5.538383974835977e-06, "loss": 0.4002, "step": 5385 }, { "epoch": 0.3231535369292614, "grad_norm": 1.1738853454589844, "learning_rate": 5.53783102503472e-06, "loss": 0.463, "step": 5386 }, { "epoch": 0.3232135357292854, "grad_norm": 1.1790804862976074, "learning_rate": 5.53727799827387e-06, "loss": 0.3702, "step": 5387 }, { "epoch": 0.3232735345293094, "grad_norm": 1.2217650413513184, "learning_rate": 5.536724894574313e-06, "loss": 0.4234, "step": 5388 }, { "epoch": 0.3233335333293334, "grad_norm": 1.110305905342102, "learning_rate": 5.536171713956935e-06, "loss": 0.3803, "step": 5389 }, { "epoch": 0.3233935321293574, "grad_norm": 1.2183226346969604, "learning_rate": 5.535618456442628e-06, "loss": 0.3975, "step": 5390 }, { "epoch": 0.3234535309293814, "grad_norm": 1.284500241279602, "learning_rate": 5.535065122052287e-06, "loss": 0.4188, "step": 5391 }, { "epoch": 0.3235135297294054, "grad_norm": 1.1274714469909668, "learning_rate": 5.534511710806806e-06, "loss": 0.4068, "step": 5392 }, { "epoch": 0.3235735285294294, "grad_norm": 1.3566516637802124, "learning_rate": 5.533958222727089e-06, "loss": 0.4321, "step": 5393 }, { "epoch": 0.3236335273294534, "grad_norm": 1.4161639213562012, "learning_rate": 5.533404657834036e-06, "loss": 0.4615, "step": 5394 }, { "epoch": 0.32369352612947744, "grad_norm": 1.343377709388733, "learning_rate": 5.532851016148554e-06, "loss": 0.4992, "step": 5395 }, { "epoch": 0.32375352492950144, "grad_norm": 1.1022913455963135, "learning_rate": 5.53229729769155e-06, "loss": 0.4161, "step": 5396 }, { "epoch": 0.32381352372952543, "grad_norm": 1.3107047080993652, "learning_rate": 5.531743502483936e-06, "loss": 0.4407, "step": 5397 }, { "epoch": 0.32387352252954943, "grad_norm": 1.238029956817627, "learning_rate": 5.531189630546627e-06, "loss": 0.4221, "step": 5398 }, { "epoch": 0.3239335213295734, "grad_norm": 1.3870949745178223, "learning_rate": 5.5306356819005385e-06, "loss": 0.4892, "step": 5399 }, { "epoch": 0.3239935201295974, "grad_norm": 1.1111462116241455, "learning_rate": 5.530081656566592e-06, "loss": 0.4255, "step": 5400 }, { "epoch": 0.3240535189296214, "grad_norm": 1.0559778213500977, "learning_rate": 5.529527554565713e-06, "loss": 0.3793, "step": 5401 }, { "epoch": 0.3241135177296454, "grad_norm": 1.2227145433425903, "learning_rate": 5.528973375918823e-06, "loss": 0.4562, "step": 5402 }, { "epoch": 0.3241735165296694, "grad_norm": 1.1715859174728394, "learning_rate": 5.528419120646851e-06, "loss": 0.4174, "step": 5403 }, { "epoch": 0.3242335153296934, "grad_norm": 1.2026116847991943, "learning_rate": 5.527864788770732e-06, "loss": 0.3658, "step": 5404 }, { "epoch": 0.3242935141297174, "grad_norm": 1.239917516708374, "learning_rate": 5.527310380311399e-06, "loss": 0.4537, "step": 5405 }, { "epoch": 0.3243535129297414, "grad_norm": 1.201049566268921, "learning_rate": 5.526755895289789e-06, "loss": 0.4279, "step": 5406 }, { "epoch": 0.3244135117297654, "grad_norm": 1.3497567176818848, "learning_rate": 5.5262013337268426e-06, "loss": 0.4881, "step": 5407 }, { "epoch": 0.3244735105297894, "grad_norm": 1.3266150951385498, "learning_rate": 5.525646695643503e-06, "loss": 0.5084, "step": 5408 }, { "epoch": 0.3245335093298134, "grad_norm": 1.2431904077529907, "learning_rate": 5.5250919810607165e-06, "loss": 0.4377, "step": 5409 }, { "epoch": 0.3245935081298374, "grad_norm": 1.2555586099624634, "learning_rate": 5.524537189999433e-06, "loss": 0.3899, "step": 5410 }, { "epoch": 0.3246535069298614, "grad_norm": 1.221761703491211, "learning_rate": 5.523982322480603e-06, "loss": 0.4433, "step": 5411 }, { "epoch": 0.3247135057298854, "grad_norm": 1.3493958711624146, "learning_rate": 5.523427378525182e-06, "loss": 0.4368, "step": 5412 }, { "epoch": 0.3247735045299094, "grad_norm": 1.2097095251083374, "learning_rate": 5.522872358154128e-06, "loss": 0.4617, "step": 5413 }, { "epoch": 0.3248335033299334, "grad_norm": 1.224642038345337, "learning_rate": 5.5223172613884005e-06, "loss": 0.42, "step": 5414 }, { "epoch": 0.3248935021299574, "grad_norm": 1.3574804067611694, "learning_rate": 5.521762088248963e-06, "loss": 0.4725, "step": 5415 }, { "epoch": 0.3249535009299814, "grad_norm": 1.1671777963638306, "learning_rate": 5.521206838756784e-06, "loss": 0.4445, "step": 5416 }, { "epoch": 0.3250134997300054, "grad_norm": 1.3312644958496094, "learning_rate": 5.520651512932831e-06, "loss": 0.4309, "step": 5417 }, { "epoch": 0.3250734985300294, "grad_norm": 1.1719316244125366, "learning_rate": 5.5200961107980765e-06, "loss": 0.4309, "step": 5418 }, { "epoch": 0.3251334973300534, "grad_norm": 1.3170114755630493, "learning_rate": 5.519540632373495e-06, "loss": 0.4017, "step": 5419 }, { "epoch": 0.3251934961300774, "grad_norm": 1.3345955610275269, "learning_rate": 5.518985077680066e-06, "loss": 0.4761, "step": 5420 }, { "epoch": 0.3252534949301014, "grad_norm": 1.2798469066619873, "learning_rate": 5.518429446738767e-06, "loss": 0.4632, "step": 5421 }, { "epoch": 0.3253134937301254, "grad_norm": 1.2360252141952515, "learning_rate": 5.517873739570586e-06, "loss": 0.4185, "step": 5422 }, { "epoch": 0.3253734925301494, "grad_norm": 1.2101881504058838, "learning_rate": 5.517317956196506e-06, "loss": 0.4129, "step": 5423 }, { "epoch": 0.3254334913301734, "grad_norm": 1.3396347761154175, "learning_rate": 5.516762096637517e-06, "loss": 0.439, "step": 5424 }, { "epoch": 0.3254934901301974, "grad_norm": 1.172681212425232, "learning_rate": 5.516206160914613e-06, "loss": 0.3796, "step": 5425 }, { "epoch": 0.3255534889302214, "grad_norm": 1.379462718963623, "learning_rate": 5.515650149048788e-06, "loss": 0.426, "step": 5426 }, { "epoch": 0.3256134877302454, "grad_norm": 1.1426186561584473, "learning_rate": 5.515094061061039e-06, "loss": 0.4067, "step": 5427 }, { "epoch": 0.3256734865302694, "grad_norm": 1.2032949924468994, "learning_rate": 5.514537896972371e-06, "loss": 0.3722, "step": 5428 }, { "epoch": 0.3257334853302934, "grad_norm": 1.1586226224899292, "learning_rate": 5.513981656803781e-06, "loss": 0.3825, "step": 5429 }, { "epoch": 0.3257934841303174, "grad_norm": 1.1981006860733032, "learning_rate": 5.513425340576282e-06, "loss": 0.4168, "step": 5430 }, { "epoch": 0.3258534829303414, "grad_norm": 1.2248241901397705, "learning_rate": 5.512868948310881e-06, "loss": 0.462, "step": 5431 }, { "epoch": 0.3259134817303654, "grad_norm": 1.1732707023620605, "learning_rate": 5.51231248002859e-06, "loss": 0.4428, "step": 5432 }, { "epoch": 0.3259734805303894, "grad_norm": 1.229981541633606, "learning_rate": 5.511755935750425e-06, "loss": 0.4426, "step": 5433 }, { "epoch": 0.3260334793304134, "grad_norm": 1.146209478378296, "learning_rate": 5.511199315497402e-06, "loss": 0.447, "step": 5434 }, { "epoch": 0.3260934781304374, "grad_norm": 1.3364027738571167, "learning_rate": 5.510642619290547e-06, "loss": 0.427, "step": 5435 }, { "epoch": 0.3261534769304614, "grad_norm": 1.3412142992019653, "learning_rate": 5.510085847150878e-06, "loss": 0.5147, "step": 5436 }, { "epoch": 0.32621347573048537, "grad_norm": 1.3183915615081787, "learning_rate": 5.509528999099426e-06, "loss": 0.4272, "step": 5437 }, { "epoch": 0.32627347453050937, "grad_norm": 1.2945752143859863, "learning_rate": 5.508972075157218e-06, "loss": 0.4234, "step": 5438 }, { "epoch": 0.32633347333053336, "grad_norm": 1.2804116010665894, "learning_rate": 5.508415075345288e-06, "loss": 0.4711, "step": 5439 }, { "epoch": 0.3263934721305574, "grad_norm": 1.3832449913024902, "learning_rate": 5.507857999684671e-06, "loss": 0.4342, "step": 5440 }, { "epoch": 0.3264534709305814, "grad_norm": 1.2658764123916626, "learning_rate": 5.507300848196405e-06, "loss": 0.4457, "step": 5441 }, { "epoch": 0.3265134697306054, "grad_norm": 1.2935727834701538, "learning_rate": 5.5067436209015315e-06, "loss": 0.4124, "step": 5442 }, { "epoch": 0.3265734685306294, "grad_norm": 1.4163134098052979, "learning_rate": 5.506186317821094e-06, "loss": 0.5086, "step": 5443 }, { "epoch": 0.3266334673306534, "grad_norm": 1.2952877283096313, "learning_rate": 5.50562893897614e-06, "loss": 0.4868, "step": 5444 }, { "epoch": 0.3266934661306774, "grad_norm": 1.2702723741531372, "learning_rate": 5.505071484387718e-06, "loss": 0.4481, "step": 5445 }, { "epoch": 0.3267534649307014, "grad_norm": 1.1009489297866821, "learning_rate": 5.504513954076881e-06, "loss": 0.4333, "step": 5446 }, { "epoch": 0.3268134637307254, "grad_norm": 1.207790732383728, "learning_rate": 5.503956348064684e-06, "loss": 0.374, "step": 5447 }, { "epoch": 0.3268734625307494, "grad_norm": 1.1097376346588135, "learning_rate": 5.503398666372188e-06, "loss": 0.4103, "step": 5448 }, { "epoch": 0.3269334613307734, "grad_norm": 1.235426425933838, "learning_rate": 5.502840909020449e-06, "loss": 0.4169, "step": 5449 }, { "epoch": 0.3269934601307974, "grad_norm": 1.0594804286956787, "learning_rate": 5.502283076030535e-06, "loss": 0.4209, "step": 5450 }, { "epoch": 0.3270534589308214, "grad_norm": 1.1662198305130005, "learning_rate": 5.501725167423512e-06, "loss": 0.4316, "step": 5451 }, { "epoch": 0.3271134577308454, "grad_norm": 1.379082441329956, "learning_rate": 5.501167183220449e-06, "loss": 0.4404, "step": 5452 }, { "epoch": 0.3271734565308694, "grad_norm": 1.2134785652160645, "learning_rate": 5.5006091234424185e-06, "loss": 0.4543, "step": 5453 }, { "epoch": 0.3272334553308934, "grad_norm": 1.274139642715454, "learning_rate": 5.500050988110497e-06, "loss": 0.4688, "step": 5454 }, { "epoch": 0.32729345413091737, "grad_norm": 1.204057216644287, "learning_rate": 5.4994927772457615e-06, "loss": 0.4342, "step": 5455 }, { "epoch": 0.32735345293094137, "grad_norm": 1.3025507926940918, "learning_rate": 5.4989344908692925e-06, "loss": 0.4004, "step": 5456 }, { "epoch": 0.32741345173096537, "grad_norm": 1.156001091003418, "learning_rate": 5.498376129002176e-06, "loss": 0.4679, "step": 5457 }, { "epoch": 0.32747345053098936, "grad_norm": 1.1985377073287964, "learning_rate": 5.497817691665497e-06, "loss": 0.4055, "step": 5458 }, { "epoch": 0.32753344933101336, "grad_norm": 1.2959662675857544, "learning_rate": 5.4972591788803466e-06, "loss": 0.4795, "step": 5459 }, { "epoch": 0.32759344813103736, "grad_norm": 1.2355406284332275, "learning_rate": 5.496700590667815e-06, "loss": 0.4635, "step": 5460 }, { "epoch": 0.32765344693106135, "grad_norm": 1.1853083372116089, "learning_rate": 5.496141927049002e-06, "loss": 0.3977, "step": 5461 }, { "epoch": 0.32771344573108535, "grad_norm": 1.278133749961853, "learning_rate": 5.495583188044999e-06, "loss": 0.426, "step": 5462 }, { "epoch": 0.3277734445311094, "grad_norm": 1.4101812839508057, "learning_rate": 5.495024373676913e-06, "loss": 0.4584, "step": 5463 }, { "epoch": 0.3278334433311334, "grad_norm": 1.3455839157104492, "learning_rate": 5.494465483965845e-06, "loss": 0.4018, "step": 5464 }, { "epoch": 0.3278934421311574, "grad_norm": 1.2045562267303467, "learning_rate": 5.493906518932904e-06, "loss": 0.451, "step": 5465 }, { "epoch": 0.3279534409311814, "grad_norm": 1.3053854703903198, "learning_rate": 5.493347478599196e-06, "loss": 0.4353, "step": 5466 }, { "epoch": 0.3280134397312054, "grad_norm": 1.3880369663238525, "learning_rate": 5.492788362985838e-06, "loss": 0.4294, "step": 5467 }, { "epoch": 0.3280734385312294, "grad_norm": 1.2618259191513062, "learning_rate": 5.4922291721139395e-06, "loss": 0.4508, "step": 5468 }, { "epoch": 0.3281334373312534, "grad_norm": 1.3135653734207153, "learning_rate": 5.491669906004623e-06, "loss": 0.4624, "step": 5469 }, { "epoch": 0.3281934361312774, "grad_norm": 1.2700812816619873, "learning_rate": 5.491110564679009e-06, "loss": 0.4235, "step": 5470 }, { "epoch": 0.3282534349313014, "grad_norm": 1.445703387260437, "learning_rate": 5.49055114815822e-06, "loss": 0.433, "step": 5471 }, { "epoch": 0.3283134337313254, "grad_norm": 1.2579575777053833, "learning_rate": 5.489991656463382e-06, "loss": 0.4583, "step": 5472 }, { "epoch": 0.3283734325313494, "grad_norm": 1.3206448554992676, "learning_rate": 5.489432089615627e-06, "loss": 0.5212, "step": 5473 }, { "epoch": 0.32843343133137337, "grad_norm": 1.2614234685897827, "learning_rate": 5.488872447636085e-06, "loss": 0.3887, "step": 5474 }, { "epoch": 0.32849343013139737, "grad_norm": 1.1917316913604736, "learning_rate": 5.488312730545892e-06, "loss": 0.4164, "step": 5475 }, { "epoch": 0.32855342893142137, "grad_norm": 1.3723838329315186, "learning_rate": 5.487752938366186e-06, "loss": 0.4104, "step": 5476 }, { "epoch": 0.32861342773144536, "grad_norm": 1.1086827516555786, "learning_rate": 5.487193071118108e-06, "loss": 0.4154, "step": 5477 }, { "epoch": 0.32867342653146936, "grad_norm": 1.2478581666946411, "learning_rate": 5.486633128822802e-06, "loss": 0.3921, "step": 5478 }, { "epoch": 0.32873342533149336, "grad_norm": 1.4095311164855957, "learning_rate": 5.486073111501412e-06, "loss": 0.4028, "step": 5479 }, { "epoch": 0.32879342413151735, "grad_norm": 1.3579585552215576, "learning_rate": 5.48551301917509e-06, "loss": 0.4404, "step": 5480 }, { "epoch": 0.32885342293154135, "grad_norm": 1.205889344215393, "learning_rate": 5.484952851864987e-06, "loss": 0.4078, "step": 5481 }, { "epoch": 0.32891342173156535, "grad_norm": 1.2201740741729736, "learning_rate": 5.484392609592259e-06, "loss": 0.3861, "step": 5482 }, { "epoch": 0.32897342053158934, "grad_norm": 1.228109359741211, "learning_rate": 5.483832292378062e-06, "loss": 0.4449, "step": 5483 }, { "epoch": 0.32903341933161334, "grad_norm": 1.1503535509109497, "learning_rate": 5.483271900243559e-06, "loss": 0.3981, "step": 5484 }, { "epoch": 0.3290934181316374, "grad_norm": 1.1121333837509155, "learning_rate": 5.48271143320991e-06, "loss": 0.406, "step": 5485 }, { "epoch": 0.3291534169316614, "grad_norm": 1.151419997215271, "learning_rate": 5.482150891298285e-06, "loss": 0.389, "step": 5486 }, { "epoch": 0.3292134157316854, "grad_norm": 1.225793480873108, "learning_rate": 5.481590274529851e-06, "loss": 0.4631, "step": 5487 }, { "epoch": 0.3292734145317094, "grad_norm": 1.189057469367981, "learning_rate": 5.4810295829257796e-06, "loss": 0.4239, "step": 5488 }, { "epoch": 0.3293334133317334, "grad_norm": 1.4019063711166382, "learning_rate": 5.480468816507247e-06, "loss": 0.4351, "step": 5489 }, { "epoch": 0.3293934121317574, "grad_norm": 1.4144337177276611, "learning_rate": 5.479907975295432e-06, "loss": 0.5173, "step": 5490 }, { "epoch": 0.3294534109317814, "grad_norm": 1.3183643817901611, "learning_rate": 5.479347059311511e-06, "loss": 0.4131, "step": 5491 }, { "epoch": 0.3295134097318054, "grad_norm": 1.1880273818969727, "learning_rate": 5.47878606857667e-06, "loss": 0.4828, "step": 5492 }, { "epoch": 0.32957340853182937, "grad_norm": 1.2717864513397217, "learning_rate": 5.478225003112094e-06, "loss": 0.4519, "step": 5493 }, { "epoch": 0.32963340733185337, "grad_norm": 1.2780224084854126, "learning_rate": 5.477663862938974e-06, "loss": 0.4218, "step": 5494 }, { "epoch": 0.32969340613187736, "grad_norm": 1.3288376331329346, "learning_rate": 5.477102648078499e-06, "loss": 0.4583, "step": 5495 }, { "epoch": 0.32975340493190136, "grad_norm": 1.2512314319610596, "learning_rate": 5.476541358551865e-06, "loss": 0.4051, "step": 5496 }, { "epoch": 0.32981340373192536, "grad_norm": 1.1545695066452026, "learning_rate": 5.4759799943802685e-06, "loss": 0.4099, "step": 5497 }, { "epoch": 0.32987340253194936, "grad_norm": 1.1793087720870972, "learning_rate": 5.475418555584911e-06, "loss": 0.3619, "step": 5498 }, { "epoch": 0.32993340133197335, "grad_norm": 1.3844681978225708, "learning_rate": 5.474857042186993e-06, "loss": 0.3846, "step": 5499 }, { "epoch": 0.32999340013199735, "grad_norm": 1.3204569816589355, "learning_rate": 5.474295454207723e-06, "loss": 0.4658, "step": 5500 }, { "epoch": 0.33005339893202135, "grad_norm": 1.3310930728912354, "learning_rate": 5.473733791668308e-06, "loss": 0.4565, "step": 5501 }, { "epoch": 0.33011339773204534, "grad_norm": 1.382498025894165, "learning_rate": 5.473172054589961e-06, "loss": 0.4896, "step": 5502 }, { "epoch": 0.33017339653206934, "grad_norm": 1.3045397996902466, "learning_rate": 5.472610242993895e-06, "loss": 0.4606, "step": 5503 }, { "epoch": 0.33023339533209334, "grad_norm": 1.2431306838989258, "learning_rate": 5.472048356901328e-06, "loss": 0.4301, "step": 5504 }, { "epoch": 0.33029339413211734, "grad_norm": 1.2149200439453125, "learning_rate": 5.471486396333477e-06, "loss": 0.4776, "step": 5505 }, { "epoch": 0.33035339293214133, "grad_norm": 1.2388548851013184, "learning_rate": 5.470924361311569e-06, "loss": 0.4374, "step": 5506 }, { "epoch": 0.33041339173216533, "grad_norm": 1.3111376762390137, "learning_rate": 5.470362251856827e-06, "loss": 0.4455, "step": 5507 }, { "epoch": 0.3304733905321894, "grad_norm": 1.3888740539550781, "learning_rate": 5.46980006799048e-06, "loss": 0.479, "step": 5508 }, { "epoch": 0.3305333893322134, "grad_norm": 1.385941982269287, "learning_rate": 5.469237809733757e-06, "loss": 0.4424, "step": 5509 }, { "epoch": 0.3305933881322374, "grad_norm": 1.2718397378921509, "learning_rate": 5.468675477107895e-06, "loss": 0.41, "step": 5510 }, { "epoch": 0.3306533869322614, "grad_norm": 1.1516733169555664, "learning_rate": 5.468113070134129e-06, "loss": 0.4485, "step": 5511 }, { "epoch": 0.33071338573228537, "grad_norm": 1.2885832786560059, "learning_rate": 5.4675505888337e-06, "loss": 0.4464, "step": 5512 }, { "epoch": 0.33077338453230937, "grad_norm": 1.1684705018997192, "learning_rate": 5.466988033227849e-06, "loss": 0.424, "step": 5513 }, { "epoch": 0.33083338333233336, "grad_norm": 1.3335951566696167, "learning_rate": 5.46642540333782e-06, "loss": 0.4697, "step": 5514 }, { "epoch": 0.33089338213235736, "grad_norm": 1.2214187383651733, "learning_rate": 5.465862699184865e-06, "loss": 0.4042, "step": 5515 }, { "epoch": 0.33095338093238136, "grad_norm": 1.2378947734832764, "learning_rate": 5.46529992079023e-06, "loss": 0.4761, "step": 5516 }, { "epoch": 0.33101337973240536, "grad_norm": 1.207586407661438, "learning_rate": 5.464737068175172e-06, "loss": 0.4097, "step": 5517 }, { "epoch": 0.33107337853242935, "grad_norm": 1.2951924800872803, "learning_rate": 5.464174141360946e-06, "loss": 0.4636, "step": 5518 }, { "epoch": 0.33113337733245335, "grad_norm": 1.1396726369857788, "learning_rate": 5.463611140368813e-06, "loss": 0.4364, "step": 5519 }, { "epoch": 0.33119337613247735, "grad_norm": 1.3018442392349243, "learning_rate": 5.4630480652200314e-06, "loss": 0.4136, "step": 5520 }, { "epoch": 0.33125337493250134, "grad_norm": 1.1990307569503784, "learning_rate": 5.462484915935869e-06, "loss": 0.4088, "step": 5521 }, { "epoch": 0.33131337373252534, "grad_norm": 1.2884618043899536, "learning_rate": 5.461921692537592e-06, "loss": 0.448, "step": 5522 }, { "epoch": 0.33137337253254934, "grad_norm": 1.3025017976760864, "learning_rate": 5.4613583950464715e-06, "loss": 0.4678, "step": 5523 }, { "epoch": 0.33143337133257333, "grad_norm": 1.192892074584961, "learning_rate": 5.46079502348378e-06, "loss": 0.4478, "step": 5524 }, { "epoch": 0.33149337013259733, "grad_norm": 1.2561343908309937, "learning_rate": 5.460231577870794e-06, "loss": 0.4302, "step": 5525 }, { "epoch": 0.33155336893262133, "grad_norm": 1.3087414503097534, "learning_rate": 5.459668058228792e-06, "loss": 0.4513, "step": 5526 }, { "epoch": 0.3316133677326453, "grad_norm": 1.1919872760772705, "learning_rate": 5.459104464579056e-06, "loss": 0.4431, "step": 5527 }, { "epoch": 0.3316733665326693, "grad_norm": 1.2225594520568848, "learning_rate": 5.45854079694287e-06, "loss": 0.3827, "step": 5528 }, { "epoch": 0.3317333653326933, "grad_norm": 1.2266898155212402, "learning_rate": 5.457977055341521e-06, "loss": 0.4212, "step": 5529 }, { "epoch": 0.33179336413271737, "grad_norm": 1.2630209922790527, "learning_rate": 5.457413239796299e-06, "loss": 0.4756, "step": 5530 }, { "epoch": 0.33185336293274137, "grad_norm": 1.2730098962783813, "learning_rate": 5.456849350328498e-06, "loss": 0.4087, "step": 5531 }, { "epoch": 0.33191336173276537, "grad_norm": 1.3714418411254883, "learning_rate": 5.45628538695941e-06, "loss": 0.4531, "step": 5532 }, { "epoch": 0.33197336053278936, "grad_norm": 1.2470320463180542, "learning_rate": 5.4557213497103375e-06, "loss": 0.4215, "step": 5533 }, { "epoch": 0.33203335933281336, "grad_norm": 1.282531976699829, "learning_rate": 5.455157238602579e-06, "loss": 0.4374, "step": 5534 }, { "epoch": 0.33209335813283736, "grad_norm": 1.2650699615478516, "learning_rate": 5.45459305365744e-06, "loss": 0.4328, "step": 5535 }, { "epoch": 0.33215335693286135, "grad_norm": 1.288156509399414, "learning_rate": 5.454028794896227e-06, "loss": 0.4597, "step": 5536 }, { "epoch": 0.33221335573288535, "grad_norm": 1.3682377338409424, "learning_rate": 5.4534644623402485e-06, "loss": 0.4242, "step": 5537 }, { "epoch": 0.33227335453290935, "grad_norm": 1.3633626699447632, "learning_rate": 5.452900056010816e-06, "loss": 0.4484, "step": 5538 }, { "epoch": 0.33233335333293335, "grad_norm": 1.4222180843353271, "learning_rate": 5.452335575929247e-06, "loss": 0.3929, "step": 5539 }, { "epoch": 0.33239335213295734, "grad_norm": 1.4315319061279297, "learning_rate": 5.451771022116857e-06, "loss": 0.439, "step": 5540 }, { "epoch": 0.33245335093298134, "grad_norm": 1.1804070472717285, "learning_rate": 5.451206394594969e-06, "loss": 0.4295, "step": 5541 }, { "epoch": 0.33251334973300534, "grad_norm": 1.433969259262085, "learning_rate": 5.450641693384905e-06, "loss": 0.4939, "step": 5542 }, { "epoch": 0.33257334853302933, "grad_norm": 1.2924494743347168, "learning_rate": 5.450076918507991e-06, "loss": 0.4457, "step": 5543 }, { "epoch": 0.33263334733305333, "grad_norm": 1.1027570962905884, "learning_rate": 5.4495120699855564e-06, "loss": 0.4454, "step": 5544 }, { "epoch": 0.33269334613307733, "grad_norm": 1.1094743013381958, "learning_rate": 5.448947147838932e-06, "loss": 0.4168, "step": 5545 }, { "epoch": 0.3327533449331013, "grad_norm": 1.311375617980957, "learning_rate": 5.448382152089454e-06, "loss": 0.4589, "step": 5546 }, { "epoch": 0.3328133437331253, "grad_norm": 1.2253730297088623, "learning_rate": 5.447817082758459e-06, "loss": 0.459, "step": 5547 }, { "epoch": 0.3328733425331493, "grad_norm": 1.2056901454925537, "learning_rate": 5.447251939867288e-06, "loss": 0.4595, "step": 5548 }, { "epoch": 0.3329333413331733, "grad_norm": 1.2723480463027954, "learning_rate": 5.446686723437282e-06, "loss": 0.4521, "step": 5549 }, { "epoch": 0.3329933401331973, "grad_norm": 1.187370777130127, "learning_rate": 5.446121433489787e-06, "loss": 0.4396, "step": 5550 }, { "epoch": 0.3330533389332213, "grad_norm": 1.2532199621200562, "learning_rate": 5.445556070046152e-06, "loss": 0.4619, "step": 5551 }, { "epoch": 0.33311333773324536, "grad_norm": 1.1197155714035034, "learning_rate": 5.44499063312773e-06, "loss": 0.3854, "step": 5552 }, { "epoch": 0.33317333653326936, "grad_norm": 1.2804169654846191, "learning_rate": 5.44442512275587e-06, "loss": 0.3995, "step": 5553 }, { "epoch": 0.33323333533329336, "grad_norm": 1.2427140474319458, "learning_rate": 5.443859538951934e-06, "loss": 0.436, "step": 5554 }, { "epoch": 0.33329333413331735, "grad_norm": 1.3264360427856445, "learning_rate": 5.44329388173728e-06, "loss": 0.4494, "step": 5555 }, { "epoch": 0.33335333293334135, "grad_norm": 1.2702122926712036, "learning_rate": 5.442728151133268e-06, "loss": 0.3897, "step": 5556 }, { "epoch": 0.33341333173336535, "grad_norm": 1.1692701578140259, "learning_rate": 5.442162347161265e-06, "loss": 0.414, "step": 5557 }, { "epoch": 0.33347333053338934, "grad_norm": 1.393707036972046, "learning_rate": 5.44159646984264e-06, "loss": 0.4816, "step": 5558 }, { "epoch": 0.33353332933341334, "grad_norm": 1.3037062883377075, "learning_rate": 5.441030519198761e-06, "loss": 0.4087, "step": 5559 }, { "epoch": 0.33359332813343734, "grad_norm": 1.1318480968475342, "learning_rate": 5.4404644952510024e-06, "loss": 0.4236, "step": 5560 }, { "epoch": 0.33365332693346134, "grad_norm": 1.3664900064468384, "learning_rate": 5.439898398020741e-06, "loss": 0.4484, "step": 5561 }, { "epoch": 0.33371332573348533, "grad_norm": 1.3951549530029297, "learning_rate": 5.439332227529356e-06, "loss": 0.484, "step": 5562 }, { "epoch": 0.33377332453350933, "grad_norm": 1.2723138332366943, "learning_rate": 5.438765983798227e-06, "loss": 0.4223, "step": 5563 }, { "epoch": 0.3338333233335333, "grad_norm": 1.2131617069244385, "learning_rate": 5.4381996668487416e-06, "loss": 0.4628, "step": 5564 }, { "epoch": 0.3338933221335573, "grad_norm": 1.1828926801681519, "learning_rate": 5.437633276702283e-06, "loss": 0.4431, "step": 5565 }, { "epoch": 0.3339533209335813, "grad_norm": 1.3021944761276245, "learning_rate": 5.437066813380245e-06, "loss": 0.5047, "step": 5566 }, { "epoch": 0.3340133197336053, "grad_norm": 1.2546814680099487, "learning_rate": 5.436500276904018e-06, "loss": 0.4326, "step": 5567 }, { "epoch": 0.3340733185336293, "grad_norm": 1.0944348573684692, "learning_rate": 5.435933667294999e-06, "loss": 0.3584, "step": 5568 }, { "epoch": 0.3341333173336533, "grad_norm": 1.1201435327529907, "learning_rate": 5.435366984574584e-06, "loss": 0.3749, "step": 5569 }, { "epoch": 0.3341933161336773, "grad_norm": 1.1763709783554077, "learning_rate": 5.434800228764177e-06, "loss": 0.4451, "step": 5570 }, { "epoch": 0.3342533149337013, "grad_norm": 1.3261746168136597, "learning_rate": 5.434233399885179e-06, "loss": 0.4156, "step": 5571 }, { "epoch": 0.3343133137337253, "grad_norm": 1.1862637996673584, "learning_rate": 5.433666497958999e-06, "loss": 0.3927, "step": 5572 }, { "epoch": 0.3343733125337493, "grad_norm": 1.2560523748397827, "learning_rate": 5.433099523007045e-06, "loss": 0.4268, "step": 5573 }, { "epoch": 0.3344333113337733, "grad_norm": 1.126978874206543, "learning_rate": 5.432532475050728e-06, "loss": 0.3848, "step": 5574 }, { "epoch": 0.33449331013379735, "grad_norm": 1.3148101568222046, "learning_rate": 5.4319653541114654e-06, "loss": 0.4032, "step": 5575 }, { "epoch": 0.33455330893382135, "grad_norm": 1.2608338594436646, "learning_rate": 5.431398160210673e-06, "loss": 0.4391, "step": 5576 }, { "epoch": 0.33461330773384534, "grad_norm": 1.1964643001556396, "learning_rate": 5.430830893369771e-06, "loss": 0.4228, "step": 5577 }, { "epoch": 0.33467330653386934, "grad_norm": 1.194954514503479, "learning_rate": 5.430263553610184e-06, "loss": 0.4008, "step": 5578 }, { "epoch": 0.33473330533389334, "grad_norm": 1.3175727128982544, "learning_rate": 5.4296961409533354e-06, "loss": 0.4475, "step": 5579 }, { "epoch": 0.33479330413391734, "grad_norm": 1.1150388717651367, "learning_rate": 5.429128655420656e-06, "loss": 0.4034, "step": 5580 }, { "epoch": 0.33485330293394133, "grad_norm": 1.2450529336929321, "learning_rate": 5.428561097033577e-06, "loss": 0.3973, "step": 5581 }, { "epoch": 0.33491330173396533, "grad_norm": 1.1922739744186401, "learning_rate": 5.427993465813531e-06, "loss": 0.4311, "step": 5582 }, { "epoch": 0.3349733005339893, "grad_norm": 1.848633885383606, "learning_rate": 5.427425761781955e-06, "loss": 0.4353, "step": 5583 }, { "epoch": 0.3350332993340133, "grad_norm": 1.2938144207000732, "learning_rate": 5.426857984960291e-06, "loss": 0.4515, "step": 5584 }, { "epoch": 0.3350932981340373, "grad_norm": 1.2712770700454712, "learning_rate": 5.426290135369979e-06, "loss": 0.4095, "step": 5585 }, { "epoch": 0.3351532969340613, "grad_norm": 1.290425419807434, "learning_rate": 5.425722213032465e-06, "loss": 0.4225, "step": 5586 }, { "epoch": 0.3352132957340853, "grad_norm": 1.2155903577804565, "learning_rate": 5.425154217969196e-06, "loss": 0.4217, "step": 5587 }, { "epoch": 0.3352732945341093, "grad_norm": 1.4003562927246094, "learning_rate": 5.424586150201623e-06, "loss": 0.4502, "step": 5588 }, { "epoch": 0.3353332933341333, "grad_norm": 1.2823377847671509, "learning_rate": 5.4240180097512e-06, "loss": 0.4239, "step": 5589 }, { "epoch": 0.3353932921341573, "grad_norm": 1.2353076934814453, "learning_rate": 5.423449796639383e-06, "loss": 0.4414, "step": 5590 }, { "epoch": 0.3354532909341813, "grad_norm": 1.400812029838562, "learning_rate": 5.422881510887629e-06, "loss": 0.4168, "step": 5591 }, { "epoch": 0.3355132897342053, "grad_norm": 1.3565667867660522, "learning_rate": 5.422313152517402e-06, "loss": 0.4374, "step": 5592 }, { "epoch": 0.3355732885342293, "grad_norm": 1.436267614364624, "learning_rate": 5.421744721550165e-06, "loss": 0.472, "step": 5593 }, { "epoch": 0.3356332873342533, "grad_norm": 1.2211252450942993, "learning_rate": 5.421176218007386e-06, "loss": 0.4273, "step": 5594 }, { "epoch": 0.3356932861342773, "grad_norm": 1.328229308128357, "learning_rate": 5.420607641910534e-06, "loss": 0.44, "step": 5595 }, { "epoch": 0.3357532849343013, "grad_norm": 1.194914698600769, "learning_rate": 5.42003899328108e-06, "loss": 0.4361, "step": 5596 }, { "epoch": 0.33581328373432534, "grad_norm": 1.3684648275375366, "learning_rate": 5.4194702721405025e-06, "loss": 0.4248, "step": 5597 }, { "epoch": 0.33587328253434934, "grad_norm": 1.3081488609313965, "learning_rate": 5.418901478510278e-06, "loss": 0.4356, "step": 5598 }, { "epoch": 0.33593328133437333, "grad_norm": 1.3362529277801514, "learning_rate": 5.418332612411886e-06, "loss": 0.4764, "step": 5599 }, { "epoch": 0.33599328013439733, "grad_norm": 1.3023016452789307, "learning_rate": 5.417763673866812e-06, "loss": 0.4523, "step": 5600 }, { "epoch": 0.33605327893442133, "grad_norm": 1.3191745281219482, "learning_rate": 5.41719466289654e-06, "loss": 0.4125, "step": 5601 }, { "epoch": 0.3361132777344453, "grad_norm": 1.2026234865188599, "learning_rate": 5.416625579522562e-06, "loss": 0.4531, "step": 5602 }, { "epoch": 0.3361732765344693, "grad_norm": 1.2802534103393555, "learning_rate": 5.416056423766368e-06, "loss": 0.4392, "step": 5603 }, { "epoch": 0.3362332753344933, "grad_norm": 1.2643662691116333, "learning_rate": 5.415487195649452e-06, "loss": 0.3897, "step": 5604 }, { "epoch": 0.3362932741345173, "grad_norm": 1.3728580474853516, "learning_rate": 5.414917895193311e-06, "loss": 0.4255, "step": 5605 }, { "epoch": 0.3363532729345413, "grad_norm": 1.1191266775131226, "learning_rate": 5.414348522419446e-06, "loss": 0.4087, "step": 5606 }, { "epoch": 0.3364132717345653, "grad_norm": 1.237009882926941, "learning_rate": 5.413779077349359e-06, "loss": 0.4528, "step": 5607 }, { "epoch": 0.3364732705345893, "grad_norm": 1.4094972610473633, "learning_rate": 5.4132095600045546e-06, "loss": 0.5074, "step": 5608 }, { "epoch": 0.3365332693346133, "grad_norm": 1.3764270544052124, "learning_rate": 5.4126399704065415e-06, "loss": 0.495, "step": 5609 }, { "epoch": 0.3365932681346373, "grad_norm": 1.3076571226119995, "learning_rate": 5.4120703085768325e-06, "loss": 0.4252, "step": 5610 }, { "epoch": 0.3366532669346613, "grad_norm": 1.2426388263702393, "learning_rate": 5.411500574536938e-06, "loss": 0.4309, "step": 5611 }, { "epoch": 0.3367132657346853, "grad_norm": 1.268135666847229, "learning_rate": 5.410930768308377e-06, "loss": 0.4172, "step": 5612 }, { "epoch": 0.3367732645347093, "grad_norm": 1.3001610040664673, "learning_rate": 5.410360889912665e-06, "loss": 0.426, "step": 5613 }, { "epoch": 0.3368332633347333, "grad_norm": 1.3180325031280518, "learning_rate": 5.409790939371326e-06, "loss": 0.445, "step": 5614 }, { "epoch": 0.3368932621347573, "grad_norm": 1.0941466093063354, "learning_rate": 5.409220916705886e-06, "loss": 0.4368, "step": 5615 }, { "epoch": 0.3369532609347813, "grad_norm": 1.141175389289856, "learning_rate": 5.408650821937869e-06, "loss": 0.4383, "step": 5616 }, { "epoch": 0.3370132597348053, "grad_norm": 1.2474514245986938, "learning_rate": 5.408080655088806e-06, "loss": 0.4715, "step": 5617 }, { "epoch": 0.3370732585348293, "grad_norm": 1.130774736404419, "learning_rate": 5.407510416180232e-06, "loss": 0.409, "step": 5618 }, { "epoch": 0.3371332573348533, "grad_norm": 1.3130825757980347, "learning_rate": 5.4069401052336786e-06, "loss": 0.4407, "step": 5619 }, { "epoch": 0.33719325613487733, "grad_norm": 1.1868582963943481, "learning_rate": 5.406369722270684e-06, "loss": 0.4, "step": 5620 }, { "epoch": 0.3372532549349013, "grad_norm": 1.1295086145401, "learning_rate": 5.405799267312792e-06, "loss": 0.4342, "step": 5621 }, { "epoch": 0.3373132537349253, "grad_norm": 1.242434024810791, "learning_rate": 5.405228740381545e-06, "loss": 0.4211, "step": 5622 }, { "epoch": 0.3373732525349493, "grad_norm": 1.1889578104019165, "learning_rate": 5.404658141498488e-06, "loss": 0.4497, "step": 5623 }, { "epoch": 0.3374332513349733, "grad_norm": 1.1466314792633057, "learning_rate": 5.404087470685171e-06, "loss": 0.4025, "step": 5624 }, { "epoch": 0.3374932501349973, "grad_norm": 1.1295077800750732, "learning_rate": 5.403516727963144e-06, "loss": 0.4464, "step": 5625 }, { "epoch": 0.3375532489350213, "grad_norm": 1.3274930715560913, "learning_rate": 5.402945913353963e-06, "loss": 0.4835, "step": 5626 }, { "epoch": 0.3376132477350453, "grad_norm": 1.2019528150558472, "learning_rate": 5.4023750268791854e-06, "loss": 0.4193, "step": 5627 }, { "epoch": 0.3376732465350693, "grad_norm": 1.2277255058288574, "learning_rate": 5.40180406856037e-06, "loss": 0.4498, "step": 5628 }, { "epoch": 0.3377332453350933, "grad_norm": 1.229170799255371, "learning_rate": 5.4012330384190796e-06, "loss": 0.4396, "step": 5629 }, { "epoch": 0.3377932441351173, "grad_norm": 1.1810641288757324, "learning_rate": 5.400661936476879e-06, "loss": 0.4363, "step": 5630 }, { "epoch": 0.3378532429351413, "grad_norm": 1.2854948043823242, "learning_rate": 5.400090762755337e-06, "loss": 0.3981, "step": 5631 }, { "epoch": 0.3379132417351653, "grad_norm": 1.2551392316818237, "learning_rate": 5.399519517276023e-06, "loss": 0.4219, "step": 5632 }, { "epoch": 0.3379732405351893, "grad_norm": 1.335640549659729, "learning_rate": 5.398948200060511e-06, "loss": 0.4422, "step": 5633 }, { "epoch": 0.3380332393352133, "grad_norm": 1.351119875907898, "learning_rate": 5.3983768111303775e-06, "loss": 0.4261, "step": 5634 }, { "epoch": 0.3380932381352373, "grad_norm": 1.2543425559997559, "learning_rate": 5.397805350507201e-06, "loss": 0.4381, "step": 5635 }, { "epoch": 0.3381532369352613, "grad_norm": 1.3671250343322754, "learning_rate": 5.397233818212562e-06, "loss": 0.4545, "step": 5636 }, { "epoch": 0.3382132357352853, "grad_norm": 1.2515431642532349, "learning_rate": 5.396662214268046e-06, "loss": 0.4292, "step": 5637 }, { "epoch": 0.3382732345353093, "grad_norm": 1.1623775959014893, "learning_rate": 5.396090538695239e-06, "loss": 0.4173, "step": 5638 }, { "epoch": 0.33833323333533327, "grad_norm": 1.2493826150894165, "learning_rate": 5.395518791515731e-06, "loss": 0.4296, "step": 5639 }, { "epoch": 0.33839323213535727, "grad_norm": 1.2999013662338257, "learning_rate": 5.3949469727511144e-06, "loss": 0.4108, "step": 5640 }, { "epoch": 0.33845323093538127, "grad_norm": 1.1930022239685059, "learning_rate": 5.394375082422984e-06, "loss": 0.441, "step": 5641 }, { "epoch": 0.3385132297354053, "grad_norm": 1.3466479778289795, "learning_rate": 5.393803120552937e-06, "loss": 0.4556, "step": 5642 }, { "epoch": 0.3385732285354293, "grad_norm": 1.3949525356292725, "learning_rate": 5.393231087162575e-06, "loss": 0.4595, "step": 5643 }, { "epoch": 0.3386332273354533, "grad_norm": 1.2756539583206177, "learning_rate": 5.392658982273501e-06, "loss": 0.4237, "step": 5644 }, { "epoch": 0.3386932261354773, "grad_norm": 1.2862509489059448, "learning_rate": 5.392086805907319e-06, "loss": 0.3994, "step": 5645 }, { "epoch": 0.3387532249355013, "grad_norm": 1.2529202699661255, "learning_rate": 5.39151455808564e-06, "loss": 0.4339, "step": 5646 }, { "epoch": 0.3388132237355253, "grad_norm": 1.179232120513916, "learning_rate": 5.390942238830072e-06, "loss": 0.4427, "step": 5647 }, { "epoch": 0.3388732225355493, "grad_norm": 1.0956952571868896, "learning_rate": 5.390369848162231e-06, "loss": 0.4268, "step": 5648 }, { "epoch": 0.3389332213355733, "grad_norm": 1.3995298147201538, "learning_rate": 5.389797386103734e-06, "loss": 0.3818, "step": 5649 }, { "epoch": 0.3389932201355973, "grad_norm": 1.1180511713027954, "learning_rate": 5.3892248526762e-06, "loss": 0.3978, "step": 5650 }, { "epoch": 0.3390532189356213, "grad_norm": 1.3578643798828125, "learning_rate": 5.38865224790125e-06, "loss": 0.4058, "step": 5651 }, { "epoch": 0.3391132177356453, "grad_norm": 1.2807817459106445, "learning_rate": 5.38807957180051e-06, "loss": 0.4318, "step": 5652 }, { "epoch": 0.3391732165356693, "grad_norm": 1.2390958070755005, "learning_rate": 5.387506824395606e-06, "loss": 0.4454, "step": 5653 }, { "epoch": 0.3392332153356933, "grad_norm": 1.3740309476852417, "learning_rate": 5.386934005708168e-06, "loss": 0.4779, "step": 5654 }, { "epoch": 0.3392932141357173, "grad_norm": 1.137739658355713, "learning_rate": 5.38636111575983e-06, "loss": 0.368, "step": 5655 }, { "epoch": 0.3393532129357413, "grad_norm": 1.186165452003479, "learning_rate": 5.385788154572227e-06, "loss": 0.4025, "step": 5656 }, { "epoch": 0.3394132117357653, "grad_norm": 1.2533923387527466, "learning_rate": 5.385215122166998e-06, "loss": 0.4384, "step": 5657 }, { "epoch": 0.33947321053578927, "grad_norm": 1.2309582233428955, "learning_rate": 5.384642018565781e-06, "loss": 0.4352, "step": 5658 }, { "epoch": 0.33953320933581327, "grad_norm": 1.2227065563201904, "learning_rate": 5.384068843790223e-06, "loss": 0.4138, "step": 5659 }, { "epoch": 0.33959320813583727, "grad_norm": 1.314208745956421, "learning_rate": 5.383495597861968e-06, "loss": 0.4515, "step": 5660 }, { "epoch": 0.33965320693586126, "grad_norm": 1.3528472185134888, "learning_rate": 5.3829222808026655e-06, "loss": 0.4187, "step": 5661 }, { "epoch": 0.33971320573588526, "grad_norm": 1.1807560920715332, "learning_rate": 5.382348892633967e-06, "loss": 0.3928, "step": 5662 }, { "epoch": 0.33977320453590926, "grad_norm": 1.3414613008499146, "learning_rate": 5.381775433377527e-06, "loss": 0.4972, "step": 5663 }, { "epoch": 0.33983320333593325, "grad_norm": 1.1961524486541748, "learning_rate": 5.381201903055001e-06, "loss": 0.412, "step": 5664 }, { "epoch": 0.3398932021359573, "grad_norm": 1.30490243434906, "learning_rate": 5.3806283016880515e-06, "loss": 0.4508, "step": 5665 }, { "epoch": 0.3399532009359813, "grad_norm": 1.219170331954956, "learning_rate": 5.380054629298338e-06, "loss": 0.3791, "step": 5666 }, { "epoch": 0.3400131997360053, "grad_norm": 1.2078843116760254, "learning_rate": 5.379480885907528e-06, "loss": 0.4719, "step": 5667 }, { "epoch": 0.3400731985360293, "grad_norm": 1.364518165588379, "learning_rate": 5.378907071537286e-06, "loss": 0.4662, "step": 5668 }, { "epoch": 0.3401331973360533, "grad_norm": 1.4001153707504272, "learning_rate": 5.3783331862092855e-06, "loss": 0.4485, "step": 5669 }, { "epoch": 0.3401931961360773, "grad_norm": 1.2790740728378296, "learning_rate": 5.377759229945198e-06, "loss": 0.4506, "step": 5670 }, { "epoch": 0.3402531949361013, "grad_norm": 1.1800756454467773, "learning_rate": 5.3771852027667e-06, "loss": 0.3748, "step": 5671 }, { "epoch": 0.3403131937361253, "grad_norm": 1.0803824663162231, "learning_rate": 5.376611104695468e-06, "loss": 0.3778, "step": 5672 }, { "epoch": 0.3403731925361493, "grad_norm": 1.2068307399749756, "learning_rate": 5.376036935753185e-06, "loss": 0.4162, "step": 5673 }, { "epoch": 0.3404331913361733, "grad_norm": 1.3259668350219727, "learning_rate": 5.3754626959615335e-06, "loss": 0.4819, "step": 5674 }, { "epoch": 0.3404931901361973, "grad_norm": 1.295979380607605, "learning_rate": 5.374888385342202e-06, "loss": 0.4953, "step": 5675 }, { "epoch": 0.3405531889362213, "grad_norm": 1.3318955898284912, "learning_rate": 5.374314003916877e-06, "loss": 0.4028, "step": 5676 }, { "epoch": 0.34061318773624527, "grad_norm": 1.2347091436386108, "learning_rate": 5.373739551707252e-06, "loss": 0.3988, "step": 5677 }, { "epoch": 0.34067318653626927, "grad_norm": 1.252811312675476, "learning_rate": 5.373165028735021e-06, "loss": 0.4567, "step": 5678 }, { "epoch": 0.34073318533629326, "grad_norm": 1.3453782796859741, "learning_rate": 5.37259043502188e-06, "loss": 0.4565, "step": 5679 }, { "epoch": 0.34079318413631726, "grad_norm": 1.1449213027954102, "learning_rate": 5.3720157705895305e-06, "loss": 0.4308, "step": 5680 }, { "epoch": 0.34085318293634126, "grad_norm": 1.2217459678649902, "learning_rate": 5.3714410354596744e-06, "loss": 0.4166, "step": 5681 }, { "epoch": 0.34091318173636526, "grad_norm": 1.4318037033081055, "learning_rate": 5.370866229654016e-06, "loss": 0.4727, "step": 5682 }, { "epoch": 0.34097318053638925, "grad_norm": 1.3654826879501343, "learning_rate": 5.370291353194265e-06, "loss": 0.4833, "step": 5683 }, { "epoch": 0.34103317933641325, "grad_norm": 1.1779699325561523, "learning_rate": 5.3697164061021285e-06, "loss": 0.3889, "step": 5684 }, { "epoch": 0.34109317813643725, "grad_norm": 1.2150866985321045, "learning_rate": 5.369141388399323e-06, "loss": 0.4018, "step": 5685 }, { "epoch": 0.34115317693646124, "grad_norm": 1.103121042251587, "learning_rate": 5.368566300107565e-06, "loss": 0.4264, "step": 5686 }, { "epoch": 0.3412131757364853, "grad_norm": 1.2421547174453735, "learning_rate": 5.367991141248568e-06, "loss": 0.4363, "step": 5687 }, { "epoch": 0.3412731745365093, "grad_norm": 1.2935245037078857, "learning_rate": 5.367415911844058e-06, "loss": 0.4587, "step": 5688 }, { "epoch": 0.3413331733365333, "grad_norm": 1.1964203119277954, "learning_rate": 5.366840611915755e-06, "loss": 0.4389, "step": 5689 }, { "epoch": 0.3413931721365573, "grad_norm": 1.3317337036132812, "learning_rate": 5.366265241485389e-06, "loss": 0.485, "step": 5690 }, { "epoch": 0.3414531709365813, "grad_norm": 1.1524014472961426, "learning_rate": 5.365689800574687e-06, "loss": 0.3805, "step": 5691 }, { "epoch": 0.3415131697366053, "grad_norm": 1.2452466487884521, "learning_rate": 5.365114289205381e-06, "loss": 0.4006, "step": 5692 }, { "epoch": 0.3415731685366293, "grad_norm": 1.2616329193115234, "learning_rate": 5.364538707399207e-06, "loss": 0.4377, "step": 5693 }, { "epoch": 0.3416331673366533, "grad_norm": 1.3995033502578735, "learning_rate": 5.3639630551779e-06, "loss": 0.4314, "step": 5694 }, { "epoch": 0.3416931661366773, "grad_norm": 1.2441504001617432, "learning_rate": 5.3633873325632e-06, "loss": 0.4411, "step": 5695 }, { "epoch": 0.34175316493670127, "grad_norm": 1.2586299180984497, "learning_rate": 5.362811539576851e-06, "loss": 0.402, "step": 5696 }, { "epoch": 0.34181316373672527, "grad_norm": 1.4377447366714478, "learning_rate": 5.362235676240596e-06, "loss": 0.4607, "step": 5697 }, { "epoch": 0.34187316253674926, "grad_norm": 1.2984191179275513, "learning_rate": 5.361659742576185e-06, "loss": 0.467, "step": 5698 }, { "epoch": 0.34193316133677326, "grad_norm": 1.3268510103225708, "learning_rate": 5.361083738605366e-06, "loss": 0.3857, "step": 5699 }, { "epoch": 0.34199316013679726, "grad_norm": 1.3506265878677368, "learning_rate": 5.3605076643498945e-06, "loss": 0.4665, "step": 5700 }, { "epoch": 0.34205315893682126, "grad_norm": 1.3635143041610718, "learning_rate": 5.359931519831524e-06, "loss": 0.4956, "step": 5701 }, { "epoch": 0.34211315773684525, "grad_norm": 1.2656062841415405, "learning_rate": 5.359355305072015e-06, "loss": 0.4787, "step": 5702 }, { "epoch": 0.34217315653686925, "grad_norm": 1.11393141746521, "learning_rate": 5.358779020093125e-06, "loss": 0.4307, "step": 5703 }, { "epoch": 0.34223315533689325, "grad_norm": 1.297825813293457, "learning_rate": 5.358202664916623e-06, "loss": 0.419, "step": 5704 }, { "epoch": 0.34229315413691724, "grad_norm": 1.229970932006836, "learning_rate": 5.357626239564271e-06, "loss": 0.4708, "step": 5705 }, { "epoch": 0.34235315293694124, "grad_norm": 1.1224638223648071, "learning_rate": 5.3570497440578385e-06, "loss": 0.3961, "step": 5706 }, { "epoch": 0.34241315173696524, "grad_norm": 1.291792869567871, "learning_rate": 5.3564731784190985e-06, "loss": 0.4335, "step": 5707 }, { "epoch": 0.34247315053698923, "grad_norm": 1.1524139642715454, "learning_rate": 5.355896542669824e-06, "loss": 0.4054, "step": 5708 }, { "epoch": 0.3425331493370133, "grad_norm": 1.108212947845459, "learning_rate": 5.3553198368317946e-06, "loss": 0.4087, "step": 5709 }, { "epoch": 0.3425931481370373, "grad_norm": 1.2951806783676147, "learning_rate": 5.354743060926786e-06, "loss": 0.439, "step": 5710 }, { "epoch": 0.3426531469370613, "grad_norm": 1.3350763320922852, "learning_rate": 5.354166214976583e-06, "loss": 0.406, "step": 5711 }, { "epoch": 0.3427131457370853, "grad_norm": 1.256375789642334, "learning_rate": 5.353589299002969e-06, "loss": 0.3857, "step": 5712 }, { "epoch": 0.3427731445371093, "grad_norm": 1.301334261894226, "learning_rate": 5.353012313027732e-06, "loss": 0.3917, "step": 5713 }, { "epoch": 0.3428331433371333, "grad_norm": 1.2624574899673462, "learning_rate": 5.352435257072662e-06, "loss": 0.3638, "step": 5714 }, { "epoch": 0.34289314213715727, "grad_norm": 1.3402683734893799, "learning_rate": 5.3518581311595524e-06, "loss": 0.4633, "step": 5715 }, { "epoch": 0.34295314093718127, "grad_norm": 1.283453106880188, "learning_rate": 5.351280935310199e-06, "loss": 0.3976, "step": 5716 }, { "epoch": 0.34301313973720526, "grad_norm": 1.3754085302352905, "learning_rate": 5.350703669546398e-06, "loss": 0.4423, "step": 5717 }, { "epoch": 0.34307313853722926, "grad_norm": 1.1673072576522827, "learning_rate": 5.350126333889952e-06, "loss": 0.4238, "step": 5718 }, { "epoch": 0.34313313733725326, "grad_norm": 1.099015235900879, "learning_rate": 5.349548928362662e-06, "loss": 0.4214, "step": 5719 }, { "epoch": 0.34319313613727725, "grad_norm": 1.3218311071395874, "learning_rate": 5.348971452986337e-06, "loss": 0.4332, "step": 5720 }, { "epoch": 0.34325313493730125, "grad_norm": 1.243375539779663, "learning_rate": 5.348393907782784e-06, "loss": 0.4282, "step": 5721 }, { "epoch": 0.34331313373732525, "grad_norm": 1.3418352603912354, "learning_rate": 5.347816292773814e-06, "loss": 0.4592, "step": 5722 }, { "epoch": 0.34337313253734925, "grad_norm": 1.2431769371032715, "learning_rate": 5.347238607981242e-06, "loss": 0.4593, "step": 5723 }, { "epoch": 0.34343313133737324, "grad_norm": 1.3242608308792114, "learning_rate": 5.346660853426883e-06, "loss": 0.4599, "step": 5724 }, { "epoch": 0.34349313013739724, "grad_norm": 1.18746817111969, "learning_rate": 5.346083029132557e-06, "loss": 0.4169, "step": 5725 }, { "epoch": 0.34355312893742124, "grad_norm": 1.258212924003601, "learning_rate": 5.345505135120085e-06, "loss": 0.4604, "step": 5726 }, { "epoch": 0.34361312773744523, "grad_norm": 1.3327356576919556, "learning_rate": 5.344927171411294e-06, "loss": 0.4579, "step": 5727 }, { "epoch": 0.34367312653746923, "grad_norm": 1.3008254766464233, "learning_rate": 5.344349138028007e-06, "loss": 0.4161, "step": 5728 }, { "epoch": 0.34373312533749323, "grad_norm": 1.4261466264724731, "learning_rate": 5.343771034992058e-06, "loss": 0.4757, "step": 5729 }, { "epoch": 0.3437931241375172, "grad_norm": 1.2039926052093506, "learning_rate": 5.3431928623252756e-06, "loss": 0.3906, "step": 5730 }, { "epoch": 0.3438531229375412, "grad_norm": 1.2839018106460571, "learning_rate": 5.342614620049498e-06, "loss": 0.4777, "step": 5731 }, { "epoch": 0.3439131217375653, "grad_norm": 1.0538768768310547, "learning_rate": 5.34203630818656e-06, "loss": 0.4184, "step": 5732 }, { "epoch": 0.34397312053758927, "grad_norm": 1.1808375120162964, "learning_rate": 5.3414579267583025e-06, "loss": 0.4163, "step": 5733 }, { "epoch": 0.34403311933761327, "grad_norm": 1.3045530319213867, "learning_rate": 5.340879475786569e-06, "loss": 0.4251, "step": 5734 }, { "epoch": 0.34409311813763727, "grad_norm": 1.1467729806900024, "learning_rate": 5.340300955293205e-06, "loss": 0.3837, "step": 5735 }, { "epoch": 0.34415311693766126, "grad_norm": 1.2245292663574219, "learning_rate": 5.3397223653000585e-06, "loss": 0.4847, "step": 5736 }, { "epoch": 0.34421311573768526, "grad_norm": 1.3156629800796509, "learning_rate": 5.33914370582898e-06, "loss": 0.4707, "step": 5737 }, { "epoch": 0.34427311453770926, "grad_norm": 1.2842602729797363, "learning_rate": 5.338564976901823e-06, "loss": 0.3935, "step": 5738 }, { "epoch": 0.34433311333773325, "grad_norm": 1.1497917175292969, "learning_rate": 5.337986178540443e-06, "loss": 0.4416, "step": 5739 }, { "epoch": 0.34439311213775725, "grad_norm": 1.3276780843734741, "learning_rate": 5.337407310766699e-06, "loss": 0.4144, "step": 5740 }, { "epoch": 0.34445311093778125, "grad_norm": 1.411600947380066, "learning_rate": 5.336828373602452e-06, "loss": 0.538, "step": 5741 }, { "epoch": 0.34451310973780525, "grad_norm": 1.2855865955352783, "learning_rate": 5.336249367069566e-06, "loss": 0.4447, "step": 5742 }, { "epoch": 0.34457310853782924, "grad_norm": 1.2382152080535889, "learning_rate": 5.335670291189906e-06, "loss": 0.4391, "step": 5743 }, { "epoch": 0.34463310733785324, "grad_norm": 1.1342737674713135, "learning_rate": 5.3350911459853445e-06, "loss": 0.4135, "step": 5744 }, { "epoch": 0.34469310613787724, "grad_norm": 1.1778265237808228, "learning_rate": 5.33451193147775e-06, "loss": 0.4064, "step": 5745 }, { "epoch": 0.34475310493790123, "grad_norm": 1.169047236442566, "learning_rate": 5.333932647688999e-06, "loss": 0.443, "step": 5746 }, { "epoch": 0.34481310373792523, "grad_norm": 1.2533608675003052, "learning_rate": 5.333353294640966e-06, "loss": 0.4289, "step": 5747 }, { "epoch": 0.3448731025379492, "grad_norm": 1.2034518718719482, "learning_rate": 5.332773872355532e-06, "loss": 0.4242, "step": 5748 }, { "epoch": 0.3449331013379732, "grad_norm": 1.1770341396331787, "learning_rate": 5.332194380854579e-06, "loss": 0.4405, "step": 5749 }, { "epoch": 0.3449931001379972, "grad_norm": 1.2534393072128296, "learning_rate": 5.331614820159992e-06, "loss": 0.4324, "step": 5750 }, { "epoch": 0.3450530989380212, "grad_norm": 1.2809762954711914, "learning_rate": 5.331035190293657e-06, "loss": 0.4273, "step": 5751 }, { "epoch": 0.3451130977380452, "grad_norm": 1.2961018085479736, "learning_rate": 5.3304554912774664e-06, "loss": 0.4154, "step": 5752 }, { "epoch": 0.3451730965380692, "grad_norm": 1.2608789205551147, "learning_rate": 5.329875723133311e-06, "loss": 0.4618, "step": 5753 }, { "epoch": 0.34523309533809327, "grad_norm": 1.1818859577178955, "learning_rate": 5.329295885883086e-06, "loss": 0.3979, "step": 5754 }, { "epoch": 0.34529309413811726, "grad_norm": 1.1445918083190918, "learning_rate": 5.32871597954869e-06, "loss": 0.4722, "step": 5755 }, { "epoch": 0.34535309293814126, "grad_norm": 1.284740686416626, "learning_rate": 5.328136004152023e-06, "loss": 0.4136, "step": 5756 }, { "epoch": 0.34541309173816526, "grad_norm": 1.2505773305892944, "learning_rate": 5.3275559597149895e-06, "loss": 0.4498, "step": 5757 }, { "epoch": 0.34547309053818925, "grad_norm": 1.3618052005767822, "learning_rate": 5.326975846259492e-06, "loss": 0.4536, "step": 5758 }, { "epoch": 0.34553308933821325, "grad_norm": 1.2070075273513794, "learning_rate": 5.326395663807441e-06, "loss": 0.4255, "step": 5759 }, { "epoch": 0.34559308813823725, "grad_norm": 1.1818794012069702, "learning_rate": 5.325815412380745e-06, "loss": 0.452, "step": 5760 }, { "epoch": 0.34565308693826124, "grad_norm": 1.2913098335266113, "learning_rate": 5.325235092001322e-06, "loss": 0.426, "step": 5761 }, { "epoch": 0.34571308573828524, "grad_norm": 1.1349297761917114, "learning_rate": 5.324654702691083e-06, "loss": 0.3936, "step": 5762 }, { "epoch": 0.34577308453830924, "grad_norm": 1.317868709564209, "learning_rate": 5.324074244471951e-06, "loss": 0.4622, "step": 5763 }, { "epoch": 0.34583308333833324, "grad_norm": 1.3200316429138184, "learning_rate": 5.3234937173658445e-06, "loss": 0.4588, "step": 5764 }, { "epoch": 0.34589308213835723, "grad_norm": 1.117190957069397, "learning_rate": 5.3229131213946885e-06, "loss": 0.3767, "step": 5765 }, { "epoch": 0.34595308093838123, "grad_norm": 1.3375139236450195, "learning_rate": 5.322332456580408e-06, "loss": 0.4601, "step": 5766 }, { "epoch": 0.3460130797384052, "grad_norm": 1.3578859567642212, "learning_rate": 5.321751722944934e-06, "loss": 0.4547, "step": 5767 }, { "epoch": 0.3460730785384292, "grad_norm": 1.3455520868301392, "learning_rate": 5.321170920510197e-06, "loss": 0.4373, "step": 5768 }, { "epoch": 0.3461330773384532, "grad_norm": 1.2373080253601074, "learning_rate": 5.320590049298132e-06, "loss": 0.3708, "step": 5769 }, { "epoch": 0.3461930761384772, "grad_norm": 1.3141683340072632, "learning_rate": 5.3200091093306755e-06, "loss": 0.4172, "step": 5770 }, { "epoch": 0.3462530749385012, "grad_norm": 1.2680495977401733, "learning_rate": 5.319428100629767e-06, "loss": 0.4241, "step": 5771 }, { "epoch": 0.3463130737385252, "grad_norm": 1.3470375537872314, "learning_rate": 5.318847023217346e-06, "loss": 0.4188, "step": 5772 }, { "epoch": 0.3463730725385492, "grad_norm": 1.2722126245498657, "learning_rate": 5.318265877115362e-06, "loss": 0.3636, "step": 5773 }, { "epoch": 0.3464330713385732, "grad_norm": 1.3621575832366943, "learning_rate": 5.317684662345759e-06, "loss": 0.4604, "step": 5774 }, { "epoch": 0.3464930701385972, "grad_norm": 1.2910287380218506, "learning_rate": 5.317103378930488e-06, "loss": 0.4311, "step": 5775 }, { "epoch": 0.3465530689386212, "grad_norm": 1.189103603363037, "learning_rate": 5.316522026891501e-06, "loss": 0.4236, "step": 5776 }, { "epoch": 0.34661306773864525, "grad_norm": 1.3530220985412598, "learning_rate": 5.315940606250754e-06, "loss": 0.4532, "step": 5777 }, { "epoch": 0.34667306653866925, "grad_norm": 1.2930891513824463, "learning_rate": 5.315359117030203e-06, "loss": 0.4667, "step": 5778 }, { "epoch": 0.34673306533869325, "grad_norm": 1.325188159942627, "learning_rate": 5.314777559251808e-06, "loss": 0.421, "step": 5779 }, { "epoch": 0.34679306413871724, "grad_norm": 1.2082961797714233, "learning_rate": 5.314195932937534e-06, "loss": 0.4078, "step": 5780 }, { "epoch": 0.34685306293874124, "grad_norm": 1.3953883647918701, "learning_rate": 5.313614238109345e-06, "loss": 0.447, "step": 5781 }, { "epoch": 0.34691306173876524, "grad_norm": 1.227146863937378, "learning_rate": 5.313032474789208e-06, "loss": 0.4168, "step": 5782 }, { "epoch": 0.34697306053878924, "grad_norm": 1.3300083875656128, "learning_rate": 5.3124506429990955e-06, "loss": 0.4475, "step": 5783 }, { "epoch": 0.34703305933881323, "grad_norm": 1.218001365661621, "learning_rate": 5.311868742760979e-06, "loss": 0.4352, "step": 5784 }, { "epoch": 0.34709305813883723, "grad_norm": 1.2725496292114258, "learning_rate": 5.3112867740968345e-06, "loss": 0.4339, "step": 5785 }, { "epoch": 0.3471530569388612, "grad_norm": 1.2038291692733765, "learning_rate": 5.310704737028642e-06, "loss": 0.4485, "step": 5786 }, { "epoch": 0.3472130557388852, "grad_norm": 1.1714742183685303, "learning_rate": 5.31012263157838e-06, "loss": 0.4439, "step": 5787 }, { "epoch": 0.3472730545389092, "grad_norm": 1.2998789548873901, "learning_rate": 5.309540457768035e-06, "loss": 0.4231, "step": 5788 }, { "epoch": 0.3473330533389332, "grad_norm": 1.1583672761917114, "learning_rate": 5.308958215619589e-06, "loss": 0.4069, "step": 5789 }, { "epoch": 0.3473930521389572, "grad_norm": 1.387264609336853, "learning_rate": 5.308375905155034e-06, "loss": 0.4391, "step": 5790 }, { "epoch": 0.3474530509389812, "grad_norm": 1.279091238975525, "learning_rate": 5.30779352639636e-06, "loss": 0.4887, "step": 5791 }, { "epoch": 0.3475130497390052, "grad_norm": 1.309638261795044, "learning_rate": 5.307211079365561e-06, "loss": 0.4331, "step": 5792 }, { "epoch": 0.3475730485390292, "grad_norm": 1.2632734775543213, "learning_rate": 5.306628564084631e-06, "loss": 0.4108, "step": 5793 }, { "epoch": 0.3476330473390532, "grad_norm": 1.3052855730056763, "learning_rate": 5.306045980575574e-06, "loss": 0.3905, "step": 5794 }, { "epoch": 0.3476930461390772, "grad_norm": 1.2167227268218994, "learning_rate": 5.3054633288603875e-06, "loss": 0.4545, "step": 5795 }, { "epoch": 0.3477530449391012, "grad_norm": 1.2353397607803345, "learning_rate": 5.304880608961076e-06, "loss": 0.4365, "step": 5796 }, { "epoch": 0.3478130437391252, "grad_norm": 1.2949578762054443, "learning_rate": 5.3042978208996476e-06, "loss": 0.4319, "step": 5797 }, { "epoch": 0.3478730425391492, "grad_norm": 2.7592434883117676, "learning_rate": 5.303714964698111e-06, "loss": 0.3646, "step": 5798 }, { "epoch": 0.34793304133917324, "grad_norm": 1.348233938217163, "learning_rate": 5.3031320403784775e-06, "loss": 0.411, "step": 5799 }, { "epoch": 0.34799304013919724, "grad_norm": 1.1856921911239624, "learning_rate": 5.302549047962762e-06, "loss": 0.4178, "step": 5800 }, { "epoch": 0.34805303893922124, "grad_norm": 1.215550184249878, "learning_rate": 5.301965987472982e-06, "loss": 0.4337, "step": 5801 }, { "epoch": 0.34811303773924523, "grad_norm": 1.1545734405517578, "learning_rate": 5.301382858931155e-06, "loss": 0.4076, "step": 5802 }, { "epoch": 0.34817303653926923, "grad_norm": 1.424685001373291, "learning_rate": 5.300799662359307e-06, "loss": 0.4642, "step": 5803 }, { "epoch": 0.34823303533929323, "grad_norm": 1.14419424533844, "learning_rate": 5.300216397779457e-06, "loss": 0.4287, "step": 5804 }, { "epoch": 0.3482930341393172, "grad_norm": 1.217665433883667, "learning_rate": 5.299633065213638e-06, "loss": 0.4149, "step": 5805 }, { "epoch": 0.3483530329393412, "grad_norm": 1.2875887155532837, "learning_rate": 5.299049664683877e-06, "loss": 0.3829, "step": 5806 }, { "epoch": 0.3484130317393652, "grad_norm": 1.312764286994934, "learning_rate": 5.298466196212205e-06, "loss": 0.4663, "step": 5807 }, { "epoch": 0.3484730305393892, "grad_norm": 1.3361120223999023, "learning_rate": 5.297882659820659e-06, "loss": 0.4455, "step": 5808 }, { "epoch": 0.3485330293394132, "grad_norm": 1.3472951650619507, "learning_rate": 5.297299055531276e-06, "loss": 0.4008, "step": 5809 }, { "epoch": 0.3485930281394372, "grad_norm": 1.267128348350525, "learning_rate": 5.296715383366097e-06, "loss": 0.4525, "step": 5810 }, { "epoch": 0.3486530269394612, "grad_norm": 1.2703545093536377, "learning_rate": 5.296131643347164e-06, "loss": 0.4353, "step": 5811 }, { "epoch": 0.3487130257394852, "grad_norm": 1.362143635749817, "learning_rate": 5.295547835496521e-06, "loss": 0.4093, "step": 5812 }, { "epoch": 0.3487730245395092, "grad_norm": 1.305224895477295, "learning_rate": 5.294963959836217e-06, "loss": 0.4279, "step": 5813 }, { "epoch": 0.3488330233395332, "grad_norm": 1.5224618911743164, "learning_rate": 5.294380016388301e-06, "loss": 0.4836, "step": 5814 }, { "epoch": 0.3488930221395572, "grad_norm": 1.3620352745056152, "learning_rate": 5.293796005174829e-06, "loss": 0.4073, "step": 5815 }, { "epoch": 0.3489530209395812, "grad_norm": 1.3694298267364502, "learning_rate": 5.293211926217853e-06, "loss": 0.4675, "step": 5816 }, { "epoch": 0.3490130197396052, "grad_norm": 1.2874579429626465, "learning_rate": 5.292627779539434e-06, "loss": 0.4665, "step": 5817 }, { "epoch": 0.3490730185396292, "grad_norm": 1.3543545007705688, "learning_rate": 5.29204356516163e-06, "loss": 0.4684, "step": 5818 }, { "epoch": 0.3491330173396532, "grad_norm": 1.2607618570327759, "learning_rate": 5.291459283106505e-06, "loss": 0.4771, "step": 5819 }, { "epoch": 0.3491930161396772, "grad_norm": 1.335923433303833, "learning_rate": 5.290874933396124e-06, "loss": 0.4123, "step": 5820 }, { "epoch": 0.3492530149397012, "grad_norm": 1.3663259744644165, "learning_rate": 5.290290516052557e-06, "loss": 0.4378, "step": 5821 }, { "epoch": 0.34931301373972523, "grad_norm": 1.4536947011947632, "learning_rate": 5.289706031097874e-06, "loss": 0.4598, "step": 5822 }, { "epoch": 0.34937301253974923, "grad_norm": 1.323510766029358, "learning_rate": 5.289121478554148e-06, "loss": 0.444, "step": 5823 }, { "epoch": 0.3494330113397732, "grad_norm": 1.2497786283493042, "learning_rate": 5.288536858443453e-06, "loss": 0.4119, "step": 5824 }, { "epoch": 0.3494930101397972, "grad_norm": 1.24295973777771, "learning_rate": 5.287952170787872e-06, "loss": 0.4374, "step": 5825 }, { "epoch": 0.3495530089398212, "grad_norm": 1.2072107791900635, "learning_rate": 5.287367415609482e-06, "loss": 0.4258, "step": 5826 }, { "epoch": 0.3496130077398452, "grad_norm": 1.3316868543624878, "learning_rate": 5.286782592930368e-06, "loss": 0.4337, "step": 5827 }, { "epoch": 0.3496730065398692, "grad_norm": 1.2264277935028076, "learning_rate": 5.286197702772617e-06, "loss": 0.4684, "step": 5828 }, { "epoch": 0.3497330053398932, "grad_norm": 1.1571847200393677, "learning_rate": 5.285612745158315e-06, "loss": 0.3825, "step": 5829 }, { "epoch": 0.3497930041399172, "grad_norm": 1.2406904697418213, "learning_rate": 5.285027720109556e-06, "loss": 0.4128, "step": 5830 }, { "epoch": 0.3498530029399412, "grad_norm": 1.2953650951385498, "learning_rate": 5.284442627648432e-06, "loss": 0.4315, "step": 5831 }, { "epoch": 0.3499130017399652, "grad_norm": 1.3208352327346802, "learning_rate": 5.2838574677970405e-06, "loss": 0.4268, "step": 5832 }, { "epoch": 0.3499730005399892, "grad_norm": 1.2488248348236084, "learning_rate": 5.283272240577479e-06, "loss": 0.4274, "step": 5833 }, { "epoch": 0.3500329993400132, "grad_norm": 1.2747642993927002, "learning_rate": 5.282686946011851e-06, "loss": 0.498, "step": 5834 }, { "epoch": 0.3500929981400372, "grad_norm": 1.4893615245819092, "learning_rate": 5.282101584122258e-06, "loss": 0.4205, "step": 5835 }, { "epoch": 0.3501529969400612, "grad_norm": 1.339543342590332, "learning_rate": 5.281516154930807e-06, "loss": 0.5253, "step": 5836 }, { "epoch": 0.3502129957400852, "grad_norm": 1.3278776407241821, "learning_rate": 5.280930658459607e-06, "loss": 0.4294, "step": 5837 }, { "epoch": 0.3502729945401092, "grad_norm": 1.097120761871338, "learning_rate": 5.280345094730771e-06, "loss": 0.4634, "step": 5838 }, { "epoch": 0.3503329933401332, "grad_norm": 1.194119930267334, "learning_rate": 5.279759463766411e-06, "loss": 0.4, "step": 5839 }, { "epoch": 0.3503929921401572, "grad_norm": 1.2735110521316528, "learning_rate": 5.279173765588645e-06, "loss": 0.4843, "step": 5840 }, { "epoch": 0.3504529909401812, "grad_norm": 1.1745738983154297, "learning_rate": 5.278588000219593e-06, "loss": 0.3951, "step": 5841 }, { "epoch": 0.35051298974020517, "grad_norm": 1.2886546850204468, "learning_rate": 5.2780021676813745e-06, "loss": 0.4695, "step": 5842 }, { "epoch": 0.35057298854022917, "grad_norm": 1.2628988027572632, "learning_rate": 5.277416267996114e-06, "loss": 0.4426, "step": 5843 }, { "epoch": 0.3506329873402532, "grad_norm": 1.3274732828140259, "learning_rate": 5.2768303011859396e-06, "loss": 0.4512, "step": 5844 }, { "epoch": 0.3506929861402772, "grad_norm": 1.1939321756362915, "learning_rate": 5.27624426727298e-06, "loss": 0.4148, "step": 5845 }, { "epoch": 0.3507529849403012, "grad_norm": 1.332513689994812, "learning_rate": 5.275658166279367e-06, "loss": 0.4537, "step": 5846 }, { "epoch": 0.3508129837403252, "grad_norm": 1.283854365348816, "learning_rate": 5.275071998227234e-06, "loss": 0.4345, "step": 5847 }, { "epoch": 0.3508729825403492, "grad_norm": 1.2996904850006104, "learning_rate": 5.2744857631387205e-06, "loss": 0.4518, "step": 5848 }, { "epoch": 0.3509329813403732, "grad_norm": 1.2147215604782104, "learning_rate": 5.273899461035963e-06, "loss": 0.498, "step": 5849 }, { "epoch": 0.3509929801403972, "grad_norm": 1.3903696537017822, "learning_rate": 5.273313091941105e-06, "loss": 0.5043, "step": 5850 }, { "epoch": 0.3510529789404212, "grad_norm": 1.4165046215057373, "learning_rate": 5.272726655876292e-06, "loss": 0.4661, "step": 5851 }, { "epoch": 0.3511129777404452, "grad_norm": 1.2780463695526123, "learning_rate": 5.2721401528636685e-06, "loss": 0.4092, "step": 5852 }, { "epoch": 0.3511729765404692, "grad_norm": 1.2817084789276123, "learning_rate": 5.271553582925385e-06, "loss": 0.4213, "step": 5853 }, { "epoch": 0.3512329753404932, "grad_norm": 1.1275479793548584, "learning_rate": 5.270966946083594e-06, "loss": 0.4366, "step": 5854 }, { "epoch": 0.3512929741405172, "grad_norm": 1.2014402151107788, "learning_rate": 5.27038024236045e-06, "loss": 0.4117, "step": 5855 }, { "epoch": 0.3513529729405412, "grad_norm": 1.1770217418670654, "learning_rate": 5.269793471778111e-06, "loss": 0.479, "step": 5856 }, { "epoch": 0.3514129717405652, "grad_norm": 1.3400555849075317, "learning_rate": 5.269206634358735e-06, "loss": 0.4247, "step": 5857 }, { "epoch": 0.3514729705405892, "grad_norm": 1.2071518898010254, "learning_rate": 5.268619730124484e-06, "loss": 0.4528, "step": 5858 }, { "epoch": 0.3515329693406132, "grad_norm": 1.1600605249404907, "learning_rate": 5.268032759097527e-06, "loss": 0.4309, "step": 5859 }, { "epoch": 0.3515929681406372, "grad_norm": 1.3258235454559326, "learning_rate": 5.267445721300024e-06, "loss": 0.3895, "step": 5860 }, { "epoch": 0.35165296694066117, "grad_norm": 1.2530455589294434, "learning_rate": 5.266858616754149e-06, "loss": 0.3944, "step": 5861 }, { "epoch": 0.35171296574068517, "grad_norm": 1.0983526706695557, "learning_rate": 5.266271445482075e-06, "loss": 0.4009, "step": 5862 }, { "epoch": 0.35177296454070917, "grad_norm": 1.2416402101516724, "learning_rate": 5.265684207505976e-06, "loss": 0.4017, "step": 5863 }, { "epoch": 0.35183296334073316, "grad_norm": 1.3271890878677368, "learning_rate": 5.265096902848029e-06, "loss": 0.4737, "step": 5864 }, { "epoch": 0.35189296214075716, "grad_norm": 1.3572925329208374, "learning_rate": 5.264509531530413e-06, "loss": 0.4812, "step": 5865 }, { "epoch": 0.3519529609407812, "grad_norm": 1.256853699684143, "learning_rate": 5.263922093575309e-06, "loss": 0.4597, "step": 5866 }, { "epoch": 0.3520129597408052, "grad_norm": 1.1640959978103638, "learning_rate": 5.263334589004908e-06, "loss": 0.4127, "step": 5867 }, { "epoch": 0.3520729585408292, "grad_norm": 1.290021538734436, "learning_rate": 5.26274701784139e-06, "loss": 0.4837, "step": 5868 }, { "epoch": 0.3521329573408532, "grad_norm": 1.199452519416809, "learning_rate": 5.2621593801069504e-06, "loss": 0.3947, "step": 5869 }, { "epoch": 0.3521929561408772, "grad_norm": 1.2110884189605713, "learning_rate": 5.261571675823778e-06, "loss": 0.4134, "step": 5870 }, { "epoch": 0.3522529549409012, "grad_norm": 1.1996229887008667, "learning_rate": 5.26098390501407e-06, "loss": 0.395, "step": 5871 }, { "epoch": 0.3523129537409252, "grad_norm": 1.2701973915100098, "learning_rate": 5.260396067700023e-06, "loss": 0.4495, "step": 5872 }, { "epoch": 0.3523729525409492, "grad_norm": 1.1577528715133667, "learning_rate": 5.259808163903835e-06, "loss": 0.4378, "step": 5873 }, { "epoch": 0.3524329513409732, "grad_norm": 1.1763767004013062, "learning_rate": 5.259220193647714e-06, "loss": 0.4269, "step": 5874 }, { "epoch": 0.3524929501409972, "grad_norm": 1.3202191591262817, "learning_rate": 5.2586321569538575e-06, "loss": 0.441, "step": 5875 }, { "epoch": 0.3525529489410212, "grad_norm": 1.214985728263855, "learning_rate": 5.258044053844478e-06, "loss": 0.3965, "step": 5876 }, { "epoch": 0.3526129477410452, "grad_norm": 1.3209364414215088, "learning_rate": 5.257455884341785e-06, "loss": 0.4147, "step": 5877 }, { "epoch": 0.3526729465410692, "grad_norm": 1.455343246459961, "learning_rate": 5.25686764846799e-06, "loss": 0.4377, "step": 5878 }, { "epoch": 0.3527329453410932, "grad_norm": 1.3303061723709106, "learning_rate": 5.256279346245308e-06, "loss": 0.4631, "step": 5879 }, { "epoch": 0.35279294414111717, "grad_norm": 1.2186322212219238, "learning_rate": 5.255690977695956e-06, "loss": 0.392, "step": 5880 }, { "epoch": 0.35285294294114117, "grad_norm": 1.279320240020752, "learning_rate": 5.255102542842156e-06, "loss": 0.4789, "step": 5881 }, { "epoch": 0.35291294174116516, "grad_norm": 1.3856605291366577, "learning_rate": 5.254514041706128e-06, "loss": 0.4684, "step": 5882 }, { "epoch": 0.35297294054118916, "grad_norm": 1.2843109369277954, "learning_rate": 5.2539254743100976e-06, "loss": 0.4334, "step": 5883 }, { "epoch": 0.35303293934121316, "grad_norm": 1.3757120370864868, "learning_rate": 5.253336840676295e-06, "loss": 0.4099, "step": 5884 }, { "epoch": 0.35309293814123716, "grad_norm": 1.3325610160827637, "learning_rate": 5.252748140826946e-06, "loss": 0.4567, "step": 5885 }, { "epoch": 0.35315293694126115, "grad_norm": 1.2734795808792114, "learning_rate": 5.252159374784286e-06, "loss": 0.4255, "step": 5886 }, { "epoch": 0.35321293574128515, "grad_norm": 1.2775605916976929, "learning_rate": 5.25157054257055e-06, "loss": 0.4188, "step": 5887 }, { "epoch": 0.35327293454130915, "grad_norm": 1.3687058687210083, "learning_rate": 5.250981644207975e-06, "loss": 0.4605, "step": 5888 }, { "epoch": 0.3533329333413332, "grad_norm": 1.1707919836044312, "learning_rate": 5.250392679718798e-06, "loss": 0.3857, "step": 5889 }, { "epoch": 0.3533929321413572, "grad_norm": 1.1768666505813599, "learning_rate": 5.249803649125267e-06, "loss": 0.4116, "step": 5890 }, { "epoch": 0.3534529309413812, "grad_norm": 1.1846628189086914, "learning_rate": 5.249214552449624e-06, "loss": 0.4291, "step": 5891 }, { "epoch": 0.3535129297414052, "grad_norm": 1.1674292087554932, "learning_rate": 5.248625389714116e-06, "loss": 0.4578, "step": 5892 }, { "epoch": 0.3535729285414292, "grad_norm": 1.2077884674072266, "learning_rate": 5.248036160940994e-06, "loss": 0.3932, "step": 5893 }, { "epoch": 0.3536329273414532, "grad_norm": 1.2949576377868652, "learning_rate": 5.24744686615251e-06, "loss": 0.4459, "step": 5894 }, { "epoch": 0.3536929261414772, "grad_norm": 1.2869797945022583, "learning_rate": 5.24685750537092e-06, "loss": 0.4396, "step": 5895 }, { "epoch": 0.3537529249415012, "grad_norm": 1.2127834558486938, "learning_rate": 5.246268078618479e-06, "loss": 0.4682, "step": 5896 }, { "epoch": 0.3538129237415252, "grad_norm": 1.237308382987976, "learning_rate": 5.24567858591745e-06, "loss": 0.4303, "step": 5897 }, { "epoch": 0.3538729225415492, "grad_norm": 1.3645821809768677, "learning_rate": 5.2450890272900945e-06, "loss": 0.3696, "step": 5898 }, { "epoch": 0.35393292134157317, "grad_norm": 1.2035826444625854, "learning_rate": 5.244499402758676e-06, "loss": 0.4348, "step": 5899 }, { "epoch": 0.35399292014159717, "grad_norm": 1.2545182704925537, "learning_rate": 5.243909712345464e-06, "loss": 0.4474, "step": 5900 }, { "epoch": 0.35405291894162116, "grad_norm": 1.22239351272583, "learning_rate": 5.2433199560727265e-06, "loss": 0.4493, "step": 5901 }, { "epoch": 0.35411291774164516, "grad_norm": 1.252075433731079, "learning_rate": 5.242730133962737e-06, "loss": 0.4434, "step": 5902 }, { "epoch": 0.35417291654166916, "grad_norm": 1.3653095960617065, "learning_rate": 5.242140246037771e-06, "loss": 0.4299, "step": 5903 }, { "epoch": 0.35423291534169316, "grad_norm": 1.2905700206756592, "learning_rate": 5.2415502923201035e-06, "loss": 0.4433, "step": 5904 }, { "epoch": 0.35429291414171715, "grad_norm": 1.2099040746688843, "learning_rate": 5.240960272832018e-06, "loss": 0.4066, "step": 5905 }, { "epoch": 0.35435291294174115, "grad_norm": 1.1441923379898071, "learning_rate": 5.240370187595792e-06, "loss": 0.3807, "step": 5906 }, { "epoch": 0.35441291174176515, "grad_norm": 1.248112678527832, "learning_rate": 5.2397800366337155e-06, "loss": 0.4684, "step": 5907 }, { "epoch": 0.35447291054178914, "grad_norm": 1.2195613384246826, "learning_rate": 5.239189819968072e-06, "loss": 0.437, "step": 5908 }, { "epoch": 0.35453290934181314, "grad_norm": 1.3315906524658203, "learning_rate": 5.238599537621154e-06, "loss": 0.449, "step": 5909 }, { "epoch": 0.35459290814183714, "grad_norm": 1.2699590921401978, "learning_rate": 5.2380091896152515e-06, "loss": 0.39, "step": 5910 }, { "epoch": 0.3546529069418612, "grad_norm": 1.2579269409179688, "learning_rate": 5.2374187759726615e-06, "loss": 0.455, "step": 5911 }, { "epoch": 0.3547129057418852, "grad_norm": 1.213379979133606, "learning_rate": 5.236828296715678e-06, "loss": 0.4374, "step": 5912 }, { "epoch": 0.3547729045419092, "grad_norm": 1.4411630630493164, "learning_rate": 5.236237751866604e-06, "loss": 0.4219, "step": 5913 }, { "epoch": 0.3548329033419332, "grad_norm": 1.437264084815979, "learning_rate": 5.23564714144774e-06, "loss": 0.4544, "step": 5914 }, { "epoch": 0.3548929021419572, "grad_norm": 1.2903411388397217, "learning_rate": 5.235056465481392e-06, "loss": 0.3714, "step": 5915 }, { "epoch": 0.3549529009419812, "grad_norm": 1.1367669105529785, "learning_rate": 5.2344657239898655e-06, "loss": 0.4364, "step": 5916 }, { "epoch": 0.3550128997420052, "grad_norm": 1.1861562728881836, "learning_rate": 5.233874916995471e-06, "loss": 0.4004, "step": 5917 }, { "epoch": 0.35507289854202917, "grad_norm": 1.3226606845855713, "learning_rate": 5.23328404452052e-06, "loss": 0.4014, "step": 5918 }, { "epoch": 0.35513289734205317, "grad_norm": 1.3044155836105347, "learning_rate": 5.232693106587328e-06, "loss": 0.3841, "step": 5919 }, { "epoch": 0.35519289614207716, "grad_norm": 1.355491042137146, "learning_rate": 5.23210210321821e-06, "loss": 0.4498, "step": 5920 }, { "epoch": 0.35525289494210116, "grad_norm": 1.2328031063079834, "learning_rate": 5.231511034435488e-06, "loss": 0.4504, "step": 5921 }, { "epoch": 0.35531289374212516, "grad_norm": 1.2796534299850464, "learning_rate": 5.230919900261484e-06, "loss": 0.4609, "step": 5922 }, { "epoch": 0.35537289254214915, "grad_norm": 1.3351905345916748, "learning_rate": 5.23032870071852e-06, "loss": 0.3765, "step": 5923 }, { "epoch": 0.35543289134217315, "grad_norm": 1.3530453443527222, "learning_rate": 5.229737435828925e-06, "loss": 0.4516, "step": 5924 }, { "epoch": 0.35549289014219715, "grad_norm": 1.3252980709075928, "learning_rate": 5.2291461056150276e-06, "loss": 0.4282, "step": 5925 }, { "epoch": 0.35555288894222115, "grad_norm": 1.1876453161239624, "learning_rate": 5.22855471009916e-06, "loss": 0.4171, "step": 5926 }, { "epoch": 0.35561288774224514, "grad_norm": 1.147852897644043, "learning_rate": 5.227963249303656e-06, "loss": 0.4174, "step": 5927 }, { "epoch": 0.35567288654226914, "grad_norm": 1.3557863235473633, "learning_rate": 5.227371723250853e-06, "loss": 0.4749, "step": 5928 }, { "epoch": 0.35573288534229314, "grad_norm": 1.146191954612732, "learning_rate": 5.226780131963089e-06, "loss": 0.4301, "step": 5929 }, { "epoch": 0.35579288414231713, "grad_norm": 1.3303107023239136, "learning_rate": 5.226188475462707e-06, "loss": 0.4471, "step": 5930 }, { "epoch": 0.35585288294234113, "grad_norm": 1.1675034761428833, "learning_rate": 5.225596753772051e-06, "loss": 0.4125, "step": 5931 }, { "epoch": 0.35591288174236513, "grad_norm": 1.2095248699188232, "learning_rate": 5.225004966913468e-06, "loss": 0.4456, "step": 5932 }, { "epoch": 0.3559728805423891, "grad_norm": 1.1677275896072388, "learning_rate": 5.224413114909305e-06, "loss": 0.4559, "step": 5933 }, { "epoch": 0.3560328793424132, "grad_norm": 1.282449722290039, "learning_rate": 5.223821197781917e-06, "loss": 0.4456, "step": 5934 }, { "epoch": 0.3560928781424372, "grad_norm": 1.187554121017456, "learning_rate": 5.223229215553655e-06, "loss": 0.416, "step": 5935 }, { "epoch": 0.35615287694246117, "grad_norm": 1.240474820137024, "learning_rate": 5.222637168246876e-06, "loss": 0.3881, "step": 5936 }, { "epoch": 0.35621287574248517, "grad_norm": 1.4045175313949585, "learning_rate": 5.22204505588394e-06, "loss": 0.4359, "step": 5937 }, { "epoch": 0.35627287454250917, "grad_norm": 1.288279414176941, "learning_rate": 5.2214528784872085e-06, "loss": 0.4572, "step": 5938 }, { "epoch": 0.35633287334253316, "grad_norm": 1.2272534370422363, "learning_rate": 5.220860636079044e-06, "loss": 0.4458, "step": 5939 }, { "epoch": 0.35639287214255716, "grad_norm": 1.183828592300415, "learning_rate": 5.220268328681813e-06, "loss": 0.4039, "step": 5940 }, { "epoch": 0.35645287094258116, "grad_norm": 1.2185345888137817, "learning_rate": 5.219675956317885e-06, "loss": 0.4541, "step": 5941 }, { "epoch": 0.35651286974260515, "grad_norm": 1.3200041055679321, "learning_rate": 5.219083519009632e-06, "loss": 0.4281, "step": 5942 }, { "epoch": 0.35657286854262915, "grad_norm": 1.3563159704208374, "learning_rate": 5.218491016779425e-06, "loss": 0.4163, "step": 5943 }, { "epoch": 0.35663286734265315, "grad_norm": 1.272970199584961, "learning_rate": 5.217898449649643e-06, "loss": 0.4168, "step": 5944 }, { "epoch": 0.35669286614267715, "grad_norm": 1.2795518636703491, "learning_rate": 5.217305817642664e-06, "loss": 0.4524, "step": 5945 }, { "epoch": 0.35675286494270114, "grad_norm": 1.277754545211792, "learning_rate": 5.216713120780867e-06, "loss": 0.4706, "step": 5946 }, { "epoch": 0.35681286374272514, "grad_norm": 1.380818486213684, "learning_rate": 5.216120359086637e-06, "loss": 0.4569, "step": 5947 }, { "epoch": 0.35687286254274914, "grad_norm": 1.3434001207351685, "learning_rate": 5.215527532582359e-06, "loss": 0.4607, "step": 5948 }, { "epoch": 0.35693286134277313, "grad_norm": 1.3126921653747559, "learning_rate": 5.214934641290423e-06, "loss": 0.428, "step": 5949 }, { "epoch": 0.35699286014279713, "grad_norm": 1.5743194818496704, "learning_rate": 5.2143416852332175e-06, "loss": 0.42, "step": 5950 }, { "epoch": 0.3570528589428211, "grad_norm": 1.206344723701477, "learning_rate": 5.213748664433139e-06, "loss": 0.494, "step": 5951 }, { "epoch": 0.3571128577428451, "grad_norm": 1.2039767503738403, "learning_rate": 5.21315557891258e-06, "loss": 0.3971, "step": 5952 }, { "epoch": 0.3571728565428691, "grad_norm": 1.2846319675445557, "learning_rate": 5.212562428693941e-06, "loss": 0.4519, "step": 5953 }, { "epoch": 0.3572328553428931, "grad_norm": 1.320846438407898, "learning_rate": 5.211969213799619e-06, "loss": 0.4714, "step": 5954 }, { "epoch": 0.3572928541429171, "grad_norm": 1.2874422073364258, "learning_rate": 5.211375934252022e-06, "loss": 0.444, "step": 5955 }, { "epoch": 0.35735285294294117, "grad_norm": 1.3488216400146484, "learning_rate": 5.210782590073551e-06, "loss": 0.4503, "step": 5956 }, { "epoch": 0.35741285174296517, "grad_norm": 1.1364645957946777, "learning_rate": 5.2101891812866175e-06, "loss": 0.3966, "step": 5957 }, { "epoch": 0.35747285054298916, "grad_norm": 1.4052400588989258, "learning_rate": 5.20959570791363e-06, "loss": 0.4186, "step": 5958 }, { "epoch": 0.35753284934301316, "grad_norm": 1.4306269884109497, "learning_rate": 5.209002169977001e-06, "loss": 0.5218, "step": 5959 }, { "epoch": 0.35759284814303716, "grad_norm": 1.163454294204712, "learning_rate": 5.208408567499147e-06, "loss": 0.3733, "step": 5960 }, { "epoch": 0.35765284694306115, "grad_norm": 1.1754924058914185, "learning_rate": 5.207814900502484e-06, "loss": 0.4219, "step": 5961 }, { "epoch": 0.35771284574308515, "grad_norm": 1.1981631517410278, "learning_rate": 5.207221169009433e-06, "loss": 0.4382, "step": 5962 }, { "epoch": 0.35777284454310915, "grad_norm": 1.1616348028182983, "learning_rate": 5.206627373042418e-06, "loss": 0.3539, "step": 5963 }, { "epoch": 0.35783284334313314, "grad_norm": 1.3418726921081543, "learning_rate": 5.206033512623861e-06, "loss": 0.4766, "step": 5964 }, { "epoch": 0.35789284214315714, "grad_norm": 1.3737499713897705, "learning_rate": 5.20543958777619e-06, "loss": 0.4213, "step": 5965 }, { "epoch": 0.35795284094318114, "grad_norm": 1.3137468099594116, "learning_rate": 5.204845598521838e-06, "loss": 0.4407, "step": 5966 }, { "epoch": 0.35801283974320514, "grad_norm": 1.3997833728790283, "learning_rate": 5.204251544883234e-06, "loss": 0.5083, "step": 5967 }, { "epoch": 0.35807283854322913, "grad_norm": 1.2323909997940063, "learning_rate": 5.203657426882814e-06, "loss": 0.3896, "step": 5968 }, { "epoch": 0.35813283734325313, "grad_norm": 1.1360478401184082, "learning_rate": 5.2030632445430145e-06, "loss": 0.4082, "step": 5969 }, { "epoch": 0.3581928361432771, "grad_norm": 1.2587720155715942, "learning_rate": 5.202468997886275e-06, "loss": 0.4254, "step": 5970 }, { "epoch": 0.3582528349433011, "grad_norm": 1.3512836694717407, "learning_rate": 5.201874686935039e-06, "loss": 0.499, "step": 5971 }, { "epoch": 0.3583128337433251, "grad_norm": 1.1950831413269043, "learning_rate": 5.201280311711749e-06, "loss": 0.4387, "step": 5972 }, { "epoch": 0.3583728325433491, "grad_norm": 1.2579326629638672, "learning_rate": 5.200685872238853e-06, "loss": 0.4242, "step": 5973 }, { "epoch": 0.3584328313433731, "grad_norm": 1.2203656435012817, "learning_rate": 5.2000913685388e-06, "loss": 0.4103, "step": 5974 }, { "epoch": 0.3584928301433971, "grad_norm": 1.2580578327178955, "learning_rate": 5.199496800634041e-06, "loss": 0.3955, "step": 5975 }, { "epoch": 0.3585528289434211, "grad_norm": 1.2769454717636108, "learning_rate": 5.198902168547031e-06, "loss": 0.3994, "step": 5976 }, { "epoch": 0.3586128277434451, "grad_norm": 1.3877265453338623, "learning_rate": 5.198307472300226e-06, "loss": 0.4951, "step": 5977 }, { "epoch": 0.3586728265434691, "grad_norm": 1.4786694049835205, "learning_rate": 5.197712711916085e-06, "loss": 0.4478, "step": 5978 }, { "epoch": 0.35873282534349316, "grad_norm": 1.2001779079437256, "learning_rate": 5.1971178874170705e-06, "loss": 0.3809, "step": 5979 }, { "epoch": 0.35879282414351715, "grad_norm": 1.3902992010116577, "learning_rate": 5.196522998825645e-06, "loss": 0.4095, "step": 5980 }, { "epoch": 0.35885282294354115, "grad_norm": 1.269670844078064, "learning_rate": 5.195928046164275e-06, "loss": 0.4641, "step": 5981 }, { "epoch": 0.35891282174356515, "grad_norm": 1.215027928352356, "learning_rate": 5.1953330294554305e-06, "loss": 0.4099, "step": 5982 }, { "epoch": 0.35897282054358914, "grad_norm": 1.1900136470794678, "learning_rate": 5.194737948721579e-06, "loss": 0.4121, "step": 5983 }, { "epoch": 0.35903281934361314, "grad_norm": 1.264219880104065, "learning_rate": 5.194142803985199e-06, "loss": 0.4401, "step": 5984 }, { "epoch": 0.35909281814363714, "grad_norm": 1.2397860288619995, "learning_rate": 5.193547595268762e-06, "loss": 0.4419, "step": 5985 }, { "epoch": 0.35915281694366114, "grad_norm": 1.303440809249878, "learning_rate": 5.19295232259475e-06, "loss": 0.4228, "step": 5986 }, { "epoch": 0.35921281574368513, "grad_norm": 1.2351444959640503, "learning_rate": 5.19235698598564e-06, "loss": 0.4566, "step": 5987 }, { "epoch": 0.35927281454370913, "grad_norm": 1.1337013244628906, "learning_rate": 5.191761585463918e-06, "loss": 0.3888, "step": 5988 }, { "epoch": 0.3593328133437331, "grad_norm": 1.1745961904525757, "learning_rate": 5.191166121052068e-06, "loss": 0.4278, "step": 5989 }, { "epoch": 0.3593928121437571, "grad_norm": 1.449253797531128, "learning_rate": 5.190570592772579e-06, "loss": 0.4493, "step": 5990 }, { "epoch": 0.3594528109437811, "grad_norm": 1.2416523694992065, "learning_rate": 5.18997500064794e-06, "loss": 0.4192, "step": 5991 }, { "epoch": 0.3595128097438051, "grad_norm": 1.330739140510559, "learning_rate": 5.1893793447006464e-06, "loss": 0.4305, "step": 5992 }, { "epoch": 0.3595728085438291, "grad_norm": 1.3196756839752197, "learning_rate": 5.18878362495319e-06, "loss": 0.4304, "step": 5993 }, { "epoch": 0.3596328073438531, "grad_norm": 1.2898977994918823, "learning_rate": 5.188187841428071e-06, "loss": 0.4216, "step": 5994 }, { "epoch": 0.3596928061438771, "grad_norm": 1.5001369714736938, "learning_rate": 5.187591994147789e-06, "loss": 0.494, "step": 5995 }, { "epoch": 0.3597528049439011, "grad_norm": 1.2974135875701904, "learning_rate": 5.186996083134845e-06, "loss": 0.4256, "step": 5996 }, { "epoch": 0.3598128037439251, "grad_norm": 1.2684540748596191, "learning_rate": 5.186400108411746e-06, "loss": 0.4504, "step": 5997 }, { "epoch": 0.3598728025439491, "grad_norm": 1.5151455402374268, "learning_rate": 5.185804070000997e-06, "loss": 0.4013, "step": 5998 }, { "epoch": 0.3599328013439731, "grad_norm": 1.317945957183838, "learning_rate": 5.18520796792511e-06, "loss": 0.4274, "step": 5999 }, { "epoch": 0.3599928001439971, "grad_norm": 1.2220112085342407, "learning_rate": 5.1846118022065944e-06, "loss": 0.4106, "step": 6000 }, { "epoch": 0.36005279894402115, "grad_norm": 1.12797212600708, "learning_rate": 5.1840155728679665e-06, "loss": 0.4436, "step": 6001 }, { "epoch": 0.36011279774404514, "grad_norm": 1.2310959100723267, "learning_rate": 5.183419279931744e-06, "loss": 0.4078, "step": 6002 }, { "epoch": 0.36017279654406914, "grad_norm": 1.2501336336135864, "learning_rate": 5.182822923420444e-06, "loss": 0.4213, "step": 6003 }, { "epoch": 0.36023279534409314, "grad_norm": 1.2357600927352905, "learning_rate": 5.182226503356589e-06, "loss": 0.43, "step": 6004 }, { "epoch": 0.36029279414411713, "grad_norm": 1.2102614641189575, "learning_rate": 5.1816300197627034e-06, "loss": 0.4038, "step": 6005 }, { "epoch": 0.36035279294414113, "grad_norm": 1.219393253326416, "learning_rate": 5.181033472661313e-06, "loss": 0.427, "step": 6006 }, { "epoch": 0.36041279174416513, "grad_norm": 1.4030121564865112, "learning_rate": 5.180436862074948e-06, "loss": 0.4407, "step": 6007 }, { "epoch": 0.3604727905441891, "grad_norm": 1.257105827331543, "learning_rate": 5.179840188026138e-06, "loss": 0.3986, "step": 6008 }, { "epoch": 0.3605327893442131, "grad_norm": 1.4023627042770386, "learning_rate": 5.179243450537418e-06, "loss": 0.4312, "step": 6009 }, { "epoch": 0.3605927881442371, "grad_norm": 1.254873514175415, "learning_rate": 5.1786466496313224e-06, "loss": 0.4034, "step": 6010 }, { "epoch": 0.3606527869442611, "grad_norm": 1.1277942657470703, "learning_rate": 5.178049785330393e-06, "loss": 0.4017, "step": 6011 }, { "epoch": 0.3607127857442851, "grad_norm": 1.180777668952942, "learning_rate": 5.177452857657165e-06, "loss": 0.4442, "step": 6012 }, { "epoch": 0.3607727845443091, "grad_norm": 1.422584891319275, "learning_rate": 5.176855866634188e-06, "loss": 0.4278, "step": 6013 }, { "epoch": 0.3608327833443331, "grad_norm": 1.31055748462677, "learning_rate": 5.1762588122840025e-06, "loss": 0.4681, "step": 6014 }, { "epoch": 0.3608927821443571, "grad_norm": 1.3932536840438843, "learning_rate": 5.17566169462916e-06, "loss": 0.4393, "step": 6015 }, { "epoch": 0.3609527809443811, "grad_norm": 1.3073159456253052, "learning_rate": 5.17506451369221e-06, "loss": 0.4845, "step": 6016 }, { "epoch": 0.3610127797444051, "grad_norm": 1.205804467201233, "learning_rate": 5.174467269495705e-06, "loss": 0.4079, "step": 6017 }, { "epoch": 0.3610727785444291, "grad_norm": 1.1826884746551514, "learning_rate": 5.173869962062199e-06, "loss": 0.4622, "step": 6018 }, { "epoch": 0.3611327773444531, "grad_norm": 1.2420624494552612, "learning_rate": 5.17327259141425e-06, "loss": 0.4009, "step": 6019 }, { "epoch": 0.3611927761444771, "grad_norm": 1.3423807621002197, "learning_rate": 5.1726751575744205e-06, "loss": 0.4012, "step": 6020 }, { "epoch": 0.3612527749445011, "grad_norm": 1.3905786275863647, "learning_rate": 5.17207766056527e-06, "loss": 0.4326, "step": 6021 }, { "epoch": 0.3613127737445251, "grad_norm": 1.3331193923950195, "learning_rate": 5.171480100409365e-06, "loss": 0.4247, "step": 6022 }, { "epoch": 0.3613727725445491, "grad_norm": 1.1694228649139404, "learning_rate": 5.1708824771292705e-06, "loss": 0.4233, "step": 6023 }, { "epoch": 0.36143277134457313, "grad_norm": 1.2370136976242065, "learning_rate": 5.170284790747559e-06, "loss": 0.4569, "step": 6024 }, { "epoch": 0.36149277014459713, "grad_norm": 1.5234512090682983, "learning_rate": 5.169687041286798e-06, "loss": 0.4385, "step": 6025 }, { "epoch": 0.36155276894462113, "grad_norm": 1.2000128030776978, "learning_rate": 5.169089228769567e-06, "loss": 0.38, "step": 6026 }, { "epoch": 0.3616127677446451, "grad_norm": 1.1268982887268066, "learning_rate": 5.16849135321844e-06, "loss": 0.3551, "step": 6027 }, { "epoch": 0.3616727665446691, "grad_norm": 1.3049743175506592, "learning_rate": 5.167893414655996e-06, "loss": 0.395, "step": 6028 }, { "epoch": 0.3617327653446931, "grad_norm": 1.409866213798523, "learning_rate": 5.167295413104816e-06, "loss": 0.4526, "step": 6029 }, { "epoch": 0.3617927641447171, "grad_norm": 1.3773404359817505, "learning_rate": 5.166697348587484e-06, "loss": 0.4445, "step": 6030 }, { "epoch": 0.3618527629447411, "grad_norm": 1.2309565544128418, "learning_rate": 5.166099221126587e-06, "loss": 0.4497, "step": 6031 }, { "epoch": 0.3619127617447651, "grad_norm": 1.256908893585205, "learning_rate": 5.165501030744714e-06, "loss": 0.413, "step": 6032 }, { "epoch": 0.3619727605447891, "grad_norm": 1.2987126111984253, "learning_rate": 5.164902777464455e-06, "loss": 0.4943, "step": 6033 }, { "epoch": 0.3620327593448131, "grad_norm": 1.3540961742401123, "learning_rate": 5.164304461308403e-06, "loss": 0.4093, "step": 6034 }, { "epoch": 0.3620927581448371, "grad_norm": 1.2388451099395752, "learning_rate": 5.1637060822991534e-06, "loss": 0.4216, "step": 6035 }, { "epoch": 0.3621527569448611, "grad_norm": 1.4235405921936035, "learning_rate": 5.163107640459306e-06, "loss": 0.4581, "step": 6036 }, { "epoch": 0.3622127557448851, "grad_norm": 1.36576509475708, "learning_rate": 5.162509135811459e-06, "loss": 0.4505, "step": 6037 }, { "epoch": 0.3622727545449091, "grad_norm": 1.291934609413147, "learning_rate": 5.161910568378218e-06, "loss": 0.4663, "step": 6038 }, { "epoch": 0.3623327533449331, "grad_norm": 1.0729584693908691, "learning_rate": 5.161311938182185e-06, "loss": 0.4363, "step": 6039 }, { "epoch": 0.3623927521449571, "grad_norm": 1.3513449430465698, "learning_rate": 5.16071324524597e-06, "loss": 0.4241, "step": 6040 }, { "epoch": 0.3624527509449811, "grad_norm": 1.260717511177063, "learning_rate": 5.160114489592181e-06, "loss": 0.4484, "step": 6041 }, { "epoch": 0.3625127497450051, "grad_norm": 1.333910346031189, "learning_rate": 5.1595156712434314e-06, "loss": 0.4375, "step": 6042 }, { "epoch": 0.3625727485450291, "grad_norm": 1.2626895904541016, "learning_rate": 5.158916790222336e-06, "loss": 0.4363, "step": 6043 }, { "epoch": 0.3626327473450531, "grad_norm": 1.3072662353515625, "learning_rate": 5.158317846551512e-06, "loss": 0.4411, "step": 6044 }, { "epoch": 0.36269274614507707, "grad_norm": 1.3317348957061768, "learning_rate": 5.1577188402535774e-06, "loss": 0.4692, "step": 6045 }, { "epoch": 0.3627527449451011, "grad_norm": 1.2260994911193848, "learning_rate": 5.157119771351154e-06, "loss": 0.4215, "step": 6046 }, { "epoch": 0.3628127437451251, "grad_norm": 1.1559958457946777, "learning_rate": 5.156520639866867e-06, "loss": 0.3973, "step": 6047 }, { "epoch": 0.3628727425451491, "grad_norm": 1.264719009399414, "learning_rate": 5.155921445823344e-06, "loss": 0.4202, "step": 6048 }, { "epoch": 0.3629327413451731, "grad_norm": 1.3420805931091309, "learning_rate": 5.155322189243211e-06, "loss": 0.3935, "step": 6049 }, { "epoch": 0.3629927401451971, "grad_norm": 1.3322643041610718, "learning_rate": 5.154722870149101e-06, "loss": 0.4373, "step": 6050 }, { "epoch": 0.3630527389452211, "grad_norm": 1.3380860090255737, "learning_rate": 5.154123488563647e-06, "loss": 0.4374, "step": 6051 }, { "epoch": 0.3631127377452451, "grad_norm": 1.1925501823425293, "learning_rate": 5.153524044509485e-06, "loss": 0.4631, "step": 6052 }, { "epoch": 0.3631727365452691, "grad_norm": 1.3217920064926147, "learning_rate": 5.152924538009253e-06, "loss": 0.4456, "step": 6053 }, { "epoch": 0.3632327353452931, "grad_norm": 1.2311184406280518, "learning_rate": 5.152324969085592e-06, "loss": 0.4765, "step": 6054 }, { "epoch": 0.3632927341453171, "grad_norm": 1.1663626432418823, "learning_rate": 5.1517253377611456e-06, "loss": 0.4358, "step": 6055 }, { "epoch": 0.3633527329453411, "grad_norm": 1.2233126163482666, "learning_rate": 5.151125644058557e-06, "loss": 0.406, "step": 6056 }, { "epoch": 0.3634127317453651, "grad_norm": 1.3257046937942505, "learning_rate": 5.150525888000477e-06, "loss": 0.4854, "step": 6057 }, { "epoch": 0.3634727305453891, "grad_norm": 1.176782488822937, "learning_rate": 5.149926069609553e-06, "loss": 0.4607, "step": 6058 }, { "epoch": 0.3635327293454131, "grad_norm": 1.2268480062484741, "learning_rate": 5.1493261889084374e-06, "loss": 0.4135, "step": 6059 }, { "epoch": 0.3635927281454371, "grad_norm": 1.287563443183899, "learning_rate": 5.148726245919788e-06, "loss": 0.4166, "step": 6060 }, { "epoch": 0.3636527269454611, "grad_norm": 1.289284110069275, "learning_rate": 5.1481262406662585e-06, "loss": 0.4341, "step": 6061 }, { "epoch": 0.3637127257454851, "grad_norm": 1.2279810905456543, "learning_rate": 5.1475261731705105e-06, "loss": 0.4053, "step": 6062 }, { "epoch": 0.3637727245455091, "grad_norm": 1.2565674781799316, "learning_rate": 5.146926043455205e-06, "loss": 0.4249, "step": 6063 }, { "epoch": 0.36383272334553307, "grad_norm": 1.4203553199768066, "learning_rate": 5.146325851543006e-06, "loss": 0.4557, "step": 6064 }, { "epoch": 0.36389272214555707, "grad_norm": 1.2880573272705078, "learning_rate": 5.1457255974565805e-06, "loss": 0.3945, "step": 6065 }, { "epoch": 0.36395272094558107, "grad_norm": 1.1585958003997803, "learning_rate": 5.145125281218597e-06, "loss": 0.3367, "step": 6066 }, { "epoch": 0.36401271974560506, "grad_norm": 1.2338439226150513, "learning_rate": 5.144524902851726e-06, "loss": 0.3994, "step": 6067 }, { "epoch": 0.3640727185456291, "grad_norm": 1.163856863975525, "learning_rate": 5.143924462378643e-06, "loss": 0.3567, "step": 6068 }, { "epoch": 0.3641327173456531, "grad_norm": 1.2887203693389893, "learning_rate": 5.1433239598220215e-06, "loss": 0.4381, "step": 6069 }, { "epoch": 0.3641927161456771, "grad_norm": 1.136967420578003, "learning_rate": 5.1427233952045425e-06, "loss": 0.4234, "step": 6070 }, { "epoch": 0.3642527149457011, "grad_norm": 1.2017278671264648, "learning_rate": 5.142122768548884e-06, "loss": 0.361, "step": 6071 }, { "epoch": 0.3643127137457251, "grad_norm": 1.256467342376709, "learning_rate": 5.14152207987773e-06, "loss": 0.4002, "step": 6072 }, { "epoch": 0.3643727125457491, "grad_norm": 1.2095412015914917, "learning_rate": 5.1409213292137665e-06, "loss": 0.432, "step": 6073 }, { "epoch": 0.3644327113457731, "grad_norm": 1.1000304222106934, "learning_rate": 5.140320516579681e-06, "loss": 0.3561, "step": 6074 }, { "epoch": 0.3644927101457971, "grad_norm": 1.1514108180999756, "learning_rate": 5.139719641998161e-06, "loss": 0.4265, "step": 6075 }, { "epoch": 0.3645527089458211, "grad_norm": 1.3400750160217285, "learning_rate": 5.139118705491902e-06, "loss": 0.4193, "step": 6076 }, { "epoch": 0.3646127077458451, "grad_norm": 1.1986051797866821, "learning_rate": 5.138517707083596e-06, "loss": 0.4048, "step": 6077 }, { "epoch": 0.3646727065458691, "grad_norm": 1.347009539604187, "learning_rate": 5.1379166467959425e-06, "loss": 0.4248, "step": 6078 }, { "epoch": 0.3647327053458931, "grad_norm": 1.274585485458374, "learning_rate": 5.137315524651639e-06, "loss": 0.4734, "step": 6079 }, { "epoch": 0.3647927041459171, "grad_norm": 1.2253456115722656, "learning_rate": 5.136714340673389e-06, "loss": 0.4256, "step": 6080 }, { "epoch": 0.3648527029459411, "grad_norm": 1.1814370155334473, "learning_rate": 5.136113094883894e-06, "loss": 0.4258, "step": 6081 }, { "epoch": 0.3649127017459651, "grad_norm": 1.254990577697754, "learning_rate": 5.135511787305861e-06, "loss": 0.4224, "step": 6082 }, { "epoch": 0.36497270054598907, "grad_norm": 1.2021669149398804, "learning_rate": 5.134910417962e-06, "loss": 0.3934, "step": 6083 }, { "epoch": 0.36503269934601307, "grad_norm": 1.3014624118804932, "learning_rate": 5.134308986875022e-06, "loss": 0.4397, "step": 6084 }, { "epoch": 0.36509269814603706, "grad_norm": 1.4056638479232788, "learning_rate": 5.133707494067638e-06, "loss": 0.435, "step": 6085 }, { "epoch": 0.36515269694606106, "grad_norm": 1.1352808475494385, "learning_rate": 5.133105939562567e-06, "loss": 0.4456, "step": 6086 }, { "epoch": 0.36521269574608506, "grad_norm": 1.2037028074264526, "learning_rate": 5.132504323382522e-06, "loss": 0.3929, "step": 6087 }, { "epoch": 0.36527269454610906, "grad_norm": 1.3213298320770264, "learning_rate": 5.131902645550228e-06, "loss": 0.3933, "step": 6088 }, { "epoch": 0.36533269334613305, "grad_norm": 1.2085109949111938, "learning_rate": 5.1313009060884065e-06, "loss": 0.4044, "step": 6089 }, { "epoch": 0.36539269214615705, "grad_norm": 1.2655497789382935, "learning_rate": 5.130699105019782e-06, "loss": 0.425, "step": 6090 }, { "epoch": 0.3654526909461811, "grad_norm": 1.0782606601715088, "learning_rate": 5.1300972423670805e-06, "loss": 0.3684, "step": 6091 }, { "epoch": 0.3655126897462051, "grad_norm": 1.295789122581482, "learning_rate": 5.129495318153034e-06, "loss": 0.421, "step": 6092 }, { "epoch": 0.3655726885462291, "grad_norm": 1.2394204139709473, "learning_rate": 5.128893332400372e-06, "loss": 0.4358, "step": 6093 }, { "epoch": 0.3656326873462531, "grad_norm": 1.1919571161270142, "learning_rate": 5.1282912851318315e-06, "loss": 0.4392, "step": 6094 }, { "epoch": 0.3656926861462771, "grad_norm": 1.192719578742981, "learning_rate": 5.127689176370148e-06, "loss": 0.399, "step": 6095 }, { "epoch": 0.3657526849463011, "grad_norm": 1.2910267114639282, "learning_rate": 5.127087006138058e-06, "loss": 0.4832, "step": 6096 }, { "epoch": 0.3658126837463251, "grad_norm": 1.2110286951065063, "learning_rate": 5.1264847744583075e-06, "loss": 0.4193, "step": 6097 }, { "epoch": 0.3658726825463491, "grad_norm": 1.1456271409988403, "learning_rate": 5.125882481353635e-06, "loss": 0.4101, "step": 6098 }, { "epoch": 0.3659326813463731, "grad_norm": 1.426270842552185, "learning_rate": 5.12528012684679e-06, "loss": 0.458, "step": 6099 }, { "epoch": 0.3659926801463971, "grad_norm": 1.187911868095398, "learning_rate": 5.124677710960518e-06, "loss": 0.398, "step": 6100 }, { "epoch": 0.3660526789464211, "grad_norm": 1.258055567741394, "learning_rate": 5.124075233717572e-06, "loss": 0.4277, "step": 6101 }, { "epoch": 0.36611267774644507, "grad_norm": 1.4657073020935059, "learning_rate": 5.123472695140703e-06, "loss": 0.4332, "step": 6102 }, { "epoch": 0.36617267654646907, "grad_norm": 1.190436601638794, "learning_rate": 5.1228700952526675e-06, "loss": 0.4089, "step": 6103 }, { "epoch": 0.36623267534649306, "grad_norm": 1.3436949253082275, "learning_rate": 5.122267434076221e-06, "loss": 0.4095, "step": 6104 }, { "epoch": 0.36629267414651706, "grad_norm": 1.2789640426635742, "learning_rate": 5.121664711634127e-06, "loss": 0.3888, "step": 6105 }, { "epoch": 0.36635267294654106, "grad_norm": 1.2613502740859985, "learning_rate": 5.121061927949142e-06, "loss": 0.4129, "step": 6106 }, { "epoch": 0.36641267174656506, "grad_norm": 1.3716464042663574, "learning_rate": 5.120459083044037e-06, "loss": 0.4673, "step": 6107 }, { "epoch": 0.36647267054658905, "grad_norm": 1.3300566673278809, "learning_rate": 5.119856176941572e-06, "loss": 0.4843, "step": 6108 }, { "epoch": 0.36653266934661305, "grad_norm": 1.2155938148498535, "learning_rate": 5.119253209664521e-06, "loss": 0.3759, "step": 6109 }, { "epoch": 0.36659266814663705, "grad_norm": 1.275273084640503, "learning_rate": 5.118650181235653e-06, "loss": 0.4539, "step": 6110 }, { "epoch": 0.36665266694666104, "grad_norm": 1.1978919506072998, "learning_rate": 5.118047091677743e-06, "loss": 0.3853, "step": 6111 }, { "epoch": 0.36671266574668504, "grad_norm": 1.274215817451477, "learning_rate": 5.117443941013566e-06, "loss": 0.3929, "step": 6112 }, { "epoch": 0.3667726645467091, "grad_norm": 1.3436490297317505, "learning_rate": 5.1168407292658994e-06, "loss": 0.451, "step": 6113 }, { "epoch": 0.3668326633467331, "grad_norm": 1.3849788904190063, "learning_rate": 5.1162374564575254e-06, "loss": 0.4227, "step": 6114 }, { "epoch": 0.3668926621467571, "grad_norm": 1.2485480308532715, "learning_rate": 5.1156341226112254e-06, "loss": 0.448, "step": 6115 }, { "epoch": 0.3669526609467811, "grad_norm": 1.4225720167160034, "learning_rate": 5.115030727749786e-06, "loss": 0.4731, "step": 6116 }, { "epoch": 0.3670126597468051, "grad_norm": 1.196083903312683, "learning_rate": 5.114427271895995e-06, "loss": 0.4096, "step": 6117 }, { "epoch": 0.3670726585468291, "grad_norm": 1.1226162910461426, "learning_rate": 5.113823755072641e-06, "loss": 0.4126, "step": 6118 }, { "epoch": 0.3671326573468531, "grad_norm": 1.3640635013580322, "learning_rate": 5.113220177302515e-06, "loss": 0.4464, "step": 6119 }, { "epoch": 0.36719265614687707, "grad_norm": 1.2883312702178955, "learning_rate": 5.112616538608414e-06, "loss": 0.4397, "step": 6120 }, { "epoch": 0.36725265494690107, "grad_norm": 1.4475599527359009, "learning_rate": 5.112012839013132e-06, "loss": 0.4652, "step": 6121 }, { "epoch": 0.36731265374692507, "grad_norm": 1.2806506156921387, "learning_rate": 5.111409078539471e-06, "loss": 0.4592, "step": 6122 }, { "epoch": 0.36737265254694906, "grad_norm": 1.409581184387207, "learning_rate": 5.1108052572102295e-06, "loss": 0.4637, "step": 6123 }, { "epoch": 0.36743265134697306, "grad_norm": 1.3117855787277222, "learning_rate": 5.110201375048213e-06, "loss": 0.402, "step": 6124 }, { "epoch": 0.36749265014699706, "grad_norm": 1.212565541267395, "learning_rate": 5.109597432076227e-06, "loss": 0.4537, "step": 6125 }, { "epoch": 0.36755264894702105, "grad_norm": 1.2844022512435913, "learning_rate": 5.108993428317078e-06, "loss": 0.4308, "step": 6126 }, { "epoch": 0.36761264774704505, "grad_norm": 1.2671585083007812, "learning_rate": 5.108389363793579e-06, "loss": 0.4631, "step": 6127 }, { "epoch": 0.36767264654706905, "grad_norm": 1.2846438884735107, "learning_rate": 5.1077852385285406e-06, "loss": 0.4093, "step": 6128 }, { "epoch": 0.36773264534709305, "grad_norm": 1.3036950826644897, "learning_rate": 5.107181052544779e-06, "loss": 0.4062, "step": 6129 }, { "epoch": 0.36779264414711704, "grad_norm": 1.2953333854675293, "learning_rate": 5.106576805865112e-06, "loss": 0.4631, "step": 6130 }, { "epoch": 0.36785264294714104, "grad_norm": 1.212092638015747, "learning_rate": 5.105972498512357e-06, "loss": 0.3879, "step": 6131 }, { "epoch": 0.36791264174716504, "grad_norm": 1.305975317955017, "learning_rate": 5.10536813050934e-06, "loss": 0.501, "step": 6132 }, { "epoch": 0.36797264054718903, "grad_norm": 1.1749701499938965, "learning_rate": 5.104763701878881e-06, "loss": 0.4168, "step": 6133 }, { "epoch": 0.36803263934721303, "grad_norm": 1.2167226076126099, "learning_rate": 5.1041592126438095e-06, "loss": 0.3903, "step": 6134 }, { "epoch": 0.36809263814723703, "grad_norm": 1.3652514219284058, "learning_rate": 5.103554662826951e-06, "loss": 0.4283, "step": 6135 }, { "epoch": 0.3681526369472611, "grad_norm": 1.2433736324310303, "learning_rate": 5.1029500524511405e-06, "loss": 0.3855, "step": 6136 }, { "epoch": 0.3682126357472851, "grad_norm": 1.2574663162231445, "learning_rate": 5.102345381539208e-06, "loss": 0.3911, "step": 6137 }, { "epoch": 0.3682726345473091, "grad_norm": 1.3722541332244873, "learning_rate": 5.101740650113992e-06, "loss": 0.4497, "step": 6138 }, { "epoch": 0.36833263334733307, "grad_norm": 1.2458231449127197, "learning_rate": 5.101135858198328e-06, "loss": 0.4069, "step": 6139 }, { "epoch": 0.36839263214735707, "grad_norm": 1.465606689453125, "learning_rate": 5.1005310058150586e-06, "loss": 0.4088, "step": 6140 }, { "epoch": 0.36845263094738107, "grad_norm": 1.2226178646087646, "learning_rate": 5.099926092987025e-06, "loss": 0.4095, "step": 6141 }, { "epoch": 0.36851262974740506, "grad_norm": 1.1171541213989258, "learning_rate": 5.099321119737072e-06, "loss": 0.4107, "step": 6142 }, { "epoch": 0.36857262854742906, "grad_norm": 1.3583629131317139, "learning_rate": 5.098716086088047e-06, "loss": 0.4076, "step": 6143 }, { "epoch": 0.36863262734745306, "grad_norm": 1.2629611492156982, "learning_rate": 5.098110992062798e-06, "loss": 0.4706, "step": 6144 }, { "epoch": 0.36869262614747705, "grad_norm": 1.2018852233886719, "learning_rate": 5.097505837684179e-06, "loss": 0.3976, "step": 6145 }, { "epoch": 0.36875262494750105, "grad_norm": 1.2757861614227295, "learning_rate": 5.096900622975042e-06, "loss": 0.419, "step": 6146 }, { "epoch": 0.36881262374752505, "grad_norm": 1.3974765539169312, "learning_rate": 5.096295347958244e-06, "loss": 0.4426, "step": 6147 }, { "epoch": 0.36887262254754905, "grad_norm": 1.2882050275802612, "learning_rate": 5.095690012656643e-06, "loss": 0.4365, "step": 6148 }, { "epoch": 0.36893262134757304, "grad_norm": 1.3089604377746582, "learning_rate": 5.095084617093102e-06, "loss": 0.4445, "step": 6149 }, { "epoch": 0.36899262014759704, "grad_norm": 1.2463866472244263, "learning_rate": 5.09447916129048e-06, "loss": 0.4006, "step": 6150 }, { "epoch": 0.36905261894762104, "grad_norm": 1.2411413192749023, "learning_rate": 5.0938736452716465e-06, "loss": 0.3856, "step": 6151 }, { "epoch": 0.36911261774764503, "grad_norm": 1.1949079036712646, "learning_rate": 5.093268069059466e-06, "loss": 0.4524, "step": 6152 }, { "epoch": 0.36917261654766903, "grad_norm": 1.4365235567092896, "learning_rate": 5.0926624326768095e-06, "loss": 0.4667, "step": 6153 }, { "epoch": 0.369232615347693, "grad_norm": 1.2689342498779297, "learning_rate": 5.092056736146549e-06, "loss": 0.3808, "step": 6154 }, { "epoch": 0.369292614147717, "grad_norm": 1.1117267608642578, "learning_rate": 5.091450979491558e-06, "loss": 0.3536, "step": 6155 }, { "epoch": 0.369352612947741, "grad_norm": 1.292829155921936, "learning_rate": 5.090845162734716e-06, "loss": 0.3781, "step": 6156 }, { "epoch": 0.369412611747765, "grad_norm": 1.1620733737945557, "learning_rate": 5.090239285898899e-06, "loss": 0.3873, "step": 6157 }, { "epoch": 0.36947261054778907, "grad_norm": 1.2593941688537598, "learning_rate": 5.08963334900699e-06, "loss": 0.4187, "step": 6158 }, { "epoch": 0.36953260934781307, "grad_norm": 1.1702162027359009, "learning_rate": 5.089027352081871e-06, "loss": 0.4233, "step": 6159 }, { "epoch": 0.36959260814783707, "grad_norm": 1.2693675756454468, "learning_rate": 5.088421295146429e-06, "loss": 0.4593, "step": 6160 }, { "epoch": 0.36965260694786106, "grad_norm": 1.294777750968933, "learning_rate": 5.087815178223551e-06, "loss": 0.3939, "step": 6161 }, { "epoch": 0.36971260574788506, "grad_norm": 1.27573823928833, "learning_rate": 5.087209001336128e-06, "loss": 0.4357, "step": 6162 }, { "epoch": 0.36977260454790906, "grad_norm": 1.3114471435546875, "learning_rate": 5.086602764507053e-06, "loss": 0.4617, "step": 6163 }, { "epoch": 0.36983260334793305, "grad_norm": 1.2572838068008423, "learning_rate": 5.0859964677592194e-06, "loss": 0.4181, "step": 6164 }, { "epoch": 0.36989260214795705, "grad_norm": 1.3201181888580322, "learning_rate": 5.085390111115524e-06, "loss": 0.4482, "step": 6165 }, { "epoch": 0.36995260094798105, "grad_norm": 1.2506345510482788, "learning_rate": 5.08478369459887e-06, "loss": 0.4617, "step": 6166 }, { "epoch": 0.37001259974800504, "grad_norm": 1.229057788848877, "learning_rate": 5.0841772182321545e-06, "loss": 0.4028, "step": 6167 }, { "epoch": 0.37007259854802904, "grad_norm": 1.2846801280975342, "learning_rate": 5.0835706820382834e-06, "loss": 0.4282, "step": 6168 }, { "epoch": 0.37013259734805304, "grad_norm": 1.2158304452896118, "learning_rate": 5.08296408604016e-06, "loss": 0.4186, "step": 6169 }, { "epoch": 0.37019259614807704, "grad_norm": 1.349411129951477, "learning_rate": 5.082357430260698e-06, "loss": 0.4293, "step": 6170 }, { "epoch": 0.37025259494810103, "grad_norm": 1.248928427696228, "learning_rate": 5.081750714722804e-06, "loss": 0.447, "step": 6171 }, { "epoch": 0.37031259374812503, "grad_norm": 1.2625864744186401, "learning_rate": 5.081143939449391e-06, "loss": 0.4393, "step": 6172 }, { "epoch": 0.370372592548149, "grad_norm": 1.1269404888153076, "learning_rate": 5.080537104463377e-06, "loss": 0.4441, "step": 6173 }, { "epoch": 0.370432591348173, "grad_norm": 1.1181150674819946, "learning_rate": 5.079930209787676e-06, "loss": 0.429, "step": 6174 }, { "epoch": 0.370492590148197, "grad_norm": 1.1020865440368652, "learning_rate": 5.079323255445209e-06, "loss": 0.4116, "step": 6175 }, { "epoch": 0.370552588948221, "grad_norm": 1.1733883619308472, "learning_rate": 5.078716241458899e-06, "loss": 0.4273, "step": 6176 }, { "epoch": 0.370612587748245, "grad_norm": 1.3147739171981812, "learning_rate": 5.078109167851668e-06, "loss": 0.4112, "step": 6177 }, { "epoch": 0.370672586548269, "grad_norm": 1.2601172924041748, "learning_rate": 5.077502034646445e-06, "loss": 0.4409, "step": 6178 }, { "epoch": 0.370732585348293, "grad_norm": 1.340475082397461, "learning_rate": 5.076894841866156e-06, "loss": 0.4322, "step": 6179 }, { "epoch": 0.370792584148317, "grad_norm": 1.344726800918579, "learning_rate": 5.076287589533735e-06, "loss": 0.4104, "step": 6180 }, { "epoch": 0.37085258294834106, "grad_norm": 1.2387038469314575, "learning_rate": 5.075680277672111e-06, "loss": 0.4523, "step": 6181 }, { "epoch": 0.37091258174836506, "grad_norm": 1.3511606454849243, "learning_rate": 5.0750729063042226e-06, "loss": 0.4425, "step": 6182 }, { "epoch": 0.37097258054838905, "grad_norm": 1.3046884536743164, "learning_rate": 5.0744654754530064e-06, "loss": 0.4302, "step": 6183 }, { "epoch": 0.37103257934841305, "grad_norm": 1.3526359796524048, "learning_rate": 5.073857985141403e-06, "loss": 0.4815, "step": 6184 }, { "epoch": 0.37109257814843705, "grad_norm": 1.3351103067398071, "learning_rate": 5.073250435392354e-06, "loss": 0.4612, "step": 6185 }, { "epoch": 0.37115257694846104, "grad_norm": 1.2885476350784302, "learning_rate": 5.0726428262288036e-06, "loss": 0.4352, "step": 6186 }, { "epoch": 0.37121257574848504, "grad_norm": 1.142823576927185, "learning_rate": 5.072035157673698e-06, "loss": 0.4168, "step": 6187 }, { "epoch": 0.37127257454850904, "grad_norm": 1.34284508228302, "learning_rate": 5.071427429749987e-06, "loss": 0.4352, "step": 6188 }, { "epoch": 0.37133257334853303, "grad_norm": 1.2081220149993896, "learning_rate": 5.0708196424806214e-06, "loss": 0.4582, "step": 6189 }, { "epoch": 0.37139257214855703, "grad_norm": 1.2540220022201538, "learning_rate": 5.070211795888554e-06, "loss": 0.4483, "step": 6190 }, { "epoch": 0.37145257094858103, "grad_norm": 1.1373798847198486, "learning_rate": 5.069603889996743e-06, "loss": 0.4135, "step": 6191 }, { "epoch": 0.371512569748605, "grad_norm": 1.167576551437378, "learning_rate": 5.0689959248281425e-06, "loss": 0.4447, "step": 6192 }, { "epoch": 0.371572568548629, "grad_norm": 1.1990636587142944, "learning_rate": 5.068387900405715e-06, "loss": 0.4003, "step": 6193 }, { "epoch": 0.371632567348653, "grad_norm": 1.3285579681396484, "learning_rate": 5.06777981675242e-06, "loss": 0.4336, "step": 6194 }, { "epoch": 0.371692566148677, "grad_norm": 1.2977166175842285, "learning_rate": 5.0671716738912265e-06, "loss": 0.4634, "step": 6195 }, { "epoch": 0.371752564948701, "grad_norm": 1.1771585941314697, "learning_rate": 5.0665634718450975e-06, "loss": 0.355, "step": 6196 }, { "epoch": 0.371812563748725, "grad_norm": 1.2521744966506958, "learning_rate": 5.065955210637005e-06, "loss": 0.421, "step": 6197 }, { "epoch": 0.371872562548749, "grad_norm": 1.1592857837677002, "learning_rate": 5.065346890289917e-06, "loss": 0.4186, "step": 6198 }, { "epoch": 0.371932561348773, "grad_norm": 1.2555549144744873, "learning_rate": 5.0647385108268085e-06, "loss": 0.4382, "step": 6199 }, { "epoch": 0.371992560148797, "grad_norm": 1.3425521850585938, "learning_rate": 5.064130072270656e-06, "loss": 0.4339, "step": 6200 }, { "epoch": 0.372052558948821, "grad_norm": 1.32938551902771, "learning_rate": 5.0635215746444375e-06, "loss": 0.4357, "step": 6201 }, { "epoch": 0.372112557748845, "grad_norm": 1.2269057035446167, "learning_rate": 5.062913017971131e-06, "loss": 0.3932, "step": 6202 }, { "epoch": 0.37217255654886905, "grad_norm": 1.2624558210372925, "learning_rate": 5.062304402273722e-06, "loss": 0.4184, "step": 6203 }, { "epoch": 0.37223255534889305, "grad_norm": 1.228220820426941, "learning_rate": 5.061695727575193e-06, "loss": 0.4325, "step": 6204 }, { "epoch": 0.37229255414891704, "grad_norm": 1.1792715787887573, "learning_rate": 5.061086993898532e-06, "loss": 0.4169, "step": 6205 }, { "epoch": 0.37235255294894104, "grad_norm": 1.2418222427368164, "learning_rate": 5.0604782012667265e-06, "loss": 0.4197, "step": 6206 }, { "epoch": 0.37241255174896504, "grad_norm": 1.152169942855835, "learning_rate": 5.05986934970277e-06, "loss": 0.4005, "step": 6207 }, { "epoch": 0.37247255054898903, "grad_norm": 1.426037073135376, "learning_rate": 5.059260439229653e-06, "loss": 0.4443, "step": 6208 }, { "epoch": 0.37253254934901303, "grad_norm": 1.2302578687667847, "learning_rate": 5.058651469870376e-06, "loss": 0.4269, "step": 6209 }, { "epoch": 0.37259254814903703, "grad_norm": 1.2943683862686157, "learning_rate": 5.058042441647932e-06, "loss": 0.4045, "step": 6210 }, { "epoch": 0.372652546949061, "grad_norm": 1.1925702095031738, "learning_rate": 5.057433354585323e-06, "loss": 0.3985, "step": 6211 }, { "epoch": 0.372712545749085, "grad_norm": 1.3848398923873901, "learning_rate": 5.056824208705553e-06, "loss": 0.4459, "step": 6212 }, { "epoch": 0.372772544549109, "grad_norm": 1.2425354719161987, "learning_rate": 5.056215004031626e-06, "loss": 0.4222, "step": 6213 }, { "epoch": 0.372832543349133, "grad_norm": 1.331361174583435, "learning_rate": 5.055605740586548e-06, "loss": 0.4303, "step": 6214 }, { "epoch": 0.372892542149157, "grad_norm": 1.2458893060684204, "learning_rate": 5.054996418393328e-06, "loss": 0.452, "step": 6215 }, { "epoch": 0.372952540949181, "grad_norm": 1.291985034942627, "learning_rate": 5.0543870374749785e-06, "loss": 0.421, "step": 6216 }, { "epoch": 0.373012539749205, "grad_norm": 1.2255312204360962, "learning_rate": 5.053777597854511e-06, "loss": 0.4045, "step": 6217 }, { "epoch": 0.373072538549229, "grad_norm": 1.3417083024978638, "learning_rate": 5.0531680995549436e-06, "loss": 0.4221, "step": 6218 }, { "epoch": 0.373132537349253, "grad_norm": 1.3332465887069702, "learning_rate": 5.052558542599293e-06, "loss": 0.4479, "step": 6219 }, { "epoch": 0.373192536149277, "grad_norm": 1.3561283349990845, "learning_rate": 5.05194892701058e-06, "loss": 0.4664, "step": 6220 }, { "epoch": 0.373252534949301, "grad_norm": 1.2661278247833252, "learning_rate": 5.051339252811828e-06, "loss": 0.4662, "step": 6221 }, { "epoch": 0.373312533749325, "grad_norm": 1.2390432357788086, "learning_rate": 5.050729520026058e-06, "loss": 0.4168, "step": 6222 }, { "epoch": 0.373372532549349, "grad_norm": 1.2398899793624878, "learning_rate": 5.0501197286763e-06, "loss": 0.4617, "step": 6223 }, { "epoch": 0.373432531349373, "grad_norm": 1.135596752166748, "learning_rate": 5.0495098787855826e-06, "loss": 0.419, "step": 6224 }, { "epoch": 0.37349253014939704, "grad_norm": 1.3394476175308228, "learning_rate": 5.048899970376936e-06, "loss": 0.4176, "step": 6225 }, { "epoch": 0.37355252894942104, "grad_norm": 1.2956249713897705, "learning_rate": 5.048290003473395e-06, "loss": 0.4116, "step": 6226 }, { "epoch": 0.37361252774944503, "grad_norm": 1.368040680885315, "learning_rate": 5.047679978097994e-06, "loss": 0.431, "step": 6227 }, { "epoch": 0.37367252654946903, "grad_norm": 1.2917547225952148, "learning_rate": 5.047069894273772e-06, "loss": 0.4234, "step": 6228 }, { "epoch": 0.373732525349493, "grad_norm": 1.3700724840164185, "learning_rate": 5.0464597520237665e-06, "loss": 0.4006, "step": 6229 }, { "epoch": 0.373792524149517, "grad_norm": 1.2903828620910645, "learning_rate": 5.045849551371024e-06, "loss": 0.4475, "step": 6230 }, { "epoch": 0.373852522949541, "grad_norm": 1.3692468404769897, "learning_rate": 5.045239292338586e-06, "loss": 0.4533, "step": 6231 }, { "epoch": 0.373912521749565, "grad_norm": 1.1994564533233643, "learning_rate": 5.0446289749495004e-06, "loss": 0.4065, "step": 6232 }, { "epoch": 0.373972520549589, "grad_norm": 1.3975037336349487, "learning_rate": 5.044018599226815e-06, "loss": 0.4566, "step": 6233 }, { "epoch": 0.374032519349613, "grad_norm": 1.1819067001342773, "learning_rate": 5.043408165193583e-06, "loss": 0.3421, "step": 6234 }, { "epoch": 0.374092518149637, "grad_norm": 1.3769346475601196, "learning_rate": 5.042797672872856e-06, "loss": 0.4335, "step": 6235 }, { "epoch": 0.374152516949661, "grad_norm": 1.294144868850708, "learning_rate": 5.042187122287688e-06, "loss": 0.4448, "step": 6236 }, { "epoch": 0.374212515749685, "grad_norm": 1.3496296405792236, "learning_rate": 5.041576513461142e-06, "loss": 0.3848, "step": 6237 }, { "epoch": 0.374272514549709, "grad_norm": 1.1869933605194092, "learning_rate": 5.0409658464162715e-06, "loss": 0.4292, "step": 6238 }, { "epoch": 0.374332513349733, "grad_norm": 1.2329367399215698, "learning_rate": 5.040355121176142e-06, "loss": 0.4061, "step": 6239 }, { "epoch": 0.374392512149757, "grad_norm": 1.4907749891281128, "learning_rate": 5.0397443377638175e-06, "loss": 0.4781, "step": 6240 }, { "epoch": 0.374452510949781, "grad_norm": 1.1961194276809692, "learning_rate": 5.039133496202366e-06, "loss": 0.4054, "step": 6241 }, { "epoch": 0.374512509749805, "grad_norm": 1.2602976560592651, "learning_rate": 5.038522596514853e-06, "loss": 0.3746, "step": 6242 }, { "epoch": 0.374572508549829, "grad_norm": 1.175957441329956, "learning_rate": 5.037911638724353e-06, "loss": 0.4394, "step": 6243 }, { "epoch": 0.374632507349853, "grad_norm": 1.3671655654907227, "learning_rate": 5.0373006228539345e-06, "loss": 0.4212, "step": 6244 }, { "epoch": 0.374692506149877, "grad_norm": 1.3200325965881348, "learning_rate": 5.036689548926677e-06, "loss": 0.4402, "step": 6245 }, { "epoch": 0.374752504949901, "grad_norm": 1.2785511016845703, "learning_rate": 5.036078416965655e-06, "loss": 0.4358, "step": 6246 }, { "epoch": 0.374812503749925, "grad_norm": 1.3358217477798462, "learning_rate": 5.035467226993951e-06, "loss": 0.4222, "step": 6247 }, { "epoch": 0.374872502549949, "grad_norm": 1.283445954322815, "learning_rate": 5.034855979034645e-06, "loss": 0.4278, "step": 6248 }, { "epoch": 0.374932501349973, "grad_norm": 1.3721058368682861, "learning_rate": 5.034244673110821e-06, "loss": 0.3914, "step": 6249 }, { "epoch": 0.374992500149997, "grad_norm": 1.4564799070358276, "learning_rate": 5.033633309245567e-06, "loss": 0.4603, "step": 6250 }, { "epoch": 0.375052498950021, "grad_norm": 1.1941126585006714, "learning_rate": 5.0330218874619695e-06, "loss": 0.4782, "step": 6251 }, { "epoch": 0.375112497750045, "grad_norm": 1.1621768474578857, "learning_rate": 5.032410407783118e-06, "loss": 0.4231, "step": 6252 }, { "epoch": 0.375172496550069, "grad_norm": 1.3257673978805542, "learning_rate": 5.031798870232109e-06, "loss": 0.4607, "step": 6253 }, { "epoch": 0.375232495350093, "grad_norm": 1.255117654800415, "learning_rate": 5.031187274832035e-06, "loss": 0.458, "step": 6254 }, { "epoch": 0.375292494150117, "grad_norm": 1.2792818546295166, "learning_rate": 5.030575621605995e-06, "loss": 0.4476, "step": 6255 }, { "epoch": 0.375352492950141, "grad_norm": 1.294785499572754, "learning_rate": 5.029963910577084e-06, "loss": 0.4047, "step": 6256 }, { "epoch": 0.375412491750165, "grad_norm": 1.2112159729003906, "learning_rate": 5.029352141768409e-06, "loss": 0.3863, "step": 6257 }, { "epoch": 0.375472490550189, "grad_norm": 1.1909106969833374, "learning_rate": 5.028740315203071e-06, "loss": 0.4183, "step": 6258 }, { "epoch": 0.375532489350213, "grad_norm": 1.2280676364898682, "learning_rate": 5.028128430904176e-06, "loss": 0.4801, "step": 6259 }, { "epoch": 0.375592488150237, "grad_norm": 1.1075626611709595, "learning_rate": 5.027516488894832e-06, "loss": 0.4295, "step": 6260 }, { "epoch": 0.375652486950261, "grad_norm": 1.347025752067566, "learning_rate": 5.0269044891981496e-06, "loss": 0.4162, "step": 6261 }, { "epoch": 0.375712485750285, "grad_norm": 1.3099132776260376, "learning_rate": 5.026292431837241e-06, "loss": 0.4645, "step": 6262 }, { "epoch": 0.375772484550309, "grad_norm": 1.1646286249160767, "learning_rate": 5.025680316835222e-06, "loss": 0.3911, "step": 6263 }, { "epoch": 0.375832483350333, "grad_norm": 1.3732352256774902, "learning_rate": 5.025068144215207e-06, "loss": 0.4393, "step": 6264 }, { "epoch": 0.375892482150357, "grad_norm": 1.204749345779419, "learning_rate": 5.0244559140003175e-06, "loss": 0.3914, "step": 6265 }, { "epoch": 0.375952480950381, "grad_norm": 1.251565933227539, "learning_rate": 5.023843626213673e-06, "loss": 0.4097, "step": 6266 }, { "epoch": 0.37601247975040497, "grad_norm": 1.160212755203247, "learning_rate": 5.023231280878398e-06, "loss": 0.3916, "step": 6267 }, { "epoch": 0.37607247855042897, "grad_norm": 1.2849397659301758, "learning_rate": 5.0226188780176175e-06, "loss": 0.4138, "step": 6268 }, { "epoch": 0.37613247735045297, "grad_norm": 1.4181997776031494, "learning_rate": 5.0220064176544586e-06, "loss": 0.4014, "step": 6269 }, { "epoch": 0.376192476150477, "grad_norm": 1.2417197227478027, "learning_rate": 5.02139389981205e-06, "loss": 0.4076, "step": 6270 }, { "epoch": 0.376252474950501, "grad_norm": 1.4120410680770874, "learning_rate": 5.020781324513527e-06, "loss": 0.4392, "step": 6271 }, { "epoch": 0.376312473750525, "grad_norm": 1.3429580926895142, "learning_rate": 5.020168691782022e-06, "loss": 0.4518, "step": 6272 }, { "epoch": 0.376372472550549, "grad_norm": 1.2643663883209229, "learning_rate": 5.019556001640671e-06, "loss": 0.4678, "step": 6273 }, { "epoch": 0.376432471350573, "grad_norm": 1.4164409637451172, "learning_rate": 5.018943254112614e-06, "loss": 0.4697, "step": 6274 }, { "epoch": 0.376492470150597, "grad_norm": 1.3161275386810303, "learning_rate": 5.018330449220988e-06, "loss": 0.4517, "step": 6275 }, { "epoch": 0.376552468950621, "grad_norm": 1.1940569877624512, "learning_rate": 5.01771758698894e-06, "loss": 0.4139, "step": 6276 }, { "epoch": 0.376612467750645, "grad_norm": 2.112929344177246, "learning_rate": 5.017104667439614e-06, "loss": 0.4219, "step": 6277 }, { "epoch": 0.376672466550669, "grad_norm": 1.1941354274749756, "learning_rate": 5.0164916905961565e-06, "loss": 0.4441, "step": 6278 }, { "epoch": 0.376732465350693, "grad_norm": 1.179472804069519, "learning_rate": 5.015878656481716e-06, "loss": 0.3667, "step": 6279 }, { "epoch": 0.376792464150717, "grad_norm": 1.3632420301437378, "learning_rate": 5.015265565119446e-06, "loss": 0.4518, "step": 6280 }, { "epoch": 0.376852462950741, "grad_norm": 1.2047113180160522, "learning_rate": 5.014652416532498e-06, "loss": 0.4136, "step": 6281 }, { "epoch": 0.376912461750765, "grad_norm": 1.5249240398406982, "learning_rate": 5.01403921074403e-06, "loss": 0.4398, "step": 6282 }, { "epoch": 0.376972460550789, "grad_norm": 1.3273378610610962, "learning_rate": 5.013425947777199e-06, "loss": 0.4587, "step": 6283 }, { "epoch": 0.377032459350813, "grad_norm": 1.223543405532837, "learning_rate": 5.012812627655164e-06, "loss": 0.4059, "step": 6284 }, { "epoch": 0.377092458150837, "grad_norm": 1.3799796104431152, "learning_rate": 5.012199250401089e-06, "loss": 0.4418, "step": 6285 }, { "epoch": 0.37715245695086097, "grad_norm": 1.2024281024932861, "learning_rate": 5.011585816038138e-06, "loss": 0.3983, "step": 6286 }, { "epoch": 0.37721245575088497, "grad_norm": 1.190520167350769, "learning_rate": 5.010972324589477e-06, "loss": 0.378, "step": 6287 }, { "epoch": 0.37727245455090896, "grad_norm": 1.2426365613937378, "learning_rate": 5.010358776078275e-06, "loss": 0.4072, "step": 6288 }, { "epoch": 0.37733245335093296, "grad_norm": 1.1496647596359253, "learning_rate": 5.009745170527705e-06, "loss": 0.4028, "step": 6289 }, { "epoch": 0.37739245215095696, "grad_norm": 1.1998107433319092, "learning_rate": 5.009131507960937e-06, "loss": 0.436, "step": 6290 }, { "epoch": 0.37745245095098096, "grad_norm": 1.0310535430908203, "learning_rate": 5.008517788401148e-06, "loss": 0.3798, "step": 6291 }, { "epoch": 0.37751244975100495, "grad_norm": 1.283690094947815, "learning_rate": 5.007904011871515e-06, "loss": 0.3688, "step": 6292 }, { "epoch": 0.377572448551029, "grad_norm": 1.2139583826065063, "learning_rate": 5.007290178395216e-06, "loss": 0.3896, "step": 6293 }, { "epoch": 0.377632447351053, "grad_norm": 1.3598847389221191, "learning_rate": 5.0066762879954356e-06, "loss": 0.478, "step": 6294 }, { "epoch": 0.377692446151077, "grad_norm": 1.2667551040649414, "learning_rate": 5.006062340695356e-06, "loss": 0.41, "step": 6295 }, { "epoch": 0.377752444951101, "grad_norm": 1.1903105974197388, "learning_rate": 5.0054483365181635e-06, "loss": 0.4063, "step": 6296 }, { "epoch": 0.377812443751125, "grad_norm": 1.3433250188827515, "learning_rate": 5.0048342754870455e-06, "loss": 0.4079, "step": 6297 }, { "epoch": 0.377872442551149, "grad_norm": 1.289143443107605, "learning_rate": 5.004220157625194e-06, "loss": 0.4042, "step": 6298 }, { "epoch": 0.377932441351173, "grad_norm": 1.3365238904953003, "learning_rate": 5.0036059829557985e-06, "loss": 0.4629, "step": 6299 }, { "epoch": 0.377992440151197, "grad_norm": 1.2338871955871582, "learning_rate": 5.002991751502056e-06, "loss": 0.4559, "step": 6300 }, { "epoch": 0.378052438951221, "grad_norm": 1.1857221126556396, "learning_rate": 5.002377463287164e-06, "loss": 0.3778, "step": 6301 }, { "epoch": 0.378112437751245, "grad_norm": 1.2139060497283936, "learning_rate": 5.00176311833432e-06, "loss": 0.4318, "step": 6302 }, { "epoch": 0.378172436551269, "grad_norm": 1.273065209388733, "learning_rate": 5.001148716666725e-06, "loss": 0.3953, "step": 6303 }, { "epoch": 0.378232435351293, "grad_norm": 1.1221600770950317, "learning_rate": 5.000534258307581e-06, "loss": 0.387, "step": 6304 }, { "epoch": 0.37829243415131697, "grad_norm": 1.3157325983047485, "learning_rate": 4.999919743280094e-06, "loss": 0.4251, "step": 6305 }, { "epoch": 0.37835243295134097, "grad_norm": 1.2492424249649048, "learning_rate": 4.999305171607473e-06, "loss": 0.4241, "step": 6306 }, { "epoch": 0.37841243175136496, "grad_norm": 1.246158242225647, "learning_rate": 4.998690543312927e-06, "loss": 0.4275, "step": 6307 }, { "epoch": 0.37847243055138896, "grad_norm": 1.1890997886657715, "learning_rate": 4.9980758584196675e-06, "loss": 0.3655, "step": 6308 }, { "epoch": 0.37853242935141296, "grad_norm": 1.3983323574066162, "learning_rate": 4.997461116950906e-06, "loss": 0.4924, "step": 6309 }, { "epoch": 0.37859242815143695, "grad_norm": 1.3687114715576172, "learning_rate": 4.996846318929862e-06, "loss": 0.4811, "step": 6310 }, { "epoch": 0.37865242695146095, "grad_norm": 1.0747365951538086, "learning_rate": 4.9962314643797515e-06, "loss": 0.407, "step": 6311 }, { "epoch": 0.37871242575148495, "grad_norm": 1.1305269002914429, "learning_rate": 4.995616553323796e-06, "loss": 0.4428, "step": 6312 }, { "epoch": 0.37877242455150895, "grad_norm": 1.275601863861084, "learning_rate": 4.995001585785216e-06, "loss": 0.3879, "step": 6313 }, { "epoch": 0.37883242335153294, "grad_norm": 1.345676064491272, "learning_rate": 4.99438656178724e-06, "loss": 0.4362, "step": 6314 }, { "epoch": 0.378892422151557, "grad_norm": 1.180437684059143, "learning_rate": 4.99377148135309e-06, "loss": 0.4646, "step": 6315 }, { "epoch": 0.378952420951581, "grad_norm": 1.2331440448760986, "learning_rate": 4.993156344505996e-06, "loss": 0.4151, "step": 6316 }, { "epoch": 0.379012419751605, "grad_norm": 1.1888427734375, "learning_rate": 4.992541151269191e-06, "loss": 0.4058, "step": 6317 }, { "epoch": 0.379072418551629, "grad_norm": 1.11637544631958, "learning_rate": 4.991925901665907e-06, "loss": 0.3824, "step": 6318 }, { "epoch": 0.379132417351653, "grad_norm": 1.1738712787628174, "learning_rate": 4.991310595719378e-06, "loss": 0.4514, "step": 6319 }, { "epoch": 0.379192416151677, "grad_norm": 1.1736253499984741, "learning_rate": 4.990695233452843e-06, "loss": 0.4306, "step": 6320 }, { "epoch": 0.379252414951701, "grad_norm": 1.2748782634735107, "learning_rate": 4.99007981488954e-06, "loss": 0.4068, "step": 6321 }, { "epoch": 0.379312413751725, "grad_norm": 1.2660003900527954, "learning_rate": 4.989464340052711e-06, "loss": 0.4204, "step": 6322 }, { "epoch": 0.37937241255174897, "grad_norm": 1.208499789237976, "learning_rate": 4.9888488089656e-06, "loss": 0.4086, "step": 6323 }, { "epoch": 0.37943241135177297, "grad_norm": 1.5039376020431519, "learning_rate": 4.988233221651453e-06, "loss": 0.4215, "step": 6324 }, { "epoch": 0.37949241015179697, "grad_norm": 1.2622843980789185, "learning_rate": 4.987617578133517e-06, "loss": 0.3838, "step": 6325 }, { "epoch": 0.37955240895182096, "grad_norm": 1.282094120979309, "learning_rate": 4.987001878435044e-06, "loss": 0.4193, "step": 6326 }, { "epoch": 0.37961240775184496, "grad_norm": 1.1645981073379517, "learning_rate": 4.986386122579284e-06, "loss": 0.4353, "step": 6327 }, { "epoch": 0.37967240655186896, "grad_norm": 1.2064088582992554, "learning_rate": 4.985770310589492e-06, "loss": 0.406, "step": 6328 }, { "epoch": 0.37973240535189295, "grad_norm": 1.3184055089950562, "learning_rate": 4.985154442488924e-06, "loss": 0.4734, "step": 6329 }, { "epoch": 0.37979240415191695, "grad_norm": 1.2050873041152954, "learning_rate": 4.9845385183008395e-06, "loss": 0.3822, "step": 6330 }, { "epoch": 0.37985240295194095, "grad_norm": 1.473354697227478, "learning_rate": 4.9839225380484995e-06, "loss": 0.4414, "step": 6331 }, { "epoch": 0.37991240175196495, "grad_norm": 1.279605507850647, "learning_rate": 4.983306501755165e-06, "loss": 0.4442, "step": 6332 }, { "epoch": 0.37997240055198894, "grad_norm": 1.2121598720550537, "learning_rate": 4.982690409444102e-06, "loss": 0.4044, "step": 6333 }, { "epoch": 0.38003239935201294, "grad_norm": 1.3169652223587036, "learning_rate": 4.982074261138577e-06, "loss": 0.4417, "step": 6334 }, { "epoch": 0.38009239815203694, "grad_norm": 1.3403823375701904, "learning_rate": 4.981458056861861e-06, "loss": 0.4388, "step": 6335 }, { "epoch": 0.38015239695206093, "grad_norm": 1.3687349557876587, "learning_rate": 4.980841796637222e-06, "loss": 0.407, "step": 6336 }, { "epoch": 0.38021239575208493, "grad_norm": 1.35160493850708, "learning_rate": 4.980225480487936e-06, "loss": 0.449, "step": 6337 }, { "epoch": 0.380272394552109, "grad_norm": 1.2241718769073486, "learning_rate": 4.9796091084372775e-06, "loss": 0.4237, "step": 6338 }, { "epoch": 0.380332393352133, "grad_norm": 1.3539040088653564, "learning_rate": 4.978992680508524e-06, "loss": 0.4021, "step": 6339 }, { "epoch": 0.380392392152157, "grad_norm": 1.267471194267273, "learning_rate": 4.9783761967249544e-06, "loss": 0.4442, "step": 6340 }, { "epoch": 0.380452390952181, "grad_norm": 1.3506261110305786, "learning_rate": 4.977759657109852e-06, "loss": 0.4534, "step": 6341 }, { "epoch": 0.38051238975220497, "grad_norm": 1.3223834037780762, "learning_rate": 4.9771430616864994e-06, "loss": 0.4242, "step": 6342 }, { "epoch": 0.38057238855222897, "grad_norm": 1.1747835874557495, "learning_rate": 4.976526410478184e-06, "loss": 0.4553, "step": 6343 }, { "epoch": 0.38063238735225297, "grad_norm": 1.2092444896697998, "learning_rate": 4.975909703508193e-06, "loss": 0.4249, "step": 6344 }, { "epoch": 0.38069238615227696, "grad_norm": 1.4177449941635132, "learning_rate": 4.975292940799817e-06, "loss": 0.459, "step": 6345 }, { "epoch": 0.38075238495230096, "grad_norm": 1.4035941362380981, "learning_rate": 4.9746761223763465e-06, "loss": 0.4147, "step": 6346 }, { "epoch": 0.38081238375232496, "grad_norm": 1.2483288049697876, "learning_rate": 4.974059248261079e-06, "loss": 0.3935, "step": 6347 }, { "epoch": 0.38087238255234895, "grad_norm": 1.2511917352676392, "learning_rate": 4.973442318477309e-06, "loss": 0.4475, "step": 6348 }, { "epoch": 0.38093238135237295, "grad_norm": 1.5992584228515625, "learning_rate": 4.972825333048336e-06, "loss": 0.543, "step": 6349 }, { "epoch": 0.38099238015239695, "grad_norm": 1.280414342880249, "learning_rate": 4.972208291997459e-06, "loss": 0.404, "step": 6350 }, { "epoch": 0.38105237895242094, "grad_norm": 1.2225981950759888, "learning_rate": 4.971591195347984e-06, "loss": 0.4739, "step": 6351 }, { "epoch": 0.38111237775244494, "grad_norm": 1.2211074829101562, "learning_rate": 4.970974043123213e-06, "loss": 0.3689, "step": 6352 }, { "epoch": 0.38117237655246894, "grad_norm": 1.255631923675537, "learning_rate": 4.970356835346454e-06, "loss": 0.4587, "step": 6353 }, { "epoch": 0.38123237535249294, "grad_norm": 1.2433089017868042, "learning_rate": 4.969739572041018e-06, "loss": 0.4765, "step": 6354 }, { "epoch": 0.38129237415251693, "grad_norm": 1.1718804836273193, "learning_rate": 4.969122253230212e-06, "loss": 0.4016, "step": 6355 }, { "epoch": 0.38135237295254093, "grad_norm": 1.327962875366211, "learning_rate": 4.968504878937354e-06, "loss": 0.4152, "step": 6356 }, { "epoch": 0.3814123717525649, "grad_norm": 1.2652406692504883, "learning_rate": 4.967887449185755e-06, "loss": 0.4671, "step": 6357 }, { "epoch": 0.3814723705525889, "grad_norm": 1.3151847124099731, "learning_rate": 4.967269963998737e-06, "loss": 0.4756, "step": 6358 }, { "epoch": 0.3815323693526129, "grad_norm": 1.4648391008377075, "learning_rate": 4.966652423399616e-06, "loss": 0.46, "step": 6359 }, { "epoch": 0.381592368152637, "grad_norm": 1.203551173210144, "learning_rate": 4.966034827411715e-06, "loss": 0.429, "step": 6360 }, { "epoch": 0.38165236695266097, "grad_norm": 1.2905932664871216, "learning_rate": 4.965417176058358e-06, "loss": 0.4336, "step": 6361 }, { "epoch": 0.38171236575268497, "grad_norm": 1.364675760269165, "learning_rate": 4.964799469362871e-06, "loss": 0.3895, "step": 6362 }, { "epoch": 0.38177236455270896, "grad_norm": 1.2600183486938477, "learning_rate": 4.964181707348581e-06, "loss": 0.4697, "step": 6363 }, { "epoch": 0.38183236335273296, "grad_norm": 1.1792540550231934, "learning_rate": 4.963563890038819e-06, "loss": 0.4144, "step": 6364 }, { "epoch": 0.38189236215275696, "grad_norm": 1.4309126138687134, "learning_rate": 4.962946017456917e-06, "loss": 0.4596, "step": 6365 }, { "epoch": 0.38195236095278096, "grad_norm": 1.198245644569397, "learning_rate": 4.962328089626209e-06, "loss": 0.3552, "step": 6366 }, { "epoch": 0.38201235975280495, "grad_norm": 1.1649787425994873, "learning_rate": 4.961710106570032e-06, "loss": 0.4193, "step": 6367 }, { "epoch": 0.38207235855282895, "grad_norm": 1.2960742712020874, "learning_rate": 4.961092068311724e-06, "loss": 0.4069, "step": 6368 }, { "epoch": 0.38213235735285295, "grad_norm": 1.2618433237075806, "learning_rate": 4.960473974874623e-06, "loss": 0.4365, "step": 6369 }, { "epoch": 0.38219235615287694, "grad_norm": 1.2000809907913208, "learning_rate": 4.959855826282076e-06, "loss": 0.4358, "step": 6370 }, { "epoch": 0.38225235495290094, "grad_norm": 1.228556513786316, "learning_rate": 4.959237622557425e-06, "loss": 0.4195, "step": 6371 }, { "epoch": 0.38231235375292494, "grad_norm": 1.299157738685608, "learning_rate": 4.958619363724018e-06, "loss": 0.4154, "step": 6372 }, { "epoch": 0.38237235255294894, "grad_norm": 1.2825539112091064, "learning_rate": 4.958001049805202e-06, "loss": 0.4066, "step": 6373 }, { "epoch": 0.38243235135297293, "grad_norm": 1.1456923484802246, "learning_rate": 4.957382680824329e-06, "loss": 0.4034, "step": 6374 }, { "epoch": 0.38249235015299693, "grad_norm": 1.2171835899353027, "learning_rate": 4.956764256804752e-06, "loss": 0.4365, "step": 6375 }, { "epoch": 0.3825523489530209, "grad_norm": 1.357114315032959, "learning_rate": 4.956145777769826e-06, "loss": 0.4402, "step": 6376 }, { "epoch": 0.3826123477530449, "grad_norm": 1.3469960689544678, "learning_rate": 4.95552724374291e-06, "loss": 0.4379, "step": 6377 }, { "epoch": 0.3826723465530689, "grad_norm": 1.2430340051651, "learning_rate": 4.954908654747358e-06, "loss": 0.4296, "step": 6378 }, { "epoch": 0.3827323453530929, "grad_norm": 1.400604248046875, "learning_rate": 4.9542900108065364e-06, "loss": 0.4491, "step": 6379 }, { "epoch": 0.3827923441531169, "grad_norm": 1.2689412832260132, "learning_rate": 4.953671311943808e-06, "loss": 0.4292, "step": 6380 }, { "epoch": 0.3828523429531409, "grad_norm": 1.2531859874725342, "learning_rate": 4.953052558182536e-06, "loss": 0.3895, "step": 6381 }, { "epoch": 0.38291234175316496, "grad_norm": 1.3583828210830688, "learning_rate": 4.952433749546089e-06, "loss": 0.4159, "step": 6382 }, { "epoch": 0.38297234055318896, "grad_norm": 1.2259539365768433, "learning_rate": 4.951814886057836e-06, "loss": 0.3891, "step": 6383 }, { "epoch": 0.38303233935321296, "grad_norm": 1.1501411199569702, "learning_rate": 4.95119596774115e-06, "loss": 0.3592, "step": 6384 }, { "epoch": 0.38309233815323696, "grad_norm": 1.3301879167556763, "learning_rate": 4.950576994619404e-06, "loss": 0.4392, "step": 6385 }, { "epoch": 0.38315233695326095, "grad_norm": 1.2972803115844727, "learning_rate": 4.949957966715973e-06, "loss": 0.4572, "step": 6386 }, { "epoch": 0.38321233575328495, "grad_norm": 1.3000155687332153, "learning_rate": 4.949338884054236e-06, "loss": 0.4323, "step": 6387 }, { "epoch": 0.38327233455330895, "grad_norm": 1.2419484853744507, "learning_rate": 4.948719746657571e-06, "loss": 0.433, "step": 6388 }, { "epoch": 0.38333233335333294, "grad_norm": 1.285813331604004, "learning_rate": 4.948100554549364e-06, "loss": 0.4032, "step": 6389 }, { "epoch": 0.38339233215335694, "grad_norm": 1.2742623090744019, "learning_rate": 4.947481307752995e-06, "loss": 0.441, "step": 6390 }, { "epoch": 0.38345233095338094, "grad_norm": 1.4357905387878418, "learning_rate": 4.946862006291853e-06, "loss": 0.4727, "step": 6391 }, { "epoch": 0.38351232975340493, "grad_norm": 1.3690606355667114, "learning_rate": 4.946242650189323e-06, "loss": 0.4258, "step": 6392 }, { "epoch": 0.38357232855342893, "grad_norm": 1.2358028888702393, "learning_rate": 4.945623239468799e-06, "loss": 0.4252, "step": 6393 }, { "epoch": 0.38363232735345293, "grad_norm": 1.2438186407089233, "learning_rate": 4.945003774153671e-06, "loss": 0.4464, "step": 6394 }, { "epoch": 0.3836923261534769, "grad_norm": 1.194188117980957, "learning_rate": 4.944384254267334e-06, "loss": 0.4303, "step": 6395 }, { "epoch": 0.3837523249535009, "grad_norm": 1.2133584022521973, "learning_rate": 4.943764679833183e-06, "loss": 0.4379, "step": 6396 }, { "epoch": 0.3838123237535249, "grad_norm": 1.327926516532898, "learning_rate": 4.94314505087462e-06, "loss": 0.407, "step": 6397 }, { "epoch": 0.3838723225535489, "grad_norm": 1.140121340751648, "learning_rate": 4.942525367415041e-06, "loss": 0.4052, "step": 6398 }, { "epoch": 0.3839323213535729, "grad_norm": 1.3034814596176147, "learning_rate": 4.941905629477853e-06, "loss": 0.4081, "step": 6399 }, { "epoch": 0.3839923201535969, "grad_norm": 1.3090603351593018, "learning_rate": 4.941285837086458e-06, "loss": 0.4527, "step": 6400 }, { "epoch": 0.3840523189536209, "grad_norm": 1.3290287256240845, "learning_rate": 4.940665990264263e-06, "loss": 0.4134, "step": 6401 }, { "epoch": 0.3841123177536449, "grad_norm": 1.1443907022476196, "learning_rate": 4.940046089034677e-06, "loss": 0.3925, "step": 6402 }, { "epoch": 0.3841723165536689, "grad_norm": 1.3121517896652222, "learning_rate": 4.939426133421112e-06, "loss": 0.457, "step": 6403 }, { "epoch": 0.3842323153536929, "grad_norm": 1.338555097579956, "learning_rate": 4.93880612344698e-06, "loss": 0.4244, "step": 6404 }, { "epoch": 0.38429231415371695, "grad_norm": 1.3157638311386108, "learning_rate": 4.938186059135695e-06, "loss": 0.5006, "step": 6405 }, { "epoch": 0.38435231295374095, "grad_norm": 1.1393333673477173, "learning_rate": 4.937565940510675e-06, "loss": 0.3943, "step": 6406 }, { "epoch": 0.38441231175376495, "grad_norm": 1.083715796470642, "learning_rate": 4.9369457675953394e-06, "loss": 0.3322, "step": 6407 }, { "epoch": 0.38447231055378894, "grad_norm": 1.2457239627838135, "learning_rate": 4.936325540413109e-06, "loss": 0.4297, "step": 6408 }, { "epoch": 0.38453230935381294, "grad_norm": 1.2909619808197021, "learning_rate": 4.935705258987407e-06, "loss": 0.4519, "step": 6409 }, { "epoch": 0.38459230815383694, "grad_norm": 1.1115461587905884, "learning_rate": 4.935084923341657e-06, "loss": 0.3853, "step": 6410 }, { "epoch": 0.38465230695386093, "grad_norm": 1.1365911960601807, "learning_rate": 4.9344645334992886e-06, "loss": 0.3759, "step": 6411 }, { "epoch": 0.38471230575388493, "grad_norm": 1.141642689704895, "learning_rate": 4.933844089483731e-06, "loss": 0.4191, "step": 6412 }, { "epoch": 0.38477230455390893, "grad_norm": 1.2451515197753906, "learning_rate": 4.933223591318413e-06, "loss": 0.4533, "step": 6413 }, { "epoch": 0.3848323033539329, "grad_norm": 1.1678143739700317, "learning_rate": 4.932603039026772e-06, "loss": 0.4411, "step": 6414 }, { "epoch": 0.3848923021539569, "grad_norm": 1.2762449979782104, "learning_rate": 4.931982432632239e-06, "loss": 0.3894, "step": 6415 }, { "epoch": 0.3849523009539809, "grad_norm": 1.2573959827423096, "learning_rate": 4.931361772158254e-06, "loss": 0.4466, "step": 6416 }, { "epoch": 0.3850122997540049, "grad_norm": 1.357349157333374, "learning_rate": 4.9307410576282564e-06, "loss": 0.4545, "step": 6417 }, { "epoch": 0.3850722985540289, "grad_norm": 1.2309356927871704, "learning_rate": 4.930120289065689e-06, "loss": 0.4419, "step": 6418 }, { "epoch": 0.3851322973540529, "grad_norm": 1.2620347738265991, "learning_rate": 4.929499466493992e-06, "loss": 0.4333, "step": 6419 }, { "epoch": 0.3851922961540769, "grad_norm": 1.367147445678711, "learning_rate": 4.928878589936614e-06, "loss": 0.4441, "step": 6420 }, { "epoch": 0.3852522949541009, "grad_norm": 1.2608122825622559, "learning_rate": 4.928257659417001e-06, "loss": 0.4236, "step": 6421 }, { "epoch": 0.3853122937541249, "grad_norm": 1.2501031160354614, "learning_rate": 4.927636674958602e-06, "loss": 0.4019, "step": 6422 }, { "epoch": 0.3853722925541489, "grad_norm": 1.1709483861923218, "learning_rate": 4.927015636584871e-06, "loss": 0.4372, "step": 6423 }, { "epoch": 0.3854322913541729, "grad_norm": 1.4295084476470947, "learning_rate": 4.9263945443192625e-06, "loss": 0.488, "step": 6424 }, { "epoch": 0.3854922901541969, "grad_norm": 1.2423447370529175, "learning_rate": 4.92577339818523e-06, "loss": 0.389, "step": 6425 }, { "epoch": 0.3855522889542209, "grad_norm": 1.2641454935073853, "learning_rate": 4.9251521982062325e-06, "loss": 0.403, "step": 6426 }, { "epoch": 0.38561228775424494, "grad_norm": 1.2571672201156616, "learning_rate": 4.9245309444057276e-06, "loss": 0.5289, "step": 6427 }, { "epoch": 0.38567228655426894, "grad_norm": 1.2879447937011719, "learning_rate": 4.92390963680718e-06, "loss": 0.44, "step": 6428 }, { "epoch": 0.38573228535429294, "grad_norm": 1.4644253253936768, "learning_rate": 4.923288275434053e-06, "loss": 0.4527, "step": 6429 }, { "epoch": 0.38579228415431693, "grad_norm": 1.226897954940796, "learning_rate": 4.9226668603098124e-06, "loss": 0.4282, "step": 6430 }, { "epoch": 0.38585228295434093, "grad_norm": 1.2837181091308594, "learning_rate": 4.922045391457926e-06, "loss": 0.4382, "step": 6431 }, { "epoch": 0.3859122817543649, "grad_norm": 1.1886218786239624, "learning_rate": 4.921423868901864e-06, "loss": 0.4061, "step": 6432 }, { "epoch": 0.3859722805543889, "grad_norm": 1.3715143203735352, "learning_rate": 4.9208022926650985e-06, "loss": 0.4612, "step": 6433 }, { "epoch": 0.3860322793544129, "grad_norm": 1.2903045415878296, "learning_rate": 4.920180662771102e-06, "loss": 0.4173, "step": 6434 }, { "epoch": 0.3860922781544369, "grad_norm": 1.2682515382766724, "learning_rate": 4.919558979243354e-06, "loss": 0.4309, "step": 6435 }, { "epoch": 0.3861522769544609, "grad_norm": 1.2853014469146729, "learning_rate": 4.91893724210533e-06, "loss": 0.392, "step": 6436 }, { "epoch": 0.3862122757544849, "grad_norm": 1.40668523311615, "learning_rate": 4.918315451380512e-06, "loss": 0.4636, "step": 6437 }, { "epoch": 0.3862722745545089, "grad_norm": 1.1739038228988647, "learning_rate": 4.917693607092381e-06, "loss": 0.4155, "step": 6438 }, { "epoch": 0.3863322733545329, "grad_norm": 1.2053885459899902, "learning_rate": 4.9170717092644205e-06, "loss": 0.3558, "step": 6439 }, { "epoch": 0.3863922721545569, "grad_norm": 1.32881760597229, "learning_rate": 4.916449757920118e-06, "loss": 0.4378, "step": 6440 }, { "epoch": 0.3864522709545809, "grad_norm": 1.2141157388687134, "learning_rate": 4.9158277530829624e-06, "loss": 0.4438, "step": 6441 }, { "epoch": 0.3865122697546049, "grad_norm": 1.20330011844635, "learning_rate": 4.9152056947764426e-06, "loss": 0.3622, "step": 6442 }, { "epoch": 0.3865722685546289, "grad_norm": 1.4177559614181519, "learning_rate": 4.914583583024051e-06, "loss": 0.4403, "step": 6443 }, { "epoch": 0.3866322673546529, "grad_norm": 1.2506521940231323, "learning_rate": 4.9139614178492835e-06, "loss": 0.439, "step": 6444 }, { "epoch": 0.3866922661546769, "grad_norm": 1.0980640649795532, "learning_rate": 4.9133391992756334e-06, "loss": 0.3841, "step": 6445 }, { "epoch": 0.3867522649547009, "grad_norm": 1.2359490394592285, "learning_rate": 4.912716927326601e-06, "loss": 0.4443, "step": 6446 }, { "epoch": 0.3868122637547249, "grad_norm": 1.3981492519378662, "learning_rate": 4.9120946020256885e-06, "loss": 0.4596, "step": 6447 }, { "epoch": 0.3868722625547489, "grad_norm": 1.1985536813735962, "learning_rate": 4.9114722233963945e-06, "loss": 0.4352, "step": 6448 }, { "epoch": 0.3869322613547729, "grad_norm": 1.4132452011108398, "learning_rate": 4.910849791462227e-06, "loss": 0.4546, "step": 6449 }, { "epoch": 0.38699226015479693, "grad_norm": 1.4115219116210938, "learning_rate": 4.910227306246689e-06, "loss": 0.456, "step": 6450 }, { "epoch": 0.3870522589548209, "grad_norm": 1.2597616910934448, "learning_rate": 4.9096047677732915e-06, "loss": 0.3958, "step": 6451 }, { "epoch": 0.3871122577548449, "grad_norm": 1.433668851852417, "learning_rate": 4.908982176065544e-06, "loss": 0.4241, "step": 6452 }, { "epoch": 0.3871722565548689, "grad_norm": 1.1281810998916626, "learning_rate": 4.908359531146959e-06, "loss": 0.3684, "step": 6453 }, { "epoch": 0.3872322553548929, "grad_norm": 1.3627287149429321, "learning_rate": 4.90773683304105e-06, "loss": 0.4683, "step": 6454 }, { "epoch": 0.3872922541549169, "grad_norm": 1.353044867515564, "learning_rate": 4.907114081771336e-06, "loss": 0.4179, "step": 6455 }, { "epoch": 0.3873522529549409, "grad_norm": 1.161341667175293, "learning_rate": 4.906491277361333e-06, "loss": 0.3779, "step": 6456 }, { "epoch": 0.3874122517549649, "grad_norm": 1.1677051782608032, "learning_rate": 4.905868419834562e-06, "loss": 0.3759, "step": 6457 }, { "epoch": 0.3874722505549889, "grad_norm": 1.2585971355438232, "learning_rate": 4.905245509214547e-06, "loss": 0.4594, "step": 6458 }, { "epoch": 0.3875322493550129, "grad_norm": 1.451205849647522, "learning_rate": 4.904622545524811e-06, "loss": 0.4204, "step": 6459 }, { "epoch": 0.3875922481550369, "grad_norm": 1.3634626865386963, "learning_rate": 4.9039995287888815e-06, "loss": 0.4761, "step": 6460 }, { "epoch": 0.3876522469550609, "grad_norm": 1.3060619831085205, "learning_rate": 4.9033764590302854e-06, "loss": 0.4264, "step": 6461 }, { "epoch": 0.3877122457550849, "grad_norm": 1.3031210899353027, "learning_rate": 4.902753336272556e-06, "loss": 0.4139, "step": 6462 }, { "epoch": 0.3877722445551089, "grad_norm": 1.1981432437896729, "learning_rate": 4.902130160539221e-06, "loss": 0.412, "step": 6463 }, { "epoch": 0.3878322433551329, "grad_norm": 1.2905806303024292, "learning_rate": 4.90150693185382e-06, "loss": 0.4451, "step": 6464 }, { "epoch": 0.3878922421551569, "grad_norm": 1.1784658432006836, "learning_rate": 4.900883650239887e-06, "loss": 0.4423, "step": 6465 }, { "epoch": 0.3879522409551809, "grad_norm": 1.2173937559127808, "learning_rate": 4.900260315720961e-06, "loss": 0.395, "step": 6466 }, { "epoch": 0.3880122397552049, "grad_norm": 1.1361252069473267, "learning_rate": 4.899636928320582e-06, "loss": 0.4502, "step": 6467 }, { "epoch": 0.3880722385552289, "grad_norm": 1.2856462001800537, "learning_rate": 4.899013488062293e-06, "loss": 0.4832, "step": 6468 }, { "epoch": 0.3881322373552529, "grad_norm": 1.1730531454086304, "learning_rate": 4.898389994969639e-06, "loss": 0.4072, "step": 6469 }, { "epoch": 0.38819223615527687, "grad_norm": 1.1430739164352417, "learning_rate": 4.897766449066165e-06, "loss": 0.3734, "step": 6470 }, { "epoch": 0.38825223495530087, "grad_norm": 1.3040966987609863, "learning_rate": 4.897142850375421e-06, "loss": 0.4201, "step": 6471 }, { "epoch": 0.3883122337553249, "grad_norm": 1.4218940734863281, "learning_rate": 4.896519198920959e-06, "loss": 0.4404, "step": 6472 }, { "epoch": 0.3883722325553489, "grad_norm": 1.2549128532409668, "learning_rate": 4.8958954947263265e-06, "loss": 0.4352, "step": 6473 }, { "epoch": 0.3884322313553729, "grad_norm": 1.3701207637786865, "learning_rate": 4.895271737815081e-06, "loss": 0.4016, "step": 6474 }, { "epoch": 0.3884922301553969, "grad_norm": 1.1600581407546997, "learning_rate": 4.89464792821078e-06, "loss": 0.4031, "step": 6475 }, { "epoch": 0.3885522289554209, "grad_norm": 1.2396645545959473, "learning_rate": 4.89402406593698e-06, "loss": 0.4145, "step": 6476 }, { "epoch": 0.3886122277554449, "grad_norm": 1.260084629058838, "learning_rate": 4.893400151017243e-06, "loss": 0.4509, "step": 6477 }, { "epoch": 0.3886722265554689, "grad_norm": 1.3248149156570435, "learning_rate": 4.89277618347513e-06, "loss": 0.4601, "step": 6478 }, { "epoch": 0.3887322253554929, "grad_norm": 1.0956716537475586, "learning_rate": 4.892152163334206e-06, "loss": 0.4005, "step": 6479 }, { "epoch": 0.3887922241555169, "grad_norm": 1.1718195676803589, "learning_rate": 4.891528090618037e-06, "loss": 0.3998, "step": 6480 }, { "epoch": 0.3888522229555409, "grad_norm": 1.2561631202697754, "learning_rate": 4.890903965350193e-06, "loss": 0.443, "step": 6481 }, { "epoch": 0.3889122217555649, "grad_norm": 1.3021130561828613, "learning_rate": 4.890279787554242e-06, "loss": 0.4349, "step": 6482 }, { "epoch": 0.3889722205555889, "grad_norm": 1.2696757316589355, "learning_rate": 4.889655557253759e-06, "loss": 0.3816, "step": 6483 }, { "epoch": 0.3890322193556129, "grad_norm": 1.112883448600769, "learning_rate": 4.889031274472315e-06, "loss": 0.3694, "step": 6484 }, { "epoch": 0.3890922181556369, "grad_norm": 1.1615492105484009, "learning_rate": 4.888406939233489e-06, "loss": 0.4058, "step": 6485 }, { "epoch": 0.3891522169556609, "grad_norm": 1.180432677268982, "learning_rate": 4.8877825515608586e-06, "loss": 0.4085, "step": 6486 }, { "epoch": 0.3892122157556849, "grad_norm": 1.3049975633621216, "learning_rate": 4.887158111478004e-06, "loss": 0.4456, "step": 6487 }, { "epoch": 0.3892722145557089, "grad_norm": 1.1983500719070435, "learning_rate": 4.886533619008507e-06, "loss": 0.4531, "step": 6488 }, { "epoch": 0.38933221335573287, "grad_norm": 1.273638367652893, "learning_rate": 4.885909074175953e-06, "loss": 0.4329, "step": 6489 }, { "epoch": 0.38939221215575687, "grad_norm": 1.2083325386047363, "learning_rate": 4.885284477003928e-06, "loss": 0.3781, "step": 6490 }, { "epoch": 0.38945221095578086, "grad_norm": 1.200708031654358, "learning_rate": 4.8846598275160185e-06, "loss": 0.4308, "step": 6491 }, { "epoch": 0.38951220975580486, "grad_norm": 1.269410490989685, "learning_rate": 4.8840351257358146e-06, "loss": 0.4266, "step": 6492 }, { "epoch": 0.38957220855582886, "grad_norm": 1.4142155647277832, "learning_rate": 4.883410371686912e-06, "loss": 0.4796, "step": 6493 }, { "epoch": 0.38963220735585286, "grad_norm": 1.197796106338501, "learning_rate": 4.882785565392901e-06, "loss": 0.3862, "step": 6494 }, { "epoch": 0.3896922061558769, "grad_norm": 1.2240276336669922, "learning_rate": 4.88216070687738e-06, "loss": 0.3857, "step": 6495 }, { "epoch": 0.3897522049559009, "grad_norm": 1.3449949026107788, "learning_rate": 4.881535796163945e-06, "loss": 0.4565, "step": 6496 }, { "epoch": 0.3898122037559249, "grad_norm": 1.2974889278411865, "learning_rate": 4.880910833276199e-06, "loss": 0.4034, "step": 6497 }, { "epoch": 0.3898722025559489, "grad_norm": 1.1976803541183472, "learning_rate": 4.880285818237741e-06, "loss": 0.3525, "step": 6498 }, { "epoch": 0.3899322013559729, "grad_norm": 1.2347227334976196, "learning_rate": 4.879660751072176e-06, "loss": 0.4286, "step": 6499 }, { "epoch": 0.3899922001559969, "grad_norm": 1.2548249959945679, "learning_rate": 4.879035631803111e-06, "loss": 0.4372, "step": 6500 }, { "epoch": 0.3900521989560209, "grad_norm": 1.4452719688415527, "learning_rate": 4.878410460454153e-06, "loss": 0.4696, "step": 6501 }, { "epoch": 0.3901121977560449, "grad_norm": 1.3732476234436035, "learning_rate": 4.877785237048913e-06, "loss": 0.4075, "step": 6502 }, { "epoch": 0.3901721965560689, "grad_norm": 1.276459813117981, "learning_rate": 4.877159961610999e-06, "loss": 0.4179, "step": 6503 }, { "epoch": 0.3902321953560929, "grad_norm": 1.3090829849243164, "learning_rate": 4.876534634164029e-06, "loss": 0.4512, "step": 6504 }, { "epoch": 0.3902921941561169, "grad_norm": 1.3089519739151, "learning_rate": 4.875909254731617e-06, "loss": 0.4557, "step": 6505 }, { "epoch": 0.3903521929561409, "grad_norm": 1.302361249923706, "learning_rate": 4.8752838233373826e-06, "loss": 0.3937, "step": 6506 }, { "epoch": 0.3904121917561649, "grad_norm": 1.3448032140731812, "learning_rate": 4.874658340004942e-06, "loss": 0.4116, "step": 6507 }, { "epoch": 0.39047219055618887, "grad_norm": 1.2860609292984009, "learning_rate": 4.874032804757919e-06, "loss": 0.4285, "step": 6508 }, { "epoch": 0.39053218935621287, "grad_norm": 1.1538946628570557, "learning_rate": 4.873407217619938e-06, "loss": 0.3992, "step": 6509 }, { "epoch": 0.39059218815623686, "grad_norm": 1.328249454498291, "learning_rate": 4.872781578614624e-06, "loss": 0.4086, "step": 6510 }, { "epoch": 0.39065218695626086, "grad_norm": 1.1756553649902344, "learning_rate": 4.872155887765603e-06, "loss": 0.4429, "step": 6511 }, { "epoch": 0.39071218575628486, "grad_norm": 1.3063925504684448, "learning_rate": 4.871530145096506e-06, "loss": 0.4, "step": 6512 }, { "epoch": 0.39077218455630885, "grad_norm": 1.1798920631408691, "learning_rate": 4.8709043506309634e-06, "loss": 0.3863, "step": 6513 }, { "epoch": 0.39083218335633285, "grad_norm": 1.2160758972167969, "learning_rate": 4.870278504392611e-06, "loss": 0.4128, "step": 6514 }, { "epoch": 0.39089218215635685, "grad_norm": 1.2720537185668945, "learning_rate": 4.869652606405081e-06, "loss": 0.403, "step": 6515 }, { "epoch": 0.39095218095638085, "grad_norm": 1.3210269212722778, "learning_rate": 4.869026656692013e-06, "loss": 0.4248, "step": 6516 }, { "epoch": 0.3910121797564049, "grad_norm": 1.295495629310608, "learning_rate": 4.868400655277044e-06, "loss": 0.4367, "step": 6517 }, { "epoch": 0.3910721785564289, "grad_norm": 1.1583895683288574, "learning_rate": 4.867774602183819e-06, "loss": 0.4257, "step": 6518 }, { "epoch": 0.3911321773564529, "grad_norm": 1.2417268753051758, "learning_rate": 4.867148497435978e-06, "loss": 0.4046, "step": 6519 }, { "epoch": 0.3911921761564769, "grad_norm": 1.2615015506744385, "learning_rate": 4.866522341057167e-06, "loss": 0.3954, "step": 6520 }, { "epoch": 0.3912521749565009, "grad_norm": 1.185678243637085, "learning_rate": 4.865896133071032e-06, "loss": 0.4131, "step": 6521 }, { "epoch": 0.3913121737565249, "grad_norm": 1.1654400825500488, "learning_rate": 4.865269873501223e-06, "loss": 0.401, "step": 6522 }, { "epoch": 0.3913721725565489, "grad_norm": 1.1935807466506958, "learning_rate": 4.864643562371392e-06, "loss": 0.4281, "step": 6523 }, { "epoch": 0.3914321713565729, "grad_norm": 1.2037826776504517, "learning_rate": 4.864017199705191e-06, "loss": 0.4331, "step": 6524 }, { "epoch": 0.3914921701565969, "grad_norm": 1.328488826751709, "learning_rate": 4.863390785526275e-06, "loss": 0.4981, "step": 6525 }, { "epoch": 0.39155216895662087, "grad_norm": 1.2537298202514648, "learning_rate": 4.8627643198583e-06, "loss": 0.4379, "step": 6526 }, { "epoch": 0.39161216775664487, "grad_norm": 1.3748775720596313, "learning_rate": 4.862137802724926e-06, "loss": 0.4432, "step": 6527 }, { "epoch": 0.39167216655666887, "grad_norm": 1.4465429782867432, "learning_rate": 4.861511234149811e-06, "loss": 0.4616, "step": 6528 }, { "epoch": 0.39173216535669286, "grad_norm": 1.341040849685669, "learning_rate": 4.860884614156623e-06, "loss": 0.4196, "step": 6529 }, { "epoch": 0.39179216415671686, "grad_norm": 1.2599772214889526, "learning_rate": 4.86025794276902e-06, "loss": 0.4476, "step": 6530 }, { "epoch": 0.39185216295674086, "grad_norm": 1.3817352056503296, "learning_rate": 4.8596312200106745e-06, "loss": 0.4281, "step": 6531 }, { "epoch": 0.39191216175676485, "grad_norm": 1.2736074924468994, "learning_rate": 4.8590044459052515e-06, "loss": 0.4362, "step": 6532 }, { "epoch": 0.39197216055678885, "grad_norm": 1.2029986381530762, "learning_rate": 4.858377620476421e-06, "loss": 0.4577, "step": 6533 }, { "epoch": 0.39203215935681285, "grad_norm": 1.365142822265625, "learning_rate": 4.857750743747858e-06, "loss": 0.3627, "step": 6534 }, { "epoch": 0.39209215815683685, "grad_norm": 1.265304684638977, "learning_rate": 4.857123815743234e-06, "loss": 0.4033, "step": 6535 }, { "epoch": 0.39215215695686084, "grad_norm": 1.2594865560531616, "learning_rate": 4.856496836486228e-06, "loss": 0.4356, "step": 6536 }, { "epoch": 0.39221215575688484, "grad_norm": 1.2120819091796875, "learning_rate": 4.855869806000516e-06, "loss": 0.3924, "step": 6537 }, { "epoch": 0.39227215455690884, "grad_norm": 1.2851831912994385, "learning_rate": 4.855242724309779e-06, "loss": 0.4643, "step": 6538 }, { "epoch": 0.3923321533569329, "grad_norm": 1.0897401571273804, "learning_rate": 4.854615591437698e-06, "loss": 0.3975, "step": 6539 }, { "epoch": 0.3923921521569569, "grad_norm": 1.206956386566162, "learning_rate": 4.853988407407958e-06, "loss": 0.4512, "step": 6540 }, { "epoch": 0.3924521509569809, "grad_norm": 1.1736894845962524, "learning_rate": 4.853361172244245e-06, "loss": 0.387, "step": 6541 }, { "epoch": 0.3925121497570049, "grad_norm": 1.1750304698944092, "learning_rate": 4.852733885970246e-06, "loss": 0.4235, "step": 6542 }, { "epoch": 0.3925721485570289, "grad_norm": 1.1447125673294067, "learning_rate": 4.852106548609652e-06, "loss": 0.4191, "step": 6543 }, { "epoch": 0.3926321473570529, "grad_norm": 1.3145228624343872, "learning_rate": 4.851479160186153e-06, "loss": 0.4303, "step": 6544 }, { "epoch": 0.39269214615707687, "grad_norm": 1.2970974445343018, "learning_rate": 4.850851720723444e-06, "loss": 0.4284, "step": 6545 }, { "epoch": 0.39275214495710087, "grad_norm": 1.3581708669662476, "learning_rate": 4.850224230245219e-06, "loss": 0.4341, "step": 6546 }, { "epoch": 0.39281214375712487, "grad_norm": 1.2630397081375122, "learning_rate": 4.849596688775177e-06, "loss": 0.404, "step": 6547 }, { "epoch": 0.39287214255714886, "grad_norm": 1.244785189628601, "learning_rate": 4.848969096337018e-06, "loss": 0.4552, "step": 6548 }, { "epoch": 0.39293214135717286, "grad_norm": 1.1202865839004517, "learning_rate": 4.848341452954441e-06, "loss": 0.3727, "step": 6549 }, { "epoch": 0.39299214015719686, "grad_norm": 1.3637090921401978, "learning_rate": 4.847713758651149e-06, "loss": 0.4562, "step": 6550 }, { "epoch": 0.39305213895722085, "grad_norm": 1.2813371419906616, "learning_rate": 4.847086013450851e-06, "loss": 0.4495, "step": 6551 }, { "epoch": 0.39311213775724485, "grad_norm": 1.1969181299209595, "learning_rate": 4.84645821737725e-06, "loss": 0.4066, "step": 6552 }, { "epoch": 0.39317213655726885, "grad_norm": 1.2143194675445557, "learning_rate": 4.845830370454057e-06, "loss": 0.3915, "step": 6553 }, { "epoch": 0.39323213535729284, "grad_norm": 1.1437827348709106, "learning_rate": 4.845202472704983e-06, "loss": 0.3729, "step": 6554 }, { "epoch": 0.39329213415731684, "grad_norm": 1.3941501379013062, "learning_rate": 4.8445745241537395e-06, "loss": 0.4558, "step": 6555 }, { "epoch": 0.39335213295734084, "grad_norm": 1.3193105459213257, "learning_rate": 4.843946524824042e-06, "loss": 0.4017, "step": 6556 }, { "epoch": 0.39341213175736484, "grad_norm": 1.3166712522506714, "learning_rate": 4.843318474739608e-06, "loss": 0.4196, "step": 6557 }, { "epoch": 0.39347213055738883, "grad_norm": 1.2751288414001465, "learning_rate": 4.842690373924156e-06, "loss": 0.4209, "step": 6558 }, { "epoch": 0.39353212935741283, "grad_norm": 1.3011283874511719, "learning_rate": 4.842062222401404e-06, "loss": 0.4472, "step": 6559 }, { "epoch": 0.3935921281574368, "grad_norm": 1.0695456266403198, "learning_rate": 4.841434020195078e-06, "loss": 0.3555, "step": 6560 }, { "epoch": 0.3936521269574608, "grad_norm": 1.213250994682312, "learning_rate": 4.840805767328899e-06, "loss": 0.393, "step": 6561 }, { "epoch": 0.3937121257574849, "grad_norm": 1.1648672819137573, "learning_rate": 4.840177463826596e-06, "loss": 0.3586, "step": 6562 }, { "epoch": 0.3937721245575089, "grad_norm": 1.170338749885559, "learning_rate": 4.839549109711896e-06, "loss": 0.3966, "step": 6563 }, { "epoch": 0.39383212335753287, "grad_norm": 1.3466135263442993, "learning_rate": 4.838920705008528e-06, "loss": 0.3885, "step": 6564 }, { "epoch": 0.39389212215755687, "grad_norm": 1.2023922204971313, "learning_rate": 4.838292249740226e-06, "loss": 0.4008, "step": 6565 }, { "epoch": 0.39395212095758086, "grad_norm": 1.2098846435546875, "learning_rate": 4.837663743930724e-06, "loss": 0.3899, "step": 6566 }, { "epoch": 0.39401211975760486, "grad_norm": 1.258349061012268, "learning_rate": 4.837035187603754e-06, "loss": 0.4064, "step": 6567 }, { "epoch": 0.39407211855762886, "grad_norm": 1.1711758375167847, "learning_rate": 4.836406580783059e-06, "loss": 0.4271, "step": 6568 }, { "epoch": 0.39413211735765286, "grad_norm": 1.3359332084655762, "learning_rate": 4.835777923492375e-06, "loss": 0.4545, "step": 6569 }, { "epoch": 0.39419211615767685, "grad_norm": 1.2456773519515991, "learning_rate": 4.835149215755444e-06, "loss": 0.4297, "step": 6570 }, { "epoch": 0.39425211495770085, "grad_norm": 1.178910255432129, "learning_rate": 4.834520457596012e-06, "loss": 0.3925, "step": 6571 }, { "epoch": 0.39431211375772485, "grad_norm": 1.2687915563583374, "learning_rate": 4.833891649037821e-06, "loss": 0.3929, "step": 6572 }, { "epoch": 0.39437211255774884, "grad_norm": 1.2001804113388062, "learning_rate": 4.83326279010462e-06, "loss": 0.4165, "step": 6573 }, { "epoch": 0.39443211135777284, "grad_norm": 1.3622010946273804, "learning_rate": 4.832633880820158e-06, "loss": 0.414, "step": 6574 }, { "epoch": 0.39449211015779684, "grad_norm": 1.386317253112793, "learning_rate": 4.8320049212081855e-06, "loss": 0.4507, "step": 6575 }, { "epoch": 0.39455210895782084, "grad_norm": 1.1800328493118286, "learning_rate": 4.831375911292455e-06, "loss": 0.4013, "step": 6576 }, { "epoch": 0.39461210775784483, "grad_norm": 1.2289332151412964, "learning_rate": 4.830746851096723e-06, "loss": 0.4349, "step": 6577 }, { "epoch": 0.39467210655786883, "grad_norm": 1.3276184797286987, "learning_rate": 4.830117740644745e-06, "loss": 0.4013, "step": 6578 }, { "epoch": 0.3947321053578928, "grad_norm": 1.188906192779541, "learning_rate": 4.82948857996028e-06, "loss": 0.4019, "step": 6579 }, { "epoch": 0.3947921041579168, "grad_norm": 1.2945523262023926, "learning_rate": 4.8288593690670875e-06, "loss": 0.409, "step": 6580 }, { "epoch": 0.3948521029579408, "grad_norm": 1.2872531414031982, "learning_rate": 4.828230107988933e-06, "loss": 0.3879, "step": 6581 }, { "epoch": 0.3949121017579648, "grad_norm": 1.1834843158721924, "learning_rate": 4.827600796749577e-06, "loss": 0.3919, "step": 6582 }, { "epoch": 0.3949721005579888, "grad_norm": 1.3282994031906128, "learning_rate": 4.826971435372788e-06, "loss": 0.3898, "step": 6583 }, { "epoch": 0.39503209935801287, "grad_norm": 1.2430839538574219, "learning_rate": 4.826342023882333e-06, "loss": 0.3931, "step": 6584 }, { "epoch": 0.39509209815803686, "grad_norm": 1.2527707815170288, "learning_rate": 4.825712562301984e-06, "loss": 0.4188, "step": 6585 }, { "epoch": 0.39515209695806086, "grad_norm": 1.2739745378494263, "learning_rate": 4.825083050655509e-06, "loss": 0.4133, "step": 6586 }, { "epoch": 0.39521209575808486, "grad_norm": 1.3103764057159424, "learning_rate": 4.8244534889666865e-06, "loss": 0.4144, "step": 6587 }, { "epoch": 0.39527209455810886, "grad_norm": 1.2151519060134888, "learning_rate": 4.823823877259289e-06, "loss": 0.3802, "step": 6588 }, { "epoch": 0.39533209335813285, "grad_norm": 1.268075942993164, "learning_rate": 4.823194215557096e-06, "loss": 0.3674, "step": 6589 }, { "epoch": 0.39539209215815685, "grad_norm": 1.4170947074890137, "learning_rate": 4.822564503883885e-06, "loss": 0.5278, "step": 6590 }, { "epoch": 0.39545209095818085, "grad_norm": 1.1522830724716187, "learning_rate": 4.821934742263439e-06, "loss": 0.4272, "step": 6591 }, { "epoch": 0.39551208975820484, "grad_norm": 1.3221720457077026, "learning_rate": 4.821304930719538e-06, "loss": 0.4347, "step": 6592 }, { "epoch": 0.39557208855822884, "grad_norm": 1.2774014472961426, "learning_rate": 4.820675069275972e-06, "loss": 0.3964, "step": 6593 }, { "epoch": 0.39563208735825284, "grad_norm": 1.219655990600586, "learning_rate": 4.820045157956526e-06, "loss": 0.43, "step": 6594 }, { "epoch": 0.39569208615827683, "grad_norm": 1.1540634632110596, "learning_rate": 4.819415196784987e-06, "loss": 0.4101, "step": 6595 }, { "epoch": 0.39575208495830083, "grad_norm": 1.377010703086853, "learning_rate": 4.818785185785148e-06, "loss": 0.4559, "step": 6596 }, { "epoch": 0.39581208375832483, "grad_norm": 1.1369717121124268, "learning_rate": 4.818155124980801e-06, "loss": 0.4068, "step": 6597 }, { "epoch": 0.3958720825583488, "grad_norm": 1.3272892236709595, "learning_rate": 4.81752501439574e-06, "loss": 0.4558, "step": 6598 }, { "epoch": 0.3959320813583728, "grad_norm": 1.3511362075805664, "learning_rate": 4.816894854053762e-06, "loss": 0.4118, "step": 6599 }, { "epoch": 0.3959920801583968, "grad_norm": 1.27162766456604, "learning_rate": 4.816264643978665e-06, "loss": 0.4754, "step": 6600 }, { "epoch": 0.3960520789584208, "grad_norm": 1.3875471353530884, "learning_rate": 4.8156343841942495e-06, "loss": 0.4851, "step": 6601 }, { "epoch": 0.3961120777584448, "grad_norm": 1.2250277996063232, "learning_rate": 4.815004074724318e-06, "loss": 0.391, "step": 6602 }, { "epoch": 0.3961720765584688, "grad_norm": 1.2295695543289185, "learning_rate": 4.814373715592672e-06, "loss": 0.4411, "step": 6603 }, { "epoch": 0.3962320753584928, "grad_norm": 1.2891422510147095, "learning_rate": 4.813743306823121e-06, "loss": 0.4149, "step": 6604 }, { "epoch": 0.3962920741585168, "grad_norm": 1.2692161798477173, "learning_rate": 4.813112848439468e-06, "loss": 0.4204, "step": 6605 }, { "epoch": 0.3963520729585408, "grad_norm": 1.212822675704956, "learning_rate": 4.8124823404655284e-06, "loss": 0.3669, "step": 6606 }, { "epoch": 0.39641207175856485, "grad_norm": 1.22439706325531, "learning_rate": 4.8118517829251085e-06, "loss": 0.4208, "step": 6607 }, { "epoch": 0.39647207055858885, "grad_norm": 1.3135994672775269, "learning_rate": 4.8112211758420245e-06, "loss": 0.4451, "step": 6608 }, { "epoch": 0.39653206935861285, "grad_norm": 1.2377090454101562, "learning_rate": 4.810590519240089e-06, "loss": 0.4196, "step": 6609 }, { "epoch": 0.39659206815863685, "grad_norm": 1.4730932712554932, "learning_rate": 4.8099598131431235e-06, "loss": 0.4564, "step": 6610 }, { "epoch": 0.39665206695866084, "grad_norm": 1.2950550317764282, "learning_rate": 4.809329057574941e-06, "loss": 0.414, "step": 6611 }, { "epoch": 0.39671206575868484, "grad_norm": 1.229192852973938, "learning_rate": 4.808698252559367e-06, "loss": 0.4327, "step": 6612 }, { "epoch": 0.39677206455870884, "grad_norm": 1.409232497215271, "learning_rate": 4.808067398120222e-06, "loss": 0.4439, "step": 6613 }, { "epoch": 0.39683206335873283, "grad_norm": 1.3406052589416504, "learning_rate": 4.807436494281331e-06, "loss": 0.439, "step": 6614 }, { "epoch": 0.39689206215875683, "grad_norm": 1.2807939052581787, "learning_rate": 4.806805541066521e-06, "loss": 0.3827, "step": 6615 }, { "epoch": 0.39695206095878083, "grad_norm": 1.3871058225631714, "learning_rate": 4.806174538499618e-06, "loss": 0.4088, "step": 6616 }, { "epoch": 0.3970120597588048, "grad_norm": 1.4021480083465576, "learning_rate": 4.805543486604455e-06, "loss": 0.4152, "step": 6617 }, { "epoch": 0.3970720585588288, "grad_norm": 1.2001291513442993, "learning_rate": 4.804912385404861e-06, "loss": 0.3929, "step": 6618 }, { "epoch": 0.3971320573588528, "grad_norm": 1.2617677450180054, "learning_rate": 4.804281234924672e-06, "loss": 0.4116, "step": 6619 }, { "epoch": 0.3971920561588768, "grad_norm": 1.2573599815368652, "learning_rate": 4.803650035187724e-06, "loss": 0.449, "step": 6620 }, { "epoch": 0.3972520549589008, "grad_norm": 1.4147945642471313, "learning_rate": 4.803018786217852e-06, "loss": 0.4293, "step": 6621 }, { "epoch": 0.3973120537589248, "grad_norm": 1.2203457355499268, "learning_rate": 4.802387488038898e-06, "loss": 0.444, "step": 6622 }, { "epoch": 0.3973720525589488, "grad_norm": 1.159888744354248, "learning_rate": 4.8017561406747025e-06, "loss": 0.4006, "step": 6623 }, { "epoch": 0.3974320513589728, "grad_norm": 1.1325737237930298, "learning_rate": 4.801124744149108e-06, "loss": 0.4134, "step": 6624 }, { "epoch": 0.3974920501589968, "grad_norm": 1.2739518880844116, "learning_rate": 4.80049329848596e-06, "loss": 0.4526, "step": 6625 }, { "epoch": 0.3975520489590208, "grad_norm": 1.3068162202835083, "learning_rate": 4.799861803709105e-06, "loss": 0.4345, "step": 6626 }, { "epoch": 0.3976120477590448, "grad_norm": 1.2989858388900757, "learning_rate": 4.7992302598423915e-06, "loss": 0.4299, "step": 6627 }, { "epoch": 0.3976720465590688, "grad_norm": 1.2734612226486206, "learning_rate": 4.79859866690967e-06, "loss": 0.4649, "step": 6628 }, { "epoch": 0.39773204535909285, "grad_norm": 1.1213170289993286, "learning_rate": 4.797967024934794e-06, "loss": 0.3695, "step": 6629 }, { "epoch": 0.39779204415911684, "grad_norm": 1.1910818815231323, "learning_rate": 4.797335333941617e-06, "loss": 0.3964, "step": 6630 }, { "epoch": 0.39785204295914084, "grad_norm": 1.38451087474823, "learning_rate": 4.796703593953995e-06, "loss": 0.4876, "step": 6631 }, { "epoch": 0.39791204175916484, "grad_norm": 1.3560153245925903, "learning_rate": 4.796071804995786e-06, "loss": 0.4951, "step": 6632 }, { "epoch": 0.39797204055918883, "grad_norm": 1.4373314380645752, "learning_rate": 4.79543996709085e-06, "loss": 0.4025, "step": 6633 }, { "epoch": 0.39803203935921283, "grad_norm": 1.0780248641967773, "learning_rate": 4.794808080263048e-06, "loss": 0.4097, "step": 6634 }, { "epoch": 0.3980920381592368, "grad_norm": 1.429793357849121, "learning_rate": 4.794176144536244e-06, "loss": 0.4962, "step": 6635 }, { "epoch": 0.3981520369592608, "grad_norm": 1.163487434387207, "learning_rate": 4.793544159934303e-06, "loss": 0.3872, "step": 6636 }, { "epoch": 0.3982120357592848, "grad_norm": 1.2726224660873413, "learning_rate": 4.792912126481094e-06, "loss": 0.4222, "step": 6637 }, { "epoch": 0.3982720345593088, "grad_norm": 1.1280239820480347, "learning_rate": 4.792280044200482e-06, "loss": 0.4067, "step": 6638 }, { "epoch": 0.3983320333593328, "grad_norm": 1.4143569469451904, "learning_rate": 4.791647913116342e-06, "loss": 0.4608, "step": 6639 }, { "epoch": 0.3983920321593568, "grad_norm": 1.2352155447006226, "learning_rate": 4.791015733252545e-06, "loss": 0.4089, "step": 6640 }, { "epoch": 0.3984520309593808, "grad_norm": 1.2942497730255127, "learning_rate": 4.790383504632966e-06, "loss": 0.4914, "step": 6641 }, { "epoch": 0.3985120297594048, "grad_norm": 1.3522948026657104, "learning_rate": 4.78975122728148e-06, "loss": 0.4557, "step": 6642 }, { "epoch": 0.3985720285594288, "grad_norm": 1.2709578275680542, "learning_rate": 4.789118901221967e-06, "loss": 0.3757, "step": 6643 }, { "epoch": 0.3986320273594528, "grad_norm": 1.2746026515960693, "learning_rate": 4.7884865264783055e-06, "loss": 0.4257, "step": 6644 }, { "epoch": 0.3986920261594768, "grad_norm": 1.2631423473358154, "learning_rate": 4.787854103074378e-06, "loss": 0.4475, "step": 6645 }, { "epoch": 0.3987520249595008, "grad_norm": 1.2762014865875244, "learning_rate": 4.78722163103407e-06, "loss": 0.3893, "step": 6646 }, { "epoch": 0.3988120237595248, "grad_norm": 1.107545256614685, "learning_rate": 4.7865891103812645e-06, "loss": 0.415, "step": 6647 }, { "epoch": 0.3988720225595488, "grad_norm": 1.408057689666748, "learning_rate": 4.7859565411398504e-06, "loss": 0.4736, "step": 6648 }, { "epoch": 0.3989320213595728, "grad_norm": 1.4221266508102417, "learning_rate": 4.785323923333717e-06, "loss": 0.4216, "step": 6649 }, { "epoch": 0.3989920201595968, "grad_norm": 1.3490405082702637, "learning_rate": 4.784691256986754e-06, "loss": 0.4512, "step": 6650 }, { "epoch": 0.3990520189596208, "grad_norm": 1.2641345262527466, "learning_rate": 4.784058542122855e-06, "loss": 0.3871, "step": 6651 }, { "epoch": 0.39911201775964483, "grad_norm": 1.2890658378601074, "learning_rate": 4.783425778765915e-06, "loss": 0.4492, "step": 6652 }, { "epoch": 0.39917201655966883, "grad_norm": 1.3483247756958008, "learning_rate": 4.782792966939832e-06, "loss": 0.4397, "step": 6653 }, { "epoch": 0.3992320153596928, "grad_norm": 1.249411702156067, "learning_rate": 4.782160106668502e-06, "loss": 0.3895, "step": 6654 }, { "epoch": 0.3992920141597168, "grad_norm": 1.1983275413513184, "learning_rate": 4.781527197975826e-06, "loss": 0.4075, "step": 6655 }, { "epoch": 0.3993520129597408, "grad_norm": 1.127134919166565, "learning_rate": 4.780894240885707e-06, "loss": 0.425, "step": 6656 }, { "epoch": 0.3994120117597648, "grad_norm": 1.2188916206359863, "learning_rate": 4.780261235422048e-06, "loss": 0.434, "step": 6657 }, { "epoch": 0.3994720105597888, "grad_norm": 1.2467634677886963, "learning_rate": 4.779628181608755e-06, "loss": 0.3646, "step": 6658 }, { "epoch": 0.3995320093598128, "grad_norm": 1.3484019041061401, "learning_rate": 4.778995079469736e-06, "loss": 0.4344, "step": 6659 }, { "epoch": 0.3995920081598368, "grad_norm": 1.3202534914016724, "learning_rate": 4.778361929028899e-06, "loss": 0.4308, "step": 6660 }, { "epoch": 0.3996520069598608, "grad_norm": 1.1557848453521729, "learning_rate": 4.777728730310157e-06, "loss": 0.4235, "step": 6661 }, { "epoch": 0.3997120057598848, "grad_norm": 1.259566068649292, "learning_rate": 4.777095483337422e-06, "loss": 0.4573, "step": 6662 }, { "epoch": 0.3997720045599088, "grad_norm": 1.5601931810379028, "learning_rate": 4.776462188134608e-06, "loss": 0.4299, "step": 6663 }, { "epoch": 0.3998320033599328, "grad_norm": 1.2840945720672607, "learning_rate": 4.775828844725635e-06, "loss": 0.4138, "step": 6664 }, { "epoch": 0.3998920021599568, "grad_norm": 1.1149545907974243, "learning_rate": 4.775195453134417e-06, "loss": 0.468, "step": 6665 }, { "epoch": 0.3999520009599808, "grad_norm": 1.3465020656585693, "learning_rate": 4.774562013384878e-06, "loss": 0.4494, "step": 6666 }, { "epoch": 0.4000119997600048, "grad_norm": 1.2676705121994019, "learning_rate": 4.773928525500938e-06, "loss": 0.3699, "step": 6667 }, { "epoch": 0.4000719985600288, "grad_norm": 1.4326871633529663, "learning_rate": 4.773294989506521e-06, "loss": 0.4457, "step": 6668 }, { "epoch": 0.4001319973600528, "grad_norm": 1.1571205854415894, "learning_rate": 4.772661405425554e-06, "loss": 0.3992, "step": 6669 }, { "epoch": 0.4001919961600768, "grad_norm": 1.3937734365463257, "learning_rate": 4.772027773281964e-06, "loss": 0.4267, "step": 6670 }, { "epoch": 0.4002519949601008, "grad_norm": 1.1803929805755615, "learning_rate": 4.771394093099681e-06, "loss": 0.3532, "step": 6671 }, { "epoch": 0.4003119937601248, "grad_norm": 1.3996074199676514, "learning_rate": 4.770760364902635e-06, "loss": 0.4578, "step": 6672 }, { "epoch": 0.40037199256014877, "grad_norm": 1.2108958959579468, "learning_rate": 4.770126588714758e-06, "loss": 0.447, "step": 6673 }, { "epoch": 0.4004319913601728, "grad_norm": 1.4299672842025757, "learning_rate": 4.769492764559987e-06, "loss": 0.3918, "step": 6674 }, { "epoch": 0.4004919901601968, "grad_norm": 1.488879680633545, "learning_rate": 4.76885889246226e-06, "loss": 0.4737, "step": 6675 }, { "epoch": 0.4005519889602208, "grad_norm": 1.1859861612319946, "learning_rate": 4.768224972445512e-06, "loss": 0.3975, "step": 6676 }, { "epoch": 0.4006119877602448, "grad_norm": 1.3665558099746704, "learning_rate": 4.7675910045336865e-06, "loss": 0.485, "step": 6677 }, { "epoch": 0.4006719865602688, "grad_norm": 1.2944046258926392, "learning_rate": 4.766956988750722e-06, "loss": 0.4061, "step": 6678 }, { "epoch": 0.4007319853602928, "grad_norm": 1.1861512660980225, "learning_rate": 4.766322925120565e-06, "loss": 0.4103, "step": 6679 }, { "epoch": 0.4007919841603168, "grad_norm": 1.1936427354812622, "learning_rate": 4.7656888136671595e-06, "loss": 0.4166, "step": 6680 }, { "epoch": 0.4008519829603408, "grad_norm": 1.1788548231124878, "learning_rate": 4.765054654414455e-06, "loss": 0.4201, "step": 6681 }, { "epoch": 0.4009119817603648, "grad_norm": 1.24456787109375, "learning_rate": 4.7644204473864e-06, "loss": 0.4529, "step": 6682 }, { "epoch": 0.4009719805603888, "grad_norm": 1.1762709617614746, "learning_rate": 4.763786192606945e-06, "loss": 0.4454, "step": 6683 }, { "epoch": 0.4010319793604128, "grad_norm": 1.3672432899475098, "learning_rate": 4.763151890100043e-06, "loss": 0.3998, "step": 6684 }, { "epoch": 0.4010919781604368, "grad_norm": 1.2554032802581787, "learning_rate": 4.76251753988965e-06, "loss": 0.4394, "step": 6685 }, { "epoch": 0.4011519769604608, "grad_norm": 1.1958080530166626, "learning_rate": 4.76188314199972e-06, "loss": 0.3872, "step": 6686 }, { "epoch": 0.4012119757604848, "grad_norm": 1.3293901681900024, "learning_rate": 4.761248696454214e-06, "loss": 0.4176, "step": 6687 }, { "epoch": 0.4012719745605088, "grad_norm": 1.300147294998169, "learning_rate": 4.7606142032770905e-06, "loss": 0.4099, "step": 6688 }, { "epoch": 0.4013319733605328, "grad_norm": 1.3125096559524536, "learning_rate": 4.759979662492313e-06, "loss": 0.4249, "step": 6689 }, { "epoch": 0.4013919721605568, "grad_norm": 1.2160664796829224, "learning_rate": 4.759345074123842e-06, "loss": 0.4416, "step": 6690 }, { "epoch": 0.4014519709605808, "grad_norm": 1.4402923583984375, "learning_rate": 4.758710438195646e-06, "loss": 0.4403, "step": 6691 }, { "epoch": 0.40151196976060477, "grad_norm": 1.4873775243759155, "learning_rate": 4.758075754731693e-06, "loss": 0.4267, "step": 6692 }, { "epoch": 0.40157196856062877, "grad_norm": 1.1550087928771973, "learning_rate": 4.757441023755949e-06, "loss": 0.3765, "step": 6693 }, { "epoch": 0.40163196736065276, "grad_norm": 1.3589788675308228, "learning_rate": 4.756806245292387e-06, "loss": 0.4492, "step": 6694 }, { "epoch": 0.40169196616067676, "grad_norm": 1.2466557025909424, "learning_rate": 4.756171419364977e-06, "loss": 0.4075, "step": 6695 }, { "epoch": 0.4017519649607008, "grad_norm": 1.2883901596069336, "learning_rate": 4.7555365459976984e-06, "loss": 0.4387, "step": 6696 }, { "epoch": 0.4018119637607248, "grad_norm": 1.2631570100784302, "learning_rate": 4.754901625214522e-06, "loss": 0.4812, "step": 6697 }, { "epoch": 0.4018719625607488, "grad_norm": 1.2511309385299683, "learning_rate": 4.754266657039431e-06, "loss": 0.4323, "step": 6698 }, { "epoch": 0.4019319613607728, "grad_norm": 1.4013245105743408, "learning_rate": 4.753631641496402e-06, "loss": 0.432, "step": 6699 }, { "epoch": 0.4019919601607968, "grad_norm": 1.3564445972442627, "learning_rate": 4.752996578609418e-06, "loss": 0.4201, "step": 6700 }, { "epoch": 0.4020519589608208, "grad_norm": 1.285037636756897, "learning_rate": 4.752361468402461e-06, "loss": 0.4001, "step": 6701 }, { "epoch": 0.4021119577608448, "grad_norm": 1.278329610824585, "learning_rate": 4.751726310899518e-06, "loss": 0.439, "step": 6702 }, { "epoch": 0.4021719565608688, "grad_norm": 1.1237833499908447, "learning_rate": 4.751091106124573e-06, "loss": 0.3772, "step": 6703 }, { "epoch": 0.4022319553608928, "grad_norm": 1.443568229675293, "learning_rate": 4.750455854101618e-06, "loss": 0.4595, "step": 6704 }, { "epoch": 0.4022919541609168, "grad_norm": 1.2722288370132446, "learning_rate": 4.749820554854643e-06, "loss": 0.4534, "step": 6705 }, { "epoch": 0.4023519529609408, "grad_norm": 1.3044838905334473, "learning_rate": 4.7491852084076404e-06, "loss": 0.4665, "step": 6706 }, { "epoch": 0.4024119517609648, "grad_norm": 1.2494356632232666, "learning_rate": 4.748549814784602e-06, "loss": 0.4379, "step": 6707 }, { "epoch": 0.4024719505609888, "grad_norm": 1.263201117515564, "learning_rate": 4.747914374009527e-06, "loss": 0.4665, "step": 6708 }, { "epoch": 0.4025319493610128, "grad_norm": 1.2118456363677979, "learning_rate": 4.74727888610641e-06, "loss": 0.4033, "step": 6709 }, { "epoch": 0.4025919481610368, "grad_norm": 1.221199631690979, "learning_rate": 4.746643351099254e-06, "loss": 0.3895, "step": 6710 }, { "epoch": 0.40265194696106077, "grad_norm": 1.3161256313323975, "learning_rate": 4.746007769012057e-06, "loss": 0.4523, "step": 6711 }, { "epoch": 0.40271194576108477, "grad_norm": 1.2751575708389282, "learning_rate": 4.745372139868824e-06, "loss": 0.4258, "step": 6712 }, { "epoch": 0.40277194456110876, "grad_norm": 1.1611639261245728, "learning_rate": 4.7447364636935585e-06, "loss": 0.433, "step": 6713 }, { "epoch": 0.40283194336113276, "grad_norm": 1.357069969177246, "learning_rate": 4.744100740510268e-06, "loss": 0.4463, "step": 6714 }, { "epoch": 0.40289194216115676, "grad_norm": 1.3106824159622192, "learning_rate": 4.743464970342961e-06, "loss": 0.438, "step": 6715 }, { "epoch": 0.40295194096118075, "grad_norm": 1.2509530782699585, "learning_rate": 4.742829153215647e-06, "loss": 0.4046, "step": 6716 }, { "epoch": 0.40301193976120475, "grad_norm": 1.4690239429473877, "learning_rate": 4.742193289152339e-06, "loss": 0.4628, "step": 6717 }, { "epoch": 0.40307193856122875, "grad_norm": 1.2341481447219849, "learning_rate": 4.741557378177048e-06, "loss": 0.3972, "step": 6718 }, { "epoch": 0.4031319373612528, "grad_norm": 1.307049036026001, "learning_rate": 4.7409214203137935e-06, "loss": 0.4164, "step": 6719 }, { "epoch": 0.4031919361612768, "grad_norm": 1.389655590057373, "learning_rate": 4.740285415586588e-06, "loss": 0.4658, "step": 6720 }, { "epoch": 0.4032519349613008, "grad_norm": 1.3395442962646484, "learning_rate": 4.739649364019456e-06, "loss": 0.5017, "step": 6721 }, { "epoch": 0.4033119337613248, "grad_norm": 1.2765202522277832, "learning_rate": 4.739013265636414e-06, "loss": 0.4257, "step": 6722 }, { "epoch": 0.4033719325613488, "grad_norm": 1.2159013748168945, "learning_rate": 4.738377120461487e-06, "loss": 0.3638, "step": 6723 }, { "epoch": 0.4034319313613728, "grad_norm": 1.156792163848877, "learning_rate": 4.737740928518696e-06, "loss": 0.3841, "step": 6724 }, { "epoch": 0.4034919301613968, "grad_norm": 1.2359278202056885, "learning_rate": 4.737104689832071e-06, "loss": 0.4506, "step": 6725 }, { "epoch": 0.4035519289614208, "grad_norm": 1.2697131633758545, "learning_rate": 4.736468404425636e-06, "loss": 0.3949, "step": 6726 }, { "epoch": 0.4036119277614448, "grad_norm": 1.3609734773635864, "learning_rate": 4.735832072323424e-06, "loss": 0.4413, "step": 6727 }, { "epoch": 0.4036719265614688, "grad_norm": 1.3368529081344604, "learning_rate": 4.7351956935494656e-06, "loss": 0.4412, "step": 6728 }, { "epoch": 0.40373192536149277, "grad_norm": 1.2710634469985962, "learning_rate": 4.734559268127793e-06, "loss": 0.394, "step": 6729 }, { "epoch": 0.40379192416151677, "grad_norm": 1.2249459028244019, "learning_rate": 4.7339227960824415e-06, "loss": 0.4131, "step": 6730 }, { "epoch": 0.40385192296154077, "grad_norm": 1.1960911750793457, "learning_rate": 4.733286277437449e-06, "loss": 0.4695, "step": 6731 }, { "epoch": 0.40391192176156476, "grad_norm": 1.2965213060379028, "learning_rate": 4.732649712216849e-06, "loss": 0.4598, "step": 6732 }, { "epoch": 0.40397192056158876, "grad_norm": 1.2893849611282349, "learning_rate": 4.732013100444689e-06, "loss": 0.3588, "step": 6733 }, { "epoch": 0.40403191936161276, "grad_norm": 1.2808868885040283, "learning_rate": 4.731376442145005e-06, "loss": 0.3848, "step": 6734 }, { "epoch": 0.40409191816163675, "grad_norm": 1.312317132949829, "learning_rate": 4.730739737341845e-06, "loss": 0.4462, "step": 6735 }, { "epoch": 0.40415191696166075, "grad_norm": 1.2799506187438965, "learning_rate": 4.730102986059251e-06, "loss": 0.4025, "step": 6736 }, { "epoch": 0.40421191576168475, "grad_norm": 1.4664307832717896, "learning_rate": 4.729466188321272e-06, "loss": 0.4478, "step": 6737 }, { "epoch": 0.40427191456170875, "grad_norm": 1.3793823719024658, "learning_rate": 4.728829344151955e-06, "loss": 0.4271, "step": 6738 }, { "epoch": 0.40433191336173274, "grad_norm": 1.2452049255371094, "learning_rate": 4.728192453575353e-06, "loss": 0.4117, "step": 6739 }, { "epoch": 0.40439191216175674, "grad_norm": 1.2155773639678955, "learning_rate": 4.72755551661552e-06, "loss": 0.3755, "step": 6740 }, { "epoch": 0.4044519109617808, "grad_norm": 1.4218605756759644, "learning_rate": 4.726918533296505e-06, "loss": 0.4383, "step": 6741 }, { "epoch": 0.4045119097618048, "grad_norm": 1.2171542644500732, "learning_rate": 4.726281503642369e-06, "loss": 0.3777, "step": 6742 }, { "epoch": 0.4045719085618288, "grad_norm": 1.3096423149108887, "learning_rate": 4.725644427677165e-06, "loss": 0.4401, "step": 6743 }, { "epoch": 0.4046319073618528, "grad_norm": 1.2676571607589722, "learning_rate": 4.725007305424957e-06, "loss": 0.4363, "step": 6744 }, { "epoch": 0.4046919061618768, "grad_norm": 1.2075518369674683, "learning_rate": 4.7243701369098024e-06, "loss": 0.4009, "step": 6745 }, { "epoch": 0.4047519049619008, "grad_norm": 1.3397283554077148, "learning_rate": 4.723732922155768e-06, "loss": 0.4592, "step": 6746 }, { "epoch": 0.4048119037619248, "grad_norm": 1.2180290222167969, "learning_rate": 4.723095661186915e-06, "loss": 0.382, "step": 6747 }, { "epoch": 0.40487190256194877, "grad_norm": 1.260215401649475, "learning_rate": 4.722458354027312e-06, "loss": 0.436, "step": 6748 }, { "epoch": 0.40493190136197277, "grad_norm": 1.2633875608444214, "learning_rate": 4.721821000701026e-06, "loss": 0.4033, "step": 6749 }, { "epoch": 0.40499190016199677, "grad_norm": 1.3165932893753052, "learning_rate": 4.721183601232128e-06, "loss": 0.4445, "step": 6750 }, { "epoch": 0.40505189896202076, "grad_norm": 1.1767491102218628, "learning_rate": 4.7205461556446876e-06, "loss": 0.3788, "step": 6751 }, { "epoch": 0.40511189776204476, "grad_norm": 1.1940712928771973, "learning_rate": 4.719908663962782e-06, "loss": 0.3719, "step": 6752 }, { "epoch": 0.40517189656206876, "grad_norm": 1.2501105070114136, "learning_rate": 4.719271126210482e-06, "loss": 0.4538, "step": 6753 }, { "epoch": 0.40523189536209275, "grad_norm": 1.0926340818405151, "learning_rate": 4.718633542411868e-06, "loss": 0.4, "step": 6754 }, { "epoch": 0.40529189416211675, "grad_norm": 1.2974653244018555, "learning_rate": 4.717995912591016e-06, "loss": 0.4322, "step": 6755 }, { "epoch": 0.40535189296214075, "grad_norm": 1.174882173538208, "learning_rate": 4.717358236772009e-06, "loss": 0.3849, "step": 6756 }, { "epoch": 0.40541189176216474, "grad_norm": 1.3206000328063965, "learning_rate": 4.716720514978927e-06, "loss": 0.4347, "step": 6757 }, { "epoch": 0.40547189056218874, "grad_norm": 1.18045973777771, "learning_rate": 4.7160827472358545e-06, "loss": 0.4131, "step": 6758 }, { "epoch": 0.40553188936221274, "grad_norm": 1.204441785812378, "learning_rate": 4.715444933566876e-06, "loss": 0.3964, "step": 6759 }, { "epoch": 0.40559188816223674, "grad_norm": 1.139378309249878, "learning_rate": 4.714807073996082e-06, "loss": 0.3798, "step": 6760 }, { "epoch": 0.40565188696226073, "grad_norm": 1.206936001777649, "learning_rate": 4.714169168547557e-06, "loss": 0.4319, "step": 6761 }, { "epoch": 0.40571188576228473, "grad_norm": 1.2041269540786743, "learning_rate": 4.7135312172453955e-06, "loss": 0.3767, "step": 6762 }, { "epoch": 0.4057718845623087, "grad_norm": 1.2320812940597534, "learning_rate": 4.712893220113689e-06, "loss": 0.3973, "step": 6763 }, { "epoch": 0.4058318833623328, "grad_norm": 1.2738548517227173, "learning_rate": 4.712255177176532e-06, "loss": 0.4673, "step": 6764 }, { "epoch": 0.4058918821623568, "grad_norm": 1.2080131769180298, "learning_rate": 4.71161708845802e-06, "loss": 0.4087, "step": 6765 }, { "epoch": 0.4059518809623808, "grad_norm": 1.3245038986206055, "learning_rate": 4.710978953982249e-06, "loss": 0.4821, "step": 6766 }, { "epoch": 0.40601187976240477, "grad_norm": 1.2233099937438965, "learning_rate": 4.710340773773321e-06, "loss": 0.4703, "step": 6767 }, { "epoch": 0.40607187856242877, "grad_norm": 1.2359219789505005, "learning_rate": 4.7097025478553364e-06, "loss": 0.4124, "step": 6768 }, { "epoch": 0.40613187736245276, "grad_norm": 1.332844853401184, "learning_rate": 4.709064276252398e-06, "loss": 0.4687, "step": 6769 }, { "epoch": 0.40619187616247676, "grad_norm": 1.3588958978652954, "learning_rate": 4.70842595898861e-06, "loss": 0.4863, "step": 6770 }, { "epoch": 0.40625187496250076, "grad_norm": 1.2274218797683716, "learning_rate": 4.707787596088081e-06, "loss": 0.4458, "step": 6771 }, { "epoch": 0.40631187376252476, "grad_norm": 1.3531913757324219, "learning_rate": 4.707149187574914e-06, "loss": 0.5054, "step": 6772 }, { "epoch": 0.40637187256254875, "grad_norm": 1.3777133226394653, "learning_rate": 4.706510733473223e-06, "loss": 0.5023, "step": 6773 }, { "epoch": 0.40643187136257275, "grad_norm": 1.2936280965805054, "learning_rate": 4.705872233807119e-06, "loss": 0.4034, "step": 6774 }, { "epoch": 0.40649187016259675, "grad_norm": 1.3772008419036865, "learning_rate": 4.7052336886007154e-06, "loss": 0.4109, "step": 6775 }, { "epoch": 0.40655186896262074, "grad_norm": 1.228105902671814, "learning_rate": 4.704595097878125e-06, "loss": 0.4245, "step": 6776 }, { "epoch": 0.40661186776264474, "grad_norm": 1.1964341402053833, "learning_rate": 4.703956461663468e-06, "loss": 0.4008, "step": 6777 }, { "epoch": 0.40667186656266874, "grad_norm": 1.257713794708252, "learning_rate": 4.703317779980859e-06, "loss": 0.4605, "step": 6778 }, { "epoch": 0.40673186536269273, "grad_norm": 1.2999122142791748, "learning_rate": 4.70267905285442e-06, "loss": 0.4435, "step": 6779 }, { "epoch": 0.40679186416271673, "grad_norm": 1.291627287864685, "learning_rate": 4.702040280308271e-06, "loss": 0.4362, "step": 6780 }, { "epoch": 0.40685186296274073, "grad_norm": 1.1209001541137695, "learning_rate": 4.701401462366539e-06, "loss": 0.4191, "step": 6781 }, { "epoch": 0.4069118617627647, "grad_norm": 1.4912586212158203, "learning_rate": 4.7007625990533465e-06, "loss": 0.4373, "step": 6782 }, { "epoch": 0.4069718605627887, "grad_norm": 1.1958870887756348, "learning_rate": 4.700123690392823e-06, "loss": 0.3917, "step": 6783 }, { "epoch": 0.4070318593628127, "grad_norm": 1.2434649467468262, "learning_rate": 4.699484736409093e-06, "loss": 0.4434, "step": 6784 }, { "epoch": 0.4070918581628367, "grad_norm": 1.3386329412460327, "learning_rate": 4.698845737126291e-06, "loss": 0.3822, "step": 6785 }, { "epoch": 0.40715185696286077, "grad_norm": 1.2924365997314453, "learning_rate": 4.6982066925685475e-06, "loss": 0.4494, "step": 6786 }, { "epoch": 0.40721185576288477, "grad_norm": 1.2522908449172974, "learning_rate": 4.697567602759995e-06, "loss": 0.4774, "step": 6787 }, { "epoch": 0.40727185456290876, "grad_norm": 1.2839570045471191, "learning_rate": 4.696928467724771e-06, "loss": 0.4318, "step": 6788 }, { "epoch": 0.40733185336293276, "grad_norm": 1.1580499410629272, "learning_rate": 4.696289287487012e-06, "loss": 0.4332, "step": 6789 }, { "epoch": 0.40739185216295676, "grad_norm": 1.2348207235336304, "learning_rate": 4.695650062070856e-06, "loss": 0.4148, "step": 6790 }, { "epoch": 0.40745185096298075, "grad_norm": 1.2777180671691895, "learning_rate": 4.695010791500446e-06, "loss": 0.4252, "step": 6791 }, { "epoch": 0.40751184976300475, "grad_norm": 1.1221386194229126, "learning_rate": 4.694371475799922e-06, "loss": 0.4574, "step": 6792 }, { "epoch": 0.40757184856302875, "grad_norm": 1.2757431268692017, "learning_rate": 4.69373211499343e-06, "loss": 0.4882, "step": 6793 }, { "epoch": 0.40763184736305275, "grad_norm": 1.2972071170806885, "learning_rate": 4.693092709105114e-06, "loss": 0.4563, "step": 6794 }, { "epoch": 0.40769184616307674, "grad_norm": 1.3266113996505737, "learning_rate": 4.692453258159121e-06, "loss": 0.402, "step": 6795 }, { "epoch": 0.40775184496310074, "grad_norm": 1.5472344160079956, "learning_rate": 4.691813762179602e-06, "loss": 0.4729, "step": 6796 }, { "epoch": 0.40781184376312474, "grad_norm": 1.210198163986206, "learning_rate": 4.691174221190707e-06, "loss": 0.345, "step": 6797 }, { "epoch": 0.40787184256314873, "grad_norm": 1.0907524824142456, "learning_rate": 4.6905346352165895e-06, "loss": 0.4266, "step": 6798 }, { "epoch": 0.40793184136317273, "grad_norm": 1.228353500366211, "learning_rate": 4.689895004281401e-06, "loss": 0.4138, "step": 6799 }, { "epoch": 0.40799184016319673, "grad_norm": 1.3605470657348633, "learning_rate": 4.6892553284093015e-06, "loss": 0.4219, "step": 6800 }, { "epoch": 0.4080518389632207, "grad_norm": 1.2923552989959717, "learning_rate": 4.688615607624445e-06, "loss": 0.4393, "step": 6801 }, { "epoch": 0.4081118377632447, "grad_norm": 1.3780481815338135, "learning_rate": 4.687975841950993e-06, "loss": 0.4445, "step": 6802 }, { "epoch": 0.4081718365632687, "grad_norm": 1.1952598094940186, "learning_rate": 4.6873360314131055e-06, "loss": 0.4045, "step": 6803 }, { "epoch": 0.4082318353632927, "grad_norm": 1.3160743713378906, "learning_rate": 4.686696176034946e-06, "loss": 0.4445, "step": 6804 }, { "epoch": 0.4082918341633167, "grad_norm": 1.2490135431289673, "learning_rate": 4.686056275840678e-06, "loss": 0.4346, "step": 6805 }, { "epoch": 0.4083518329633407, "grad_norm": 1.404009222984314, "learning_rate": 4.68541633085447e-06, "loss": 0.4354, "step": 6806 }, { "epoch": 0.4084118317633647, "grad_norm": 1.2613545656204224, "learning_rate": 4.684776341100485e-06, "loss": 0.37, "step": 6807 }, { "epoch": 0.4084718305633887, "grad_norm": 1.230217695236206, "learning_rate": 4.684136306602897e-06, "loss": 0.3827, "step": 6808 }, { "epoch": 0.40853182936341276, "grad_norm": 1.2564752101898193, "learning_rate": 4.683496227385876e-06, "loss": 0.4326, "step": 6809 }, { "epoch": 0.40859182816343675, "grad_norm": 1.3751977682113647, "learning_rate": 4.682856103473593e-06, "loss": 0.44, "step": 6810 }, { "epoch": 0.40865182696346075, "grad_norm": 1.1970899105072021, "learning_rate": 4.682215934890225e-06, "loss": 0.4108, "step": 6811 }, { "epoch": 0.40871182576348475, "grad_norm": 1.3720009326934814, "learning_rate": 4.681575721659948e-06, "loss": 0.4632, "step": 6812 }, { "epoch": 0.40877182456350875, "grad_norm": 1.2013323307037354, "learning_rate": 4.680935463806938e-06, "loss": 0.4101, "step": 6813 }, { "epoch": 0.40883182336353274, "grad_norm": 1.3649942874908447, "learning_rate": 4.680295161355376e-06, "loss": 0.4224, "step": 6814 }, { "epoch": 0.40889182216355674, "grad_norm": 1.1411235332489014, "learning_rate": 4.679654814329444e-06, "loss": 0.4116, "step": 6815 }, { "epoch": 0.40895182096358074, "grad_norm": 1.3169658184051514, "learning_rate": 4.679014422753324e-06, "loss": 0.4486, "step": 6816 }, { "epoch": 0.40901181976360473, "grad_norm": 1.2026630640029907, "learning_rate": 4.6783739866512e-06, "loss": 0.3654, "step": 6817 }, { "epoch": 0.40907181856362873, "grad_norm": 1.332870364189148, "learning_rate": 4.67773350604726e-06, "loss": 0.4592, "step": 6818 }, { "epoch": 0.40913181736365273, "grad_norm": 1.3249880075454712, "learning_rate": 4.677092980965691e-06, "loss": 0.4384, "step": 6819 }, { "epoch": 0.4091918161636767, "grad_norm": 1.4299236536026, "learning_rate": 4.676452411430682e-06, "loss": 0.4657, "step": 6820 }, { "epoch": 0.4092518149637007, "grad_norm": 1.352677345275879, "learning_rate": 4.675811797466427e-06, "loss": 0.3949, "step": 6821 }, { "epoch": 0.4093118137637247, "grad_norm": 1.4199306964874268, "learning_rate": 4.675171139097115e-06, "loss": 0.4366, "step": 6822 }, { "epoch": 0.4093718125637487, "grad_norm": 1.21751070022583, "learning_rate": 4.674530436346945e-06, "loss": 0.4044, "step": 6823 }, { "epoch": 0.4094318113637727, "grad_norm": 1.4133306741714478, "learning_rate": 4.673889689240111e-06, "loss": 0.4314, "step": 6824 }, { "epoch": 0.4094918101637967, "grad_norm": 1.3177149295806885, "learning_rate": 4.673248897800812e-06, "loss": 0.3958, "step": 6825 }, { "epoch": 0.4095518089638207, "grad_norm": 1.1737101078033447, "learning_rate": 4.672608062053248e-06, "loss": 0.4076, "step": 6826 }, { "epoch": 0.4096118077638447, "grad_norm": 1.202317476272583, "learning_rate": 4.671967182021619e-06, "loss": 0.4296, "step": 6827 }, { "epoch": 0.4096718065638687, "grad_norm": 1.2784972190856934, "learning_rate": 4.671326257730129e-06, "loss": 0.4308, "step": 6828 }, { "epoch": 0.4097318053638927, "grad_norm": 1.1614007949829102, "learning_rate": 4.670685289202983e-06, "loss": 0.3933, "step": 6829 }, { "epoch": 0.4097918041639167, "grad_norm": 1.3338563442230225, "learning_rate": 4.670044276464387e-06, "loss": 0.4604, "step": 6830 }, { "epoch": 0.40985180296394075, "grad_norm": 1.2819989919662476, "learning_rate": 4.669403219538551e-06, "loss": 0.4435, "step": 6831 }, { "epoch": 0.40991180176396474, "grad_norm": 1.2508502006530762, "learning_rate": 4.6687621184496814e-06, "loss": 0.4049, "step": 6832 }, { "epoch": 0.40997180056398874, "grad_norm": 1.2189406156539917, "learning_rate": 4.668120973221991e-06, "loss": 0.3919, "step": 6833 }, { "epoch": 0.41003179936401274, "grad_norm": 1.3814024925231934, "learning_rate": 4.667479783879695e-06, "loss": 0.3991, "step": 6834 }, { "epoch": 0.41009179816403674, "grad_norm": 1.288353681564331, "learning_rate": 4.666838550447006e-06, "loss": 0.3863, "step": 6835 }, { "epoch": 0.41015179696406073, "grad_norm": 1.1876140832901, "learning_rate": 4.6661972729481405e-06, "loss": 0.3943, "step": 6836 }, { "epoch": 0.41021179576408473, "grad_norm": 1.318961501121521, "learning_rate": 4.665555951407318e-06, "loss": 0.4546, "step": 6837 }, { "epoch": 0.4102717945641087, "grad_norm": 1.2012779712677002, "learning_rate": 4.6649145858487565e-06, "loss": 0.4307, "step": 6838 }, { "epoch": 0.4103317933641327, "grad_norm": 1.267897605895996, "learning_rate": 4.664273176296679e-06, "loss": 0.457, "step": 6839 }, { "epoch": 0.4103917921641567, "grad_norm": 1.3988027572631836, "learning_rate": 4.663631722775308e-06, "loss": 0.4483, "step": 6840 }, { "epoch": 0.4104517909641807, "grad_norm": 1.2247298955917358, "learning_rate": 4.6629902253088685e-06, "loss": 0.4934, "step": 6841 }, { "epoch": 0.4105117897642047, "grad_norm": 1.4355535507202148, "learning_rate": 4.662348683921587e-06, "loss": 0.4904, "step": 6842 }, { "epoch": 0.4105717885642287, "grad_norm": 1.4105571508407593, "learning_rate": 4.66170709863769e-06, "loss": 0.4172, "step": 6843 }, { "epoch": 0.4106317873642527, "grad_norm": 1.204670786857605, "learning_rate": 4.6610654694814105e-06, "loss": 0.4254, "step": 6844 }, { "epoch": 0.4106917861642767, "grad_norm": 1.3312689065933228, "learning_rate": 4.660423796476976e-06, "loss": 0.4222, "step": 6845 }, { "epoch": 0.4107517849643007, "grad_norm": 1.282975435256958, "learning_rate": 4.659782079648624e-06, "loss": 0.3956, "step": 6846 }, { "epoch": 0.4108117837643247, "grad_norm": 1.2441414594650269, "learning_rate": 4.659140319020585e-06, "loss": 0.3757, "step": 6847 }, { "epoch": 0.4108717825643487, "grad_norm": 1.3112138509750366, "learning_rate": 4.658498514617099e-06, "loss": 0.4507, "step": 6848 }, { "epoch": 0.4109317813643727, "grad_norm": 1.342199683189392, "learning_rate": 4.6578566664623996e-06, "loss": 0.4095, "step": 6849 }, { "epoch": 0.4109917801643967, "grad_norm": 1.3338299989700317, "learning_rate": 4.6572147745807305e-06, "loss": 0.4076, "step": 6850 }, { "epoch": 0.4110517789644207, "grad_norm": 1.2370494604110718, "learning_rate": 4.656572838996331e-06, "loss": 0.4436, "step": 6851 }, { "epoch": 0.4111117777644447, "grad_norm": 1.1777400970458984, "learning_rate": 4.655930859733446e-06, "loss": 0.4435, "step": 6852 }, { "epoch": 0.41117177656446874, "grad_norm": 1.1753060817718506, "learning_rate": 4.655288836816318e-06, "loss": 0.4354, "step": 6853 }, { "epoch": 0.41123177536449274, "grad_norm": 1.2437635660171509, "learning_rate": 4.654646770269194e-06, "loss": 0.4033, "step": 6854 }, { "epoch": 0.41129177416451673, "grad_norm": 1.467993974685669, "learning_rate": 4.654004660116322e-06, "loss": 0.4828, "step": 6855 }, { "epoch": 0.41135177296454073, "grad_norm": 1.192525863647461, "learning_rate": 4.653362506381952e-06, "loss": 0.3821, "step": 6856 }, { "epoch": 0.4114117717645647, "grad_norm": 1.2802101373672485, "learning_rate": 4.6527203090903355e-06, "loss": 0.4176, "step": 6857 }, { "epoch": 0.4114717705645887, "grad_norm": 1.2133504152297974, "learning_rate": 4.652078068265723e-06, "loss": 0.4153, "step": 6858 }, { "epoch": 0.4115317693646127, "grad_norm": 1.2936781644821167, "learning_rate": 4.6514357839323715e-06, "loss": 0.3773, "step": 6859 }, { "epoch": 0.4115917681646367, "grad_norm": 1.3438634872436523, "learning_rate": 4.650793456114537e-06, "loss": 0.4203, "step": 6860 }, { "epoch": 0.4116517669646607, "grad_norm": 1.3400627374649048, "learning_rate": 4.6501510848364755e-06, "loss": 0.4489, "step": 6861 }, { "epoch": 0.4117117657646847, "grad_norm": 1.280337929725647, "learning_rate": 4.6495086701224475e-06, "loss": 0.458, "step": 6862 }, { "epoch": 0.4117717645647087, "grad_norm": 1.2661868333816528, "learning_rate": 4.648866211996716e-06, "loss": 0.431, "step": 6863 }, { "epoch": 0.4118317633647327, "grad_norm": 1.2847506999969482, "learning_rate": 4.64822371048354e-06, "loss": 0.3696, "step": 6864 }, { "epoch": 0.4118917621647567, "grad_norm": 1.2194018363952637, "learning_rate": 4.6475811656071875e-06, "loss": 0.4384, "step": 6865 }, { "epoch": 0.4119517609647807, "grad_norm": 1.1719850301742554, "learning_rate": 4.646938577391921e-06, "loss": 0.4201, "step": 6866 }, { "epoch": 0.4120117597648047, "grad_norm": 1.4161887168884277, "learning_rate": 4.646295945862011e-06, "loss": 0.4846, "step": 6867 }, { "epoch": 0.4120717585648287, "grad_norm": 1.3957704305648804, "learning_rate": 4.645653271041725e-06, "loss": 0.4149, "step": 6868 }, { "epoch": 0.4121317573648527, "grad_norm": 1.1212981939315796, "learning_rate": 4.645010552955335e-06, "loss": 0.4463, "step": 6869 }, { "epoch": 0.4121917561648767, "grad_norm": 1.1340885162353516, "learning_rate": 4.644367791627114e-06, "loss": 0.3781, "step": 6870 }, { "epoch": 0.4122517549649007, "grad_norm": 1.2006546258926392, "learning_rate": 4.643724987081334e-06, "loss": 0.4519, "step": 6871 }, { "epoch": 0.4123117537649247, "grad_norm": 1.2360138893127441, "learning_rate": 4.643082139342271e-06, "loss": 0.4159, "step": 6872 }, { "epoch": 0.4123717525649487, "grad_norm": 1.2800334692001343, "learning_rate": 4.642439248434205e-06, "loss": 0.4536, "step": 6873 }, { "epoch": 0.4124317513649727, "grad_norm": 1.314517617225647, "learning_rate": 4.641796314381413e-06, "loss": 0.4253, "step": 6874 }, { "epoch": 0.4124917501649967, "grad_norm": 1.485022783279419, "learning_rate": 4.641153337208177e-06, "loss": 0.4285, "step": 6875 }, { "epoch": 0.4125517489650207, "grad_norm": 1.1881499290466309, "learning_rate": 4.640510316938777e-06, "loss": 0.4414, "step": 6876 }, { "epoch": 0.4126117477650447, "grad_norm": 1.279144287109375, "learning_rate": 4.6398672535975e-06, "loss": 0.3962, "step": 6877 }, { "epoch": 0.4126717465650687, "grad_norm": 1.4097521305084229, "learning_rate": 4.639224147208629e-06, "loss": 0.4991, "step": 6878 }, { "epoch": 0.4127317453650927, "grad_norm": 1.2683119773864746, "learning_rate": 4.638580997796454e-06, "loss": 0.4032, "step": 6879 }, { "epoch": 0.4127917441651167, "grad_norm": 1.24672532081604, "learning_rate": 4.637937805385261e-06, "loss": 0.4726, "step": 6880 }, { "epoch": 0.4128517429651407, "grad_norm": 1.4238479137420654, "learning_rate": 4.63729456999934e-06, "loss": 0.4358, "step": 6881 }, { "epoch": 0.4129117417651647, "grad_norm": 1.30945885181427, "learning_rate": 4.636651291662987e-06, "loss": 0.4562, "step": 6882 }, { "epoch": 0.4129717405651887, "grad_norm": 1.2722090482711792, "learning_rate": 4.636007970400492e-06, "loss": 0.4664, "step": 6883 }, { "epoch": 0.4130317393652127, "grad_norm": 1.2557538747787476, "learning_rate": 4.635364606236151e-06, "loss": 0.4173, "step": 6884 }, { "epoch": 0.4130917381652367, "grad_norm": 1.2097865343093872, "learning_rate": 4.634721199194262e-06, "loss": 0.3856, "step": 6885 }, { "epoch": 0.4131517369652607, "grad_norm": 1.2847075462341309, "learning_rate": 4.6340777492991245e-06, "loss": 0.4228, "step": 6886 }, { "epoch": 0.4132117357652847, "grad_norm": 1.087641716003418, "learning_rate": 4.6334342565750356e-06, "loss": 0.4061, "step": 6887 }, { "epoch": 0.4132717345653087, "grad_norm": 1.294228434562683, "learning_rate": 4.6327907210463e-06, "loss": 0.4158, "step": 6888 }, { "epoch": 0.4133317333653327, "grad_norm": 1.1914385557174683, "learning_rate": 4.6321471427372196e-06, "loss": 0.4268, "step": 6889 }, { "epoch": 0.4133917321653567, "grad_norm": 1.4809074401855469, "learning_rate": 4.631503521672098e-06, "loss": 0.4686, "step": 6890 }, { "epoch": 0.4134517309653807, "grad_norm": 1.2802809476852417, "learning_rate": 4.6308598578752455e-06, "loss": 0.4438, "step": 6891 }, { "epoch": 0.4135117297654047, "grad_norm": 1.3278820514678955, "learning_rate": 4.630216151370967e-06, "loss": 0.4646, "step": 6892 }, { "epoch": 0.4135717285654287, "grad_norm": 1.158747911453247, "learning_rate": 4.629572402183574e-06, "loss": 0.4398, "step": 6893 }, { "epoch": 0.4136317273654527, "grad_norm": 1.2142632007598877, "learning_rate": 4.628928610337378e-06, "loss": 0.4417, "step": 6894 }, { "epoch": 0.41369172616547667, "grad_norm": 1.2478805780410767, "learning_rate": 4.628284775856692e-06, "loss": 0.3753, "step": 6895 }, { "epoch": 0.41375172496550067, "grad_norm": 1.1233317852020264, "learning_rate": 4.627640898765831e-06, "loss": 0.414, "step": 6896 }, { "epoch": 0.41381172376552466, "grad_norm": 1.2253727912902832, "learning_rate": 4.626996979089109e-06, "loss": 0.4657, "step": 6897 }, { "epoch": 0.4138717225655487, "grad_norm": 1.3560940027236938, "learning_rate": 4.626353016850847e-06, "loss": 0.4299, "step": 6898 }, { "epoch": 0.4139317213655727, "grad_norm": 1.2164193391799927, "learning_rate": 4.625709012075363e-06, "loss": 0.4036, "step": 6899 }, { "epoch": 0.4139917201655967, "grad_norm": 1.4054094552993774, "learning_rate": 4.625064964786979e-06, "loss": 0.4648, "step": 6900 }, { "epoch": 0.4140517189656207, "grad_norm": 1.2335196733474731, "learning_rate": 4.624420875010015e-06, "loss": 0.3492, "step": 6901 }, { "epoch": 0.4141117177656447, "grad_norm": 1.279653549194336, "learning_rate": 4.6237767427687995e-06, "loss": 0.372, "step": 6902 }, { "epoch": 0.4141717165656687, "grad_norm": 1.1679229736328125, "learning_rate": 4.6231325680876554e-06, "loss": 0.4013, "step": 6903 }, { "epoch": 0.4142317153656927, "grad_norm": 1.3786200284957886, "learning_rate": 4.6224883509909105e-06, "loss": 0.4466, "step": 6904 }, { "epoch": 0.4142917141657167, "grad_norm": 1.2860559225082397, "learning_rate": 4.621844091502896e-06, "loss": 0.4723, "step": 6905 }, { "epoch": 0.4143517129657407, "grad_norm": 1.388850450515747, "learning_rate": 4.62119978964794e-06, "loss": 0.4391, "step": 6906 }, { "epoch": 0.4144117117657647, "grad_norm": 1.3848625421524048, "learning_rate": 4.620555445450377e-06, "loss": 0.4315, "step": 6907 }, { "epoch": 0.4144717105657887, "grad_norm": 1.285975694656372, "learning_rate": 4.6199110589345405e-06, "loss": 0.3813, "step": 6908 }, { "epoch": 0.4145317093658127, "grad_norm": 1.29606032371521, "learning_rate": 4.619266630124765e-06, "loss": 0.4176, "step": 6909 }, { "epoch": 0.4145917081658367, "grad_norm": 1.3293631076812744, "learning_rate": 4.618622159045388e-06, "loss": 0.4128, "step": 6910 }, { "epoch": 0.4146517069658607, "grad_norm": 1.193903923034668, "learning_rate": 4.6179776457207494e-06, "loss": 0.4104, "step": 6911 }, { "epoch": 0.4147117057658847, "grad_norm": 1.260405421257019, "learning_rate": 4.617333090175187e-06, "loss": 0.395, "step": 6912 }, { "epoch": 0.41477170456590867, "grad_norm": 1.1010258197784424, "learning_rate": 4.616688492433045e-06, "loss": 0.3902, "step": 6913 }, { "epoch": 0.41483170336593267, "grad_norm": 1.217969298362732, "learning_rate": 4.616043852518667e-06, "loss": 0.3974, "step": 6914 }, { "epoch": 0.41489170216595667, "grad_norm": 1.2050470113754272, "learning_rate": 4.6153991704563975e-06, "loss": 0.4516, "step": 6915 }, { "epoch": 0.41495170096598066, "grad_norm": 1.3392277956008911, "learning_rate": 4.614754446270582e-06, "loss": 0.4551, "step": 6916 }, { "epoch": 0.41501169976600466, "grad_norm": 1.3935531377792358, "learning_rate": 4.61410967998557e-06, "loss": 0.4726, "step": 6917 }, { "epoch": 0.41507169856602866, "grad_norm": 1.1516731977462769, "learning_rate": 4.613464871625712e-06, "loss": 0.3908, "step": 6918 }, { "epoch": 0.41513169736605265, "grad_norm": 1.262574553489685, "learning_rate": 4.612820021215358e-06, "loss": 0.4116, "step": 6919 }, { "epoch": 0.41519169616607665, "grad_norm": 1.1003071069717407, "learning_rate": 4.612175128778862e-06, "loss": 0.4211, "step": 6920 }, { "epoch": 0.4152516949661007, "grad_norm": 1.3572235107421875, "learning_rate": 4.611530194340579e-06, "loss": 0.4465, "step": 6921 }, { "epoch": 0.4153116937661247, "grad_norm": 1.1272300481796265, "learning_rate": 4.610885217924864e-06, "loss": 0.4085, "step": 6922 }, { "epoch": 0.4153716925661487, "grad_norm": 1.2828948497772217, "learning_rate": 4.6102401995560764e-06, "loss": 0.379, "step": 6923 }, { "epoch": 0.4154316913661727, "grad_norm": 1.2081608772277832, "learning_rate": 4.609595139258574e-06, "loss": 0.4051, "step": 6924 }, { "epoch": 0.4154916901661967, "grad_norm": 1.1499145030975342, "learning_rate": 4.608950037056718e-06, "loss": 0.4156, "step": 6925 }, { "epoch": 0.4155516889662207, "grad_norm": 1.3253625631332397, "learning_rate": 4.608304892974871e-06, "loss": 0.4418, "step": 6926 }, { "epoch": 0.4156116877662447, "grad_norm": 1.3614311218261719, "learning_rate": 4.6076597070373995e-06, "loss": 0.4158, "step": 6927 }, { "epoch": 0.4156716865662687, "grad_norm": 1.3025600910186768, "learning_rate": 4.607014479268667e-06, "loss": 0.4747, "step": 6928 }, { "epoch": 0.4157316853662927, "grad_norm": 1.3109345436096191, "learning_rate": 4.606369209693041e-06, "loss": 0.4071, "step": 6929 }, { "epoch": 0.4157916841663167, "grad_norm": 1.2860631942749023, "learning_rate": 4.60572389833489e-06, "loss": 0.4014, "step": 6930 }, { "epoch": 0.4158516829663407, "grad_norm": 1.1672693490982056, "learning_rate": 4.605078545218585e-06, "loss": 0.3992, "step": 6931 }, { "epoch": 0.41591168176636467, "grad_norm": 1.5412358045578003, "learning_rate": 4.604433150368499e-06, "loss": 0.4404, "step": 6932 }, { "epoch": 0.41597168056638867, "grad_norm": 1.212924838066101, "learning_rate": 4.603787713809003e-06, "loss": 0.4044, "step": 6933 }, { "epoch": 0.41603167936641267, "grad_norm": 1.2088202238082886, "learning_rate": 4.603142235564476e-06, "loss": 0.3675, "step": 6934 }, { "epoch": 0.41609167816643666, "grad_norm": 1.1916608810424805, "learning_rate": 4.602496715659291e-06, "loss": 0.4154, "step": 6935 }, { "epoch": 0.41615167696646066, "grad_norm": 1.1499851942062378, "learning_rate": 4.601851154117829e-06, "loss": 0.3938, "step": 6936 }, { "epoch": 0.41621167576648466, "grad_norm": 1.1511842012405396, "learning_rate": 4.601205550964467e-06, "loss": 0.3893, "step": 6937 }, { "epoch": 0.41627167456650865, "grad_norm": 1.3410736322402954, "learning_rate": 4.600559906223591e-06, "loss": 0.4347, "step": 6938 }, { "epoch": 0.41633167336653265, "grad_norm": 1.3880679607391357, "learning_rate": 4.59991421991958e-06, "loss": 0.3902, "step": 6939 }, { "epoch": 0.41639167216655665, "grad_norm": 1.2676023244857788, "learning_rate": 4.5992684920768214e-06, "loss": 0.3884, "step": 6940 }, { "epoch": 0.41645167096658064, "grad_norm": 1.36931574344635, "learning_rate": 4.598622722719699e-06, "loss": 0.4025, "step": 6941 }, { "epoch": 0.41651166976660464, "grad_norm": 1.3140844106674194, "learning_rate": 4.597976911872603e-06, "loss": 0.4219, "step": 6942 }, { "epoch": 0.4165716685666287, "grad_norm": 1.1880542039871216, "learning_rate": 4.5973310595599186e-06, "loss": 0.3975, "step": 6943 }, { "epoch": 0.4166316673666527, "grad_norm": 1.274094820022583, "learning_rate": 4.5966851658060415e-06, "loss": 0.4168, "step": 6944 }, { "epoch": 0.4166916661666767, "grad_norm": 1.3740907907485962, "learning_rate": 4.596039230635362e-06, "loss": 0.4139, "step": 6945 }, { "epoch": 0.4167516649667007, "grad_norm": 1.3108640909194946, "learning_rate": 4.595393254072274e-06, "loss": 0.3828, "step": 6946 }, { "epoch": 0.4168116637667247, "grad_norm": 1.177433729171753, "learning_rate": 4.594747236141174e-06, "loss": 0.4068, "step": 6947 }, { "epoch": 0.4168716625667487, "grad_norm": 1.2124651670455933, "learning_rate": 4.594101176866459e-06, "loss": 0.4347, "step": 6948 }, { "epoch": 0.4169316613667727, "grad_norm": 1.200891375541687, "learning_rate": 4.5934550762725255e-06, "loss": 0.3841, "step": 6949 }, { "epoch": 0.4169916601667967, "grad_norm": 1.196914792060852, "learning_rate": 4.592808934383776e-06, "loss": 0.4059, "step": 6950 }, { "epoch": 0.41705165896682067, "grad_norm": 1.3658283948898315, "learning_rate": 4.592162751224612e-06, "loss": 0.4218, "step": 6951 }, { "epoch": 0.41711165776684467, "grad_norm": 1.2403243780136108, "learning_rate": 4.591516526819438e-06, "loss": 0.4271, "step": 6952 }, { "epoch": 0.41717165656686866, "grad_norm": 1.345669150352478, "learning_rate": 4.590870261192657e-06, "loss": 0.4258, "step": 6953 }, { "epoch": 0.41723165536689266, "grad_norm": 1.4353493452072144, "learning_rate": 4.590223954368675e-06, "loss": 0.4495, "step": 6954 }, { "epoch": 0.41729165416691666, "grad_norm": 1.3421111106872559, "learning_rate": 4.589577606371903e-06, "loss": 0.4368, "step": 6955 }, { "epoch": 0.41735165296694066, "grad_norm": 1.2520394325256348, "learning_rate": 4.5889312172267485e-06, "loss": 0.3895, "step": 6956 }, { "epoch": 0.41741165176696465, "grad_norm": 1.3265752792358398, "learning_rate": 4.588284786957623e-06, "loss": 0.4022, "step": 6957 }, { "epoch": 0.41747165056698865, "grad_norm": 1.258838415145874, "learning_rate": 4.587638315588939e-06, "loss": 0.4154, "step": 6958 }, { "epoch": 0.41753164936701265, "grad_norm": 1.2169930934906006, "learning_rate": 4.5869918031451115e-06, "loss": 0.409, "step": 6959 }, { "epoch": 0.41759164816703664, "grad_norm": 1.3703104257583618, "learning_rate": 4.586345249650554e-06, "loss": 0.4349, "step": 6960 }, { "epoch": 0.41765164696706064, "grad_norm": 1.4336767196655273, "learning_rate": 4.585698655129688e-06, "loss": 0.4366, "step": 6961 }, { "epoch": 0.41771164576708464, "grad_norm": 1.2345865964889526, "learning_rate": 4.58505201960693e-06, "loss": 0.4152, "step": 6962 }, { "epoch": 0.41777164456710864, "grad_norm": 1.2672810554504395, "learning_rate": 4.5844053431067e-06, "loss": 0.4047, "step": 6963 }, { "epoch": 0.41783164336713263, "grad_norm": 1.1844427585601807, "learning_rate": 4.583758625653422e-06, "loss": 0.4071, "step": 6964 }, { "epoch": 0.41789164216715663, "grad_norm": 1.3040668964385986, "learning_rate": 4.5831118672715174e-06, "loss": 0.4078, "step": 6965 }, { "epoch": 0.4179516409671807, "grad_norm": 1.1634697914123535, "learning_rate": 4.582465067985411e-06, "loss": 0.383, "step": 6966 }, { "epoch": 0.4180116397672047, "grad_norm": 1.2521946430206299, "learning_rate": 4.581818227819532e-06, "loss": 0.4237, "step": 6967 }, { "epoch": 0.4180716385672287, "grad_norm": 1.315093755722046, "learning_rate": 4.581171346798307e-06, "loss": 0.4351, "step": 6968 }, { "epoch": 0.4181316373672527, "grad_norm": 1.1431654691696167, "learning_rate": 4.580524424946167e-06, "loss": 0.3773, "step": 6969 }, { "epoch": 0.41819163616727667, "grad_norm": 1.128592848777771, "learning_rate": 4.579877462287542e-06, "loss": 0.4431, "step": 6970 }, { "epoch": 0.41825163496730067, "grad_norm": 1.280312418937683, "learning_rate": 4.579230458846864e-06, "loss": 0.461, "step": 6971 }, { "epoch": 0.41831163376732466, "grad_norm": 1.2784833908081055, "learning_rate": 4.578583414648569e-06, "loss": 0.4041, "step": 6972 }, { "epoch": 0.41837163256734866, "grad_norm": 1.2555549144744873, "learning_rate": 4.577936329717094e-06, "loss": 0.4562, "step": 6973 }, { "epoch": 0.41843163136737266, "grad_norm": 1.174172043800354, "learning_rate": 4.577289204076872e-06, "loss": 0.4109, "step": 6974 }, { "epoch": 0.41849163016739666, "grad_norm": 1.211807131767273, "learning_rate": 4.576642037752347e-06, "loss": 0.4438, "step": 6975 }, { "epoch": 0.41855162896742065, "grad_norm": 1.2550908327102661, "learning_rate": 4.5759948307679565e-06, "loss": 0.4388, "step": 6976 }, { "epoch": 0.41861162776744465, "grad_norm": 1.26236093044281, "learning_rate": 4.575347583148145e-06, "loss": 0.4365, "step": 6977 }, { "epoch": 0.41867162656746865, "grad_norm": 1.133923888206482, "learning_rate": 4.574700294917353e-06, "loss": 0.3727, "step": 6978 }, { "epoch": 0.41873162536749264, "grad_norm": 1.3106869459152222, "learning_rate": 4.574052966100027e-06, "loss": 0.4607, "step": 6979 }, { "epoch": 0.41879162416751664, "grad_norm": 1.1824331283569336, "learning_rate": 4.573405596720615e-06, "loss": 0.4276, "step": 6980 }, { "epoch": 0.41885162296754064, "grad_norm": 1.3177144527435303, "learning_rate": 4.572758186803563e-06, "loss": 0.4259, "step": 6981 }, { "epoch": 0.41891162176756463, "grad_norm": 1.3680956363677979, "learning_rate": 4.572110736373323e-06, "loss": 0.4264, "step": 6982 }, { "epoch": 0.41897162056758863, "grad_norm": 1.2828044891357422, "learning_rate": 4.571463245454344e-06, "loss": 0.3694, "step": 6983 }, { "epoch": 0.41903161936761263, "grad_norm": 1.245177984237671, "learning_rate": 4.57081571407108e-06, "loss": 0.377, "step": 6984 }, { "epoch": 0.4190916181676366, "grad_norm": 1.347421646118164, "learning_rate": 4.570168142247985e-06, "loss": 0.4364, "step": 6985 }, { "epoch": 0.4191516169676606, "grad_norm": 1.4374330043792725, "learning_rate": 4.5695205300095145e-06, "loss": 0.4187, "step": 6986 }, { "epoch": 0.4192116157676846, "grad_norm": 1.5300278663635254, "learning_rate": 4.5688728773801265e-06, "loss": 0.4314, "step": 6987 }, { "epoch": 0.4192716145677087, "grad_norm": 1.404836893081665, "learning_rate": 4.568225184384281e-06, "loss": 0.4626, "step": 6988 }, { "epoch": 0.41933161336773267, "grad_norm": 1.4465337991714478, "learning_rate": 4.567577451046436e-06, "loss": 0.451, "step": 6989 }, { "epoch": 0.41939161216775667, "grad_norm": 1.2141631841659546, "learning_rate": 4.566929677391054e-06, "loss": 0.4132, "step": 6990 }, { "epoch": 0.41945161096778066, "grad_norm": 1.3845633268356323, "learning_rate": 4.5662818634426e-06, "loss": 0.377, "step": 6991 }, { "epoch": 0.41951160976780466, "grad_norm": 1.2977616786956787, "learning_rate": 4.565634009225539e-06, "loss": 0.4524, "step": 6992 }, { "epoch": 0.41957160856782866, "grad_norm": 1.3012006282806396, "learning_rate": 4.5649861147643335e-06, "loss": 0.4141, "step": 6993 }, { "epoch": 0.41963160736785265, "grad_norm": 1.1135660409927368, "learning_rate": 4.564338180083456e-06, "loss": 0.3684, "step": 6994 }, { "epoch": 0.41969160616787665, "grad_norm": 1.1603950262069702, "learning_rate": 4.563690205207374e-06, "loss": 0.4015, "step": 6995 }, { "epoch": 0.41975160496790065, "grad_norm": 1.4458824396133423, "learning_rate": 4.56304219016056e-06, "loss": 0.4175, "step": 6996 }, { "epoch": 0.41981160376792465, "grad_norm": 1.3088388442993164, "learning_rate": 4.562394134967487e-06, "loss": 0.4057, "step": 6997 }, { "epoch": 0.41987160256794864, "grad_norm": 1.3601535558700562, "learning_rate": 4.561746039652626e-06, "loss": 0.4458, "step": 6998 }, { "epoch": 0.41993160136797264, "grad_norm": 1.2343262434005737, "learning_rate": 4.561097904240454e-06, "loss": 0.4, "step": 6999 }, { "epoch": 0.41999160016799664, "grad_norm": 1.4086016416549683, "learning_rate": 4.56044972875545e-06, "loss": 0.4326, "step": 7000 }, { "epoch": 0.42005159896802063, "grad_norm": 1.2081267833709717, "learning_rate": 4.55980151322209e-06, "loss": 0.4202, "step": 7001 }, { "epoch": 0.42011159776804463, "grad_norm": 1.2943272590637207, "learning_rate": 4.559153257664855e-06, "loss": 0.3856, "step": 7002 }, { "epoch": 0.42017159656806863, "grad_norm": 1.2984780073165894, "learning_rate": 4.5585049621082284e-06, "loss": 0.4334, "step": 7003 }, { "epoch": 0.4202315953680926, "grad_norm": 1.7530003786087036, "learning_rate": 4.5578566265766905e-06, "loss": 0.406, "step": 7004 }, { "epoch": 0.4202915941681166, "grad_norm": 1.197136402130127, "learning_rate": 4.557208251094729e-06, "loss": 0.3684, "step": 7005 }, { "epoch": 0.4203515929681406, "grad_norm": 1.1939268112182617, "learning_rate": 4.5565598356868275e-06, "loss": 0.4013, "step": 7006 }, { "epoch": 0.4204115917681646, "grad_norm": 1.263681173324585, "learning_rate": 4.555911380377474e-06, "loss": 0.41, "step": 7007 }, { "epoch": 0.4204715905681886, "grad_norm": 1.2946295738220215, "learning_rate": 4.5552628851911594e-06, "loss": 0.4252, "step": 7008 }, { "epoch": 0.4205315893682126, "grad_norm": 1.3288758993148804, "learning_rate": 4.554614350152374e-06, "loss": 0.4018, "step": 7009 }, { "epoch": 0.4205915881682366, "grad_norm": 1.2979702949523926, "learning_rate": 4.553965775285607e-06, "loss": 0.3854, "step": 7010 }, { "epoch": 0.42065158696826066, "grad_norm": 1.3939639329910278, "learning_rate": 4.553317160615356e-06, "loss": 0.44, "step": 7011 }, { "epoch": 0.42071158576828466, "grad_norm": 1.2410892248153687, "learning_rate": 4.552668506166115e-06, "loss": 0.429, "step": 7012 }, { "epoch": 0.42077158456830865, "grad_norm": 1.279802918434143, "learning_rate": 4.55201981196238e-06, "loss": 0.4203, "step": 7013 }, { "epoch": 0.42083158336833265, "grad_norm": 1.289430022239685, "learning_rate": 4.55137107802865e-06, "loss": 0.3917, "step": 7014 }, { "epoch": 0.42089158216835665, "grad_norm": 1.2351553440093994, "learning_rate": 4.550722304389424e-06, "loss": 0.422, "step": 7015 }, { "epoch": 0.42095158096838065, "grad_norm": 1.463940978050232, "learning_rate": 4.550073491069205e-06, "loss": 0.4312, "step": 7016 }, { "epoch": 0.42101157976840464, "grad_norm": 1.1992920637130737, "learning_rate": 4.5494246380924924e-06, "loss": 0.4113, "step": 7017 }, { "epoch": 0.42107157856842864, "grad_norm": 1.2493441104888916, "learning_rate": 4.548775745483793e-06, "loss": 0.4201, "step": 7018 }, { "epoch": 0.42113157736845264, "grad_norm": 1.220799446105957, "learning_rate": 4.548126813267612e-06, "loss": 0.4028, "step": 7019 }, { "epoch": 0.42119157616847663, "grad_norm": 1.2484105825424194, "learning_rate": 4.547477841468457e-06, "loss": 0.4048, "step": 7020 }, { "epoch": 0.42125157496850063, "grad_norm": 1.2189148664474487, "learning_rate": 4.546828830110837e-06, "loss": 0.4001, "step": 7021 }, { "epoch": 0.4213115737685246, "grad_norm": 1.1699573993682861, "learning_rate": 4.546179779219259e-06, "loss": 0.4028, "step": 7022 }, { "epoch": 0.4213715725685486, "grad_norm": 1.1672258377075195, "learning_rate": 4.54553068881824e-06, "loss": 0.4089, "step": 7023 }, { "epoch": 0.4214315713685726, "grad_norm": 1.333325982093811, "learning_rate": 4.544881558932288e-06, "loss": 0.4209, "step": 7024 }, { "epoch": 0.4214915701685966, "grad_norm": 1.3132613897323608, "learning_rate": 4.54423238958592e-06, "loss": 0.398, "step": 7025 }, { "epoch": 0.4215515689686206, "grad_norm": 1.2260477542877197, "learning_rate": 4.543583180803654e-06, "loss": 0.4098, "step": 7026 }, { "epoch": 0.4216115677686446, "grad_norm": 1.324571132659912, "learning_rate": 4.542933932610004e-06, "loss": 0.4184, "step": 7027 }, { "epoch": 0.4216715665686686, "grad_norm": 1.3497958183288574, "learning_rate": 4.542284645029492e-06, "loss": 0.4618, "step": 7028 }, { "epoch": 0.4217315653686926, "grad_norm": 1.4192485809326172, "learning_rate": 4.541635318086637e-06, "loss": 0.4134, "step": 7029 }, { "epoch": 0.4217915641687166, "grad_norm": 1.3200056552886963, "learning_rate": 4.540985951805962e-06, "loss": 0.4571, "step": 7030 }, { "epoch": 0.4218515629687406, "grad_norm": 1.2713547945022583, "learning_rate": 4.54033654621199e-06, "loss": 0.4207, "step": 7031 }, { "epoch": 0.4219115617687646, "grad_norm": 1.3095003366470337, "learning_rate": 4.539687101329248e-06, "loss": 0.3759, "step": 7032 }, { "epoch": 0.42197156056878865, "grad_norm": 1.1636157035827637, "learning_rate": 4.539037617182259e-06, "loss": 0.3918, "step": 7033 }, { "epoch": 0.42203155936881265, "grad_norm": 1.4179095029830933, "learning_rate": 4.538388093795555e-06, "loss": 0.4979, "step": 7034 }, { "epoch": 0.42209155816883664, "grad_norm": 1.2907475233078003, "learning_rate": 4.537738531193661e-06, "loss": 0.4123, "step": 7035 }, { "epoch": 0.42215155696886064, "grad_norm": 1.4590140581130981, "learning_rate": 4.537088929401112e-06, "loss": 0.4587, "step": 7036 }, { "epoch": 0.42221155576888464, "grad_norm": 1.1836215257644653, "learning_rate": 4.53643928844244e-06, "loss": 0.3786, "step": 7037 }, { "epoch": 0.42227155456890864, "grad_norm": 1.3491311073303223, "learning_rate": 4.535789608342177e-06, "loss": 0.4022, "step": 7038 }, { "epoch": 0.42233155336893263, "grad_norm": 1.2495287656784058, "learning_rate": 4.535139889124859e-06, "loss": 0.428, "step": 7039 }, { "epoch": 0.42239155216895663, "grad_norm": 1.1992535591125488, "learning_rate": 4.534490130815024e-06, "loss": 0.4385, "step": 7040 }, { "epoch": 0.4224515509689806, "grad_norm": 1.318915605545044, "learning_rate": 4.53384033343721e-06, "loss": 0.3979, "step": 7041 }, { "epoch": 0.4225115497690046, "grad_norm": 1.1528795957565308, "learning_rate": 4.533190497015957e-06, "loss": 0.3908, "step": 7042 }, { "epoch": 0.4225715485690286, "grad_norm": 1.3050363063812256, "learning_rate": 4.5325406215758055e-06, "loss": 0.4405, "step": 7043 }, { "epoch": 0.4226315473690526, "grad_norm": 1.2900632619857788, "learning_rate": 4.5318907071413e-06, "loss": 0.3958, "step": 7044 }, { "epoch": 0.4226915461690766, "grad_norm": 1.347393274307251, "learning_rate": 4.531240753736983e-06, "loss": 0.4216, "step": 7045 }, { "epoch": 0.4227515449691006, "grad_norm": 1.2415395975112915, "learning_rate": 4.530590761387402e-06, "loss": 0.3928, "step": 7046 }, { "epoch": 0.4228115437691246, "grad_norm": 1.3214834928512573, "learning_rate": 4.529940730117101e-06, "loss": 0.4482, "step": 7047 }, { "epoch": 0.4228715425691486, "grad_norm": 1.3209282159805298, "learning_rate": 4.529290659950634e-06, "loss": 0.3788, "step": 7048 }, { "epoch": 0.4229315413691726, "grad_norm": 1.0920206308364868, "learning_rate": 4.528640550912547e-06, "loss": 0.4107, "step": 7049 }, { "epoch": 0.4229915401691966, "grad_norm": 1.2722392082214355, "learning_rate": 4.527990403027391e-06, "loss": 0.4448, "step": 7050 }, { "epoch": 0.4230515389692206, "grad_norm": 1.2645691633224487, "learning_rate": 4.527340216319724e-06, "loss": 0.4344, "step": 7051 }, { "epoch": 0.4231115377692446, "grad_norm": 1.1849792003631592, "learning_rate": 4.526689990814096e-06, "loss": 0.3739, "step": 7052 }, { "epoch": 0.4231715365692686, "grad_norm": 1.3285733461380005, "learning_rate": 4.526039726535065e-06, "loss": 0.427, "step": 7053 }, { "epoch": 0.4232315353692926, "grad_norm": 1.2882120609283447, "learning_rate": 4.525389423507187e-06, "loss": 0.4194, "step": 7054 }, { "epoch": 0.42329153416931664, "grad_norm": 1.3019872903823853, "learning_rate": 4.524739081755024e-06, "loss": 0.4582, "step": 7055 }, { "epoch": 0.42335153296934064, "grad_norm": 1.127389669418335, "learning_rate": 4.524088701303134e-06, "loss": 0.4266, "step": 7056 }, { "epoch": 0.42341153176936464, "grad_norm": 1.298167109489441, "learning_rate": 4.523438282176079e-06, "loss": 0.4281, "step": 7057 }, { "epoch": 0.42347153056938863, "grad_norm": 1.2861860990524292, "learning_rate": 4.522787824398422e-06, "loss": 0.4212, "step": 7058 }, { "epoch": 0.42353152936941263, "grad_norm": 1.1527026891708374, "learning_rate": 4.522137327994731e-06, "loss": 0.3931, "step": 7059 }, { "epoch": 0.4235915281694366, "grad_norm": 1.239312767982483, "learning_rate": 4.521486792989568e-06, "loss": 0.3823, "step": 7060 }, { "epoch": 0.4236515269694606, "grad_norm": 1.0997956991195679, "learning_rate": 4.5208362194075035e-06, "loss": 0.3925, "step": 7061 }, { "epoch": 0.4237115257694846, "grad_norm": 1.3077399730682373, "learning_rate": 4.520185607273106e-06, "loss": 0.4194, "step": 7062 }, { "epoch": 0.4237715245695086, "grad_norm": 1.2358498573303223, "learning_rate": 4.519534956610946e-06, "loss": 0.4352, "step": 7063 }, { "epoch": 0.4238315233695326, "grad_norm": 1.2687650918960571, "learning_rate": 4.518884267445596e-06, "loss": 0.409, "step": 7064 }, { "epoch": 0.4238915221695566, "grad_norm": 1.1569534540176392, "learning_rate": 4.518233539801629e-06, "loss": 0.4421, "step": 7065 }, { "epoch": 0.4239515209695806, "grad_norm": 1.2259994745254517, "learning_rate": 4.517582773703618e-06, "loss": 0.386, "step": 7066 }, { "epoch": 0.4240115197696046, "grad_norm": 1.3827476501464844, "learning_rate": 4.516931969176145e-06, "loss": 0.4149, "step": 7067 }, { "epoch": 0.4240715185696286, "grad_norm": 1.2769352197647095, "learning_rate": 4.516281126243783e-06, "loss": 0.4656, "step": 7068 }, { "epoch": 0.4241315173696526, "grad_norm": 1.1627111434936523, "learning_rate": 4.515630244931114e-06, "loss": 0.3869, "step": 7069 }, { "epoch": 0.4241915161696766, "grad_norm": 1.145633339881897, "learning_rate": 4.514979325262717e-06, "loss": 0.4168, "step": 7070 }, { "epoch": 0.4242515149697006, "grad_norm": 1.2096353769302368, "learning_rate": 4.514328367263176e-06, "loss": 0.4492, "step": 7071 }, { "epoch": 0.4243115137697246, "grad_norm": 1.2575678825378418, "learning_rate": 4.513677370957074e-06, "loss": 0.3989, "step": 7072 }, { "epoch": 0.4243715125697486, "grad_norm": 1.1611627340316772, "learning_rate": 4.513026336368994e-06, "loss": 0.4179, "step": 7073 }, { "epoch": 0.4244315113697726, "grad_norm": 1.2273608446121216, "learning_rate": 4.512375263523527e-06, "loss": 0.4128, "step": 7074 }, { "epoch": 0.4244915101697966, "grad_norm": 1.2943768501281738, "learning_rate": 4.511724152445256e-06, "loss": 0.4287, "step": 7075 }, { "epoch": 0.4245515089698206, "grad_norm": 1.2318158149719238, "learning_rate": 4.511073003158776e-06, "loss": 0.3669, "step": 7076 }, { "epoch": 0.4246115077698446, "grad_norm": 1.3033992052078247, "learning_rate": 4.5104218156886735e-06, "loss": 0.4283, "step": 7077 }, { "epoch": 0.42467150656986863, "grad_norm": 1.3580092191696167, "learning_rate": 4.509770590059543e-06, "loss": 0.413, "step": 7078 }, { "epoch": 0.4247315053698926, "grad_norm": 1.1670258045196533, "learning_rate": 4.509119326295977e-06, "loss": 0.3988, "step": 7079 }, { "epoch": 0.4247915041699166, "grad_norm": 1.2348774671554565, "learning_rate": 4.508468024422573e-06, "loss": 0.3816, "step": 7080 }, { "epoch": 0.4248515029699406, "grad_norm": 1.2750577926635742, "learning_rate": 4.507816684463925e-06, "loss": 0.4406, "step": 7081 }, { "epoch": 0.4249115017699646, "grad_norm": 1.3174508810043335, "learning_rate": 4.507165306444634e-06, "loss": 0.4687, "step": 7082 }, { "epoch": 0.4249715005699886, "grad_norm": 1.4836283922195435, "learning_rate": 4.5065138903892965e-06, "loss": 0.4665, "step": 7083 }, { "epoch": 0.4250314993700126, "grad_norm": 1.345682144165039, "learning_rate": 4.505862436322515e-06, "loss": 0.4366, "step": 7084 }, { "epoch": 0.4250914981700366, "grad_norm": 1.2208753824234009, "learning_rate": 4.505210944268893e-06, "loss": 0.4207, "step": 7085 }, { "epoch": 0.4251514969700606, "grad_norm": 1.2742825746536255, "learning_rate": 4.504559414253034e-06, "loss": 0.4014, "step": 7086 }, { "epoch": 0.4252114957700846, "grad_norm": 1.267958641052246, "learning_rate": 4.5039078462995406e-06, "loss": 0.4133, "step": 7087 }, { "epoch": 0.4252714945701086, "grad_norm": 1.1344534158706665, "learning_rate": 4.503256240433023e-06, "loss": 0.3767, "step": 7088 }, { "epoch": 0.4253314933701326, "grad_norm": 1.3558663129806519, "learning_rate": 4.502604596678086e-06, "loss": 0.454, "step": 7089 }, { "epoch": 0.4253914921701566, "grad_norm": 1.1935113668441772, "learning_rate": 4.501952915059344e-06, "loss": 0.4083, "step": 7090 }, { "epoch": 0.4254514909701806, "grad_norm": 1.199143409729004, "learning_rate": 4.501301195601403e-06, "loss": 0.4179, "step": 7091 }, { "epoch": 0.4255114897702046, "grad_norm": 1.2249327898025513, "learning_rate": 4.50064943832888e-06, "loss": 0.4184, "step": 7092 }, { "epoch": 0.4255714885702286, "grad_norm": 1.1896278858184814, "learning_rate": 4.499997643266384e-06, "loss": 0.4088, "step": 7093 }, { "epoch": 0.4256314873702526, "grad_norm": 1.164188265800476, "learning_rate": 4.499345810438536e-06, "loss": 0.4343, "step": 7094 }, { "epoch": 0.4256914861702766, "grad_norm": 1.2411973476409912, "learning_rate": 4.4986939398699485e-06, "loss": 0.4422, "step": 7095 }, { "epoch": 0.4257514849703006, "grad_norm": 1.2201446294784546, "learning_rate": 4.49804203158524e-06, "loss": 0.4173, "step": 7096 }, { "epoch": 0.4258114837703246, "grad_norm": 1.2526366710662842, "learning_rate": 4.497390085609032e-06, "loss": 0.3928, "step": 7097 }, { "epoch": 0.42587148257034857, "grad_norm": 1.380281686782837, "learning_rate": 4.496738101965944e-06, "loss": 0.4574, "step": 7098 }, { "epoch": 0.42593148137037257, "grad_norm": 1.2759040594100952, "learning_rate": 4.496086080680599e-06, "loss": 0.38, "step": 7099 }, { "epoch": 0.4259914801703966, "grad_norm": 1.1495447158813477, "learning_rate": 4.495434021777623e-06, "loss": 0.388, "step": 7100 }, { "epoch": 0.4260514789704206, "grad_norm": 1.3438689708709717, "learning_rate": 4.494781925281637e-06, "loss": 0.4016, "step": 7101 }, { "epoch": 0.4261114777704446, "grad_norm": 1.205381989479065, "learning_rate": 4.494129791217269e-06, "loss": 0.385, "step": 7102 }, { "epoch": 0.4261714765704686, "grad_norm": 1.0562995672225952, "learning_rate": 4.49347761960915e-06, "loss": 0.3419, "step": 7103 }, { "epoch": 0.4262314753704926, "grad_norm": 1.2792693376541138, "learning_rate": 4.492825410481907e-06, "loss": 0.3875, "step": 7104 }, { "epoch": 0.4262914741705166, "grad_norm": 1.2552975416183472, "learning_rate": 4.492173163860172e-06, "loss": 0.3781, "step": 7105 }, { "epoch": 0.4263514729705406, "grad_norm": 1.2097277641296387, "learning_rate": 4.491520879768576e-06, "loss": 0.3869, "step": 7106 }, { "epoch": 0.4264114717705646, "grad_norm": 1.2892522811889648, "learning_rate": 4.4908685582317534e-06, "loss": 0.4488, "step": 7107 }, { "epoch": 0.4264714705705886, "grad_norm": 1.174263596534729, "learning_rate": 4.49021619927434e-06, "loss": 0.416, "step": 7108 }, { "epoch": 0.4265314693706126, "grad_norm": 1.3745006322860718, "learning_rate": 4.489563802920974e-06, "loss": 0.4498, "step": 7109 }, { "epoch": 0.4265914681706366, "grad_norm": 1.1694449186325073, "learning_rate": 4.488911369196289e-06, "loss": 0.4426, "step": 7110 }, { "epoch": 0.4266514669706606, "grad_norm": 1.1337047815322876, "learning_rate": 4.488258898124929e-06, "loss": 0.3703, "step": 7111 }, { "epoch": 0.4267114657706846, "grad_norm": 1.2030527591705322, "learning_rate": 4.487606389731531e-06, "loss": 0.3959, "step": 7112 }, { "epoch": 0.4267714645707086, "grad_norm": 1.354300618171692, "learning_rate": 4.486953844040741e-06, "loss": 0.4151, "step": 7113 }, { "epoch": 0.4268314633707326, "grad_norm": 1.392881155014038, "learning_rate": 4.4863012610772e-06, "loss": 0.4381, "step": 7114 }, { "epoch": 0.4268914621707566, "grad_norm": 1.3726438283920288, "learning_rate": 4.485648640865555e-06, "loss": 0.4997, "step": 7115 }, { "epoch": 0.42695146097078057, "grad_norm": 1.4283310174942017, "learning_rate": 4.484995983430451e-06, "loss": 0.4852, "step": 7116 }, { "epoch": 0.42701145977080457, "grad_norm": 1.270291805267334, "learning_rate": 4.484343288796535e-06, "loss": 0.4354, "step": 7117 }, { "epoch": 0.42707145857082857, "grad_norm": 1.220757007598877, "learning_rate": 4.483690556988459e-06, "loss": 0.4339, "step": 7118 }, { "epoch": 0.42713145737085256, "grad_norm": 1.2788770198822021, "learning_rate": 4.483037788030871e-06, "loss": 0.4366, "step": 7119 }, { "epoch": 0.42719145617087656, "grad_norm": 1.101792335510254, "learning_rate": 4.482384981948426e-06, "loss": 0.4105, "step": 7120 }, { "epoch": 0.42725145497090056, "grad_norm": 1.1616897583007812, "learning_rate": 4.4817321387657755e-06, "loss": 0.409, "step": 7121 }, { "epoch": 0.42731145377092455, "grad_norm": 1.2301273345947266, "learning_rate": 4.481079258507575e-06, "loss": 0.3722, "step": 7122 }, { "epoch": 0.4273714525709486, "grad_norm": 1.1994671821594238, "learning_rate": 4.480426341198481e-06, "loss": 0.398, "step": 7123 }, { "epoch": 0.4274314513709726, "grad_norm": 1.223324179649353, "learning_rate": 4.479773386863149e-06, "loss": 0.4126, "step": 7124 }, { "epoch": 0.4274914501709966, "grad_norm": 1.2651634216308594, "learning_rate": 4.479120395526242e-06, "loss": 0.4414, "step": 7125 }, { "epoch": 0.4275514489710206, "grad_norm": 1.282235860824585, "learning_rate": 4.478467367212418e-06, "loss": 0.413, "step": 7126 }, { "epoch": 0.4276114477710446, "grad_norm": 1.276961088180542, "learning_rate": 4.47781430194634e-06, "loss": 0.461, "step": 7127 }, { "epoch": 0.4276714465710686, "grad_norm": 1.2151039838790894, "learning_rate": 4.47716119975267e-06, "loss": 0.43, "step": 7128 }, { "epoch": 0.4277314453710926, "grad_norm": 1.1382715702056885, "learning_rate": 4.476508060656073e-06, "loss": 0.3969, "step": 7129 }, { "epoch": 0.4277914441711166, "grad_norm": 1.3212072849273682, "learning_rate": 4.475854884681216e-06, "loss": 0.4243, "step": 7130 }, { "epoch": 0.4278514429711406, "grad_norm": 1.2035746574401855, "learning_rate": 4.4752016718527654e-06, "loss": 0.43, "step": 7131 }, { "epoch": 0.4279114417711646, "grad_norm": 1.271755337715149, "learning_rate": 4.474548422195392e-06, "loss": 0.3735, "step": 7132 }, { "epoch": 0.4279714405711886, "grad_norm": 1.1893231868743896, "learning_rate": 4.473895135733763e-06, "loss": 0.3945, "step": 7133 }, { "epoch": 0.4280314393712126, "grad_norm": 1.3792731761932373, "learning_rate": 4.473241812492553e-06, "loss": 0.4101, "step": 7134 }, { "epoch": 0.42809143817123657, "grad_norm": 1.4833683967590332, "learning_rate": 4.4725884524964335e-06, "loss": 0.4577, "step": 7135 }, { "epoch": 0.42815143697126057, "grad_norm": 1.5047142505645752, "learning_rate": 4.471935055770079e-06, "loss": 0.4725, "step": 7136 }, { "epoch": 0.42821143577128457, "grad_norm": 1.3003947734832764, "learning_rate": 4.4712816223381655e-06, "loss": 0.465, "step": 7137 }, { "epoch": 0.42827143457130856, "grad_norm": 1.1005498170852661, "learning_rate": 4.470628152225371e-06, "loss": 0.455, "step": 7138 }, { "epoch": 0.42833143337133256, "grad_norm": 1.3971333503723145, "learning_rate": 4.4699746454563735e-06, "loss": 0.4017, "step": 7139 }, { "epoch": 0.42839143217135656, "grad_norm": 1.3032981157302856, "learning_rate": 4.469321102055853e-06, "loss": 0.4285, "step": 7140 }, { "epoch": 0.42845143097138055, "grad_norm": 1.2862316370010376, "learning_rate": 4.468667522048489e-06, "loss": 0.4637, "step": 7141 }, { "epoch": 0.42851142977140455, "grad_norm": 1.2942190170288086, "learning_rate": 4.468013905458967e-06, "loss": 0.4118, "step": 7142 }, { "epoch": 0.42857142857142855, "grad_norm": 1.2813955545425415, "learning_rate": 4.467360252311971e-06, "loss": 0.457, "step": 7143 }, { "epoch": 0.42863142737145254, "grad_norm": 1.2647191286087036, "learning_rate": 4.466706562632184e-06, "loss": 0.4176, "step": 7144 }, { "epoch": 0.4286914261714766, "grad_norm": 1.2970794439315796, "learning_rate": 4.466052836444294e-06, "loss": 0.4407, "step": 7145 }, { "epoch": 0.4287514249715006, "grad_norm": 1.3251551389694214, "learning_rate": 4.465399073772991e-06, "loss": 0.4405, "step": 7146 }, { "epoch": 0.4288114237715246, "grad_norm": 1.2506170272827148, "learning_rate": 4.4647452746429636e-06, "loss": 0.4272, "step": 7147 }, { "epoch": 0.4288714225715486, "grad_norm": 1.4067450761795044, "learning_rate": 4.464091439078902e-06, "loss": 0.4931, "step": 7148 }, { "epoch": 0.4289314213715726, "grad_norm": 1.3547435998916626, "learning_rate": 4.4634375671054996e-06, "loss": 0.4308, "step": 7149 }, { "epoch": 0.4289914201715966, "grad_norm": 1.3386468887329102, "learning_rate": 4.462783658747449e-06, "loss": 0.3986, "step": 7150 }, { "epoch": 0.4290514189716206, "grad_norm": 1.2172839641571045, "learning_rate": 4.462129714029446e-06, "loss": 0.4058, "step": 7151 }, { "epoch": 0.4291114177716446, "grad_norm": 1.3515655994415283, "learning_rate": 4.461475732976188e-06, "loss": 0.421, "step": 7152 }, { "epoch": 0.4291714165716686, "grad_norm": 1.1500974893569946, "learning_rate": 4.46082171561237e-06, "loss": 0.4149, "step": 7153 }, { "epoch": 0.42923141537169257, "grad_norm": 1.2171508073806763, "learning_rate": 4.460167661962695e-06, "loss": 0.4024, "step": 7154 }, { "epoch": 0.42929141417171657, "grad_norm": 1.1418724060058594, "learning_rate": 4.459513572051862e-06, "loss": 0.3956, "step": 7155 }, { "epoch": 0.42935141297174056, "grad_norm": 1.256022572517395, "learning_rate": 4.458859445904572e-06, "loss": 0.4301, "step": 7156 }, { "epoch": 0.42941141177176456, "grad_norm": 1.2765966653823853, "learning_rate": 4.458205283545531e-06, "loss": 0.471, "step": 7157 }, { "epoch": 0.42947141057178856, "grad_norm": 1.3053985834121704, "learning_rate": 4.45755108499944e-06, "loss": 0.4322, "step": 7158 }, { "epoch": 0.42953140937181256, "grad_norm": 1.2188800573349, "learning_rate": 4.456896850291008e-06, "loss": 0.4233, "step": 7159 }, { "epoch": 0.42959140817183655, "grad_norm": 1.3314223289489746, "learning_rate": 4.4562425794449415e-06, "loss": 0.3669, "step": 7160 }, { "epoch": 0.42965140697186055, "grad_norm": 1.4238495826721191, "learning_rate": 4.45558827248595e-06, "loss": 0.4745, "step": 7161 }, { "epoch": 0.42971140577188455, "grad_norm": 1.2532403469085693, "learning_rate": 4.454933929438742e-06, "loss": 0.4185, "step": 7162 }, { "epoch": 0.42977140457190854, "grad_norm": 1.3171799182891846, "learning_rate": 4.454279550328031e-06, "loss": 0.4424, "step": 7163 }, { "epoch": 0.42983140337193254, "grad_norm": 1.401959776878357, "learning_rate": 4.453625135178529e-06, "loss": 0.4665, "step": 7164 }, { "epoch": 0.42989140217195654, "grad_norm": 1.2266008853912354, "learning_rate": 4.45297068401495e-06, "loss": 0.4341, "step": 7165 }, { "epoch": 0.42995140097198054, "grad_norm": 1.4250738620758057, "learning_rate": 4.452316196862009e-06, "loss": 0.442, "step": 7166 }, { "epoch": 0.43001139977200453, "grad_norm": 1.2919509410858154, "learning_rate": 4.451661673744425e-06, "loss": 0.3832, "step": 7167 }, { "epoch": 0.4300713985720286, "grad_norm": 1.4061609506607056, "learning_rate": 4.451007114686916e-06, "loss": 0.4249, "step": 7168 }, { "epoch": 0.4301313973720526, "grad_norm": 1.3469492197036743, "learning_rate": 4.450352519714201e-06, "loss": 0.4651, "step": 7169 }, { "epoch": 0.4301913961720766, "grad_norm": 1.144569993019104, "learning_rate": 4.449697888851e-06, "loss": 0.391, "step": 7170 }, { "epoch": 0.4302513949721006, "grad_norm": 1.4029163122177124, "learning_rate": 4.4490432221220375e-06, "loss": 0.4136, "step": 7171 }, { "epoch": 0.4303113937721246, "grad_norm": 1.264439344406128, "learning_rate": 4.448388519552036e-06, "loss": 0.3645, "step": 7172 }, { "epoch": 0.43037139257214857, "grad_norm": 1.2976880073547363, "learning_rate": 4.447733781165721e-06, "loss": 0.4463, "step": 7173 }, { "epoch": 0.43043139137217257, "grad_norm": 1.167235255241394, "learning_rate": 4.44707900698782e-06, "loss": 0.3883, "step": 7174 }, { "epoch": 0.43049139017219656, "grad_norm": 1.2801432609558105, "learning_rate": 4.446424197043058e-06, "loss": 0.4219, "step": 7175 }, { "epoch": 0.43055138897222056, "grad_norm": 1.3615316152572632, "learning_rate": 4.445769351356167e-06, "loss": 0.4137, "step": 7176 }, { "epoch": 0.43061138777224456, "grad_norm": 1.3047860860824585, "learning_rate": 4.445114469951875e-06, "loss": 0.435, "step": 7177 }, { "epoch": 0.43067138657226856, "grad_norm": 1.4179682731628418, "learning_rate": 4.4444595528549165e-06, "loss": 0.4429, "step": 7178 }, { "epoch": 0.43073138537229255, "grad_norm": 1.213926911354065, "learning_rate": 4.443804600090024e-06, "loss": 0.3938, "step": 7179 }, { "epoch": 0.43079138417231655, "grad_norm": 1.3065012693405151, "learning_rate": 4.443149611681932e-06, "loss": 0.439, "step": 7180 }, { "epoch": 0.43085138297234055, "grad_norm": 1.1940735578536987, "learning_rate": 4.442494587655376e-06, "loss": 0.4686, "step": 7181 }, { "epoch": 0.43091138177236454, "grad_norm": 1.1822017431259155, "learning_rate": 4.441839528035093e-06, "loss": 0.4487, "step": 7182 }, { "epoch": 0.43097138057238854, "grad_norm": 1.0581943988800049, "learning_rate": 4.441184432845821e-06, "loss": 0.3621, "step": 7183 }, { "epoch": 0.43103137937241254, "grad_norm": 1.1826168298721313, "learning_rate": 4.440529302112303e-06, "loss": 0.4323, "step": 7184 }, { "epoch": 0.43109137817243653, "grad_norm": 1.1252824068069458, "learning_rate": 4.439874135859277e-06, "loss": 0.4101, "step": 7185 }, { "epoch": 0.43115137697246053, "grad_norm": 1.1716735363006592, "learning_rate": 4.439218934111487e-06, "loss": 0.3672, "step": 7186 }, { "epoch": 0.43121137577248453, "grad_norm": 1.2752447128295898, "learning_rate": 4.438563696893678e-06, "loss": 0.4185, "step": 7187 }, { "epoch": 0.4312713745725085, "grad_norm": 1.3488967418670654, "learning_rate": 4.437908424230593e-06, "loss": 0.4213, "step": 7188 }, { "epoch": 0.4313313733725325, "grad_norm": 1.2178713083267212, "learning_rate": 4.437253116146979e-06, "loss": 0.4043, "step": 7189 }, { "epoch": 0.4313913721725566, "grad_norm": 1.2433416843414307, "learning_rate": 4.436597772667588e-06, "loss": 0.371, "step": 7190 }, { "epoch": 0.4314513709725806, "grad_norm": 1.2245134115219116, "learning_rate": 4.435942393817164e-06, "loss": 0.412, "step": 7191 }, { "epoch": 0.43151136977260457, "grad_norm": 1.2895135879516602, "learning_rate": 4.435286979620461e-06, "loss": 0.4881, "step": 7192 }, { "epoch": 0.43157136857262857, "grad_norm": 1.3465758562088013, "learning_rate": 4.43463153010223e-06, "loss": 0.4234, "step": 7193 }, { "epoch": 0.43163136737265256, "grad_norm": 1.1923234462738037, "learning_rate": 4.4339760452872226e-06, "loss": 0.429, "step": 7194 }, { "epoch": 0.43169136617267656, "grad_norm": 1.1084668636322021, "learning_rate": 4.4333205252001964e-06, "loss": 0.358, "step": 7195 }, { "epoch": 0.43175136497270056, "grad_norm": 1.1371513605117798, "learning_rate": 4.432664969865905e-06, "loss": 0.3948, "step": 7196 }, { "epoch": 0.43181136377272455, "grad_norm": 1.3975509405136108, "learning_rate": 4.432009379309109e-06, "loss": 0.4673, "step": 7197 }, { "epoch": 0.43187136257274855, "grad_norm": 1.1728496551513672, "learning_rate": 4.431353753554564e-06, "loss": 0.4266, "step": 7198 }, { "epoch": 0.43193136137277255, "grad_norm": 1.3408551216125488, "learning_rate": 4.430698092627031e-06, "loss": 0.449, "step": 7199 }, { "epoch": 0.43199136017279655, "grad_norm": 1.2208144664764404, "learning_rate": 4.430042396551271e-06, "loss": 0.4529, "step": 7200 }, { "epoch": 0.43205135897282054, "grad_norm": 1.351743221282959, "learning_rate": 4.429386665352047e-06, "loss": 0.4217, "step": 7201 }, { "epoch": 0.43211135777284454, "grad_norm": 1.1838176250457764, "learning_rate": 4.428730899054123e-06, "loss": 0.38, "step": 7202 }, { "epoch": 0.43217135657286854, "grad_norm": 1.1998015642166138, "learning_rate": 4.4280750976822645e-06, "loss": 0.4588, "step": 7203 }, { "epoch": 0.43223135537289253, "grad_norm": 1.2823377847671509, "learning_rate": 4.427419261261237e-06, "loss": 0.4263, "step": 7204 }, { "epoch": 0.43229135417291653, "grad_norm": 1.2441881895065308, "learning_rate": 4.426763389815811e-06, "loss": 0.4341, "step": 7205 }, { "epoch": 0.43235135297294053, "grad_norm": 1.0669021606445312, "learning_rate": 4.426107483370752e-06, "loss": 0.359, "step": 7206 }, { "epoch": 0.4324113517729645, "grad_norm": 1.2524503469467163, "learning_rate": 4.425451541950833e-06, "loss": 0.4553, "step": 7207 }, { "epoch": 0.4324713505729885, "grad_norm": 1.34772789478302, "learning_rate": 4.424795565580827e-06, "loss": 0.451, "step": 7208 }, { "epoch": 0.4325313493730125, "grad_norm": 1.386609435081482, "learning_rate": 4.424139554285505e-06, "loss": 0.4289, "step": 7209 }, { "epoch": 0.4325913481730365, "grad_norm": 1.1526429653167725, "learning_rate": 4.423483508089643e-06, "loss": 0.4413, "step": 7210 }, { "epoch": 0.4326513469730605, "grad_norm": 1.3980916738510132, "learning_rate": 4.422827427018016e-06, "loss": 0.4067, "step": 7211 }, { "epoch": 0.43271134577308457, "grad_norm": 1.3097089529037476, "learning_rate": 4.422171311095401e-06, "loss": 0.4036, "step": 7212 }, { "epoch": 0.43277134457310856, "grad_norm": 1.274498701095581, "learning_rate": 4.421515160346577e-06, "loss": 0.4349, "step": 7213 }, { "epoch": 0.43283134337313256, "grad_norm": 1.4092791080474854, "learning_rate": 4.4208589747963236e-06, "loss": 0.4452, "step": 7214 }, { "epoch": 0.43289134217315656, "grad_norm": 1.4602694511413574, "learning_rate": 4.420202754469423e-06, "loss": 0.4699, "step": 7215 }, { "epoch": 0.43295134097318055, "grad_norm": 1.2778065204620361, "learning_rate": 4.419546499390657e-06, "loss": 0.4178, "step": 7216 }, { "epoch": 0.43301133977320455, "grad_norm": 1.163475751876831, "learning_rate": 4.418890209584809e-06, "loss": 0.4208, "step": 7217 }, { "epoch": 0.43307133857322855, "grad_norm": 1.3047744035720825, "learning_rate": 4.4182338850766636e-06, "loss": 0.4589, "step": 7218 }, { "epoch": 0.43313133737325255, "grad_norm": 1.1814101934432983, "learning_rate": 4.417577525891007e-06, "loss": 0.3754, "step": 7219 }, { "epoch": 0.43319133617327654, "grad_norm": 1.3541218042373657, "learning_rate": 4.41692113205263e-06, "loss": 0.446, "step": 7220 }, { "epoch": 0.43325133497330054, "grad_norm": 1.365248203277588, "learning_rate": 4.416264703586318e-06, "loss": 0.4214, "step": 7221 }, { "epoch": 0.43331133377332454, "grad_norm": 1.4138598442077637, "learning_rate": 4.415608240516863e-06, "loss": 0.4103, "step": 7222 }, { "epoch": 0.43337133257334853, "grad_norm": 1.2035186290740967, "learning_rate": 4.4149517428690555e-06, "loss": 0.386, "step": 7223 }, { "epoch": 0.43343133137337253, "grad_norm": 1.4452649354934692, "learning_rate": 4.414295210667689e-06, "loss": 0.4164, "step": 7224 }, { "epoch": 0.4334913301733965, "grad_norm": 1.1288286447525024, "learning_rate": 4.413638643937559e-06, "loss": 0.3961, "step": 7225 }, { "epoch": 0.4335513289734205, "grad_norm": 1.2063980102539062, "learning_rate": 4.41298204270346e-06, "loss": 0.4108, "step": 7226 }, { "epoch": 0.4336113277734445, "grad_norm": 1.1775301694869995, "learning_rate": 4.4123254069901884e-06, "loss": 0.4045, "step": 7227 }, { "epoch": 0.4336713265734685, "grad_norm": 1.2604475021362305, "learning_rate": 4.411668736822542e-06, "loss": 0.4597, "step": 7228 }, { "epoch": 0.4337313253734925, "grad_norm": 1.1468675136566162, "learning_rate": 4.411012032225322e-06, "loss": 0.3886, "step": 7229 }, { "epoch": 0.4337913241735165, "grad_norm": 1.4033160209655762, "learning_rate": 4.4103552932233275e-06, "loss": 0.4507, "step": 7230 }, { "epoch": 0.4338513229735405, "grad_norm": 1.342370867729187, "learning_rate": 4.409698519841362e-06, "loss": 0.4145, "step": 7231 }, { "epoch": 0.4339113217735645, "grad_norm": 1.3724607229232788, "learning_rate": 4.4090417121042276e-06, "loss": 0.3593, "step": 7232 }, { "epoch": 0.4339713205735885, "grad_norm": 1.2498388290405273, "learning_rate": 4.408384870036729e-06, "loss": 0.4312, "step": 7233 }, { "epoch": 0.4340313193736125, "grad_norm": 1.5223175287246704, "learning_rate": 4.407727993663672e-06, "loss": 0.4539, "step": 7234 }, { "epoch": 0.43409131817363655, "grad_norm": 1.2823594808578491, "learning_rate": 4.407071083009864e-06, "loss": 0.4263, "step": 7235 }, { "epoch": 0.43415131697366055, "grad_norm": 1.0901706218719482, "learning_rate": 4.406414138100115e-06, "loss": 0.3458, "step": 7236 }, { "epoch": 0.43421131577368455, "grad_norm": 1.3170667886734009, "learning_rate": 4.405757158959233e-06, "loss": 0.4008, "step": 7237 }, { "epoch": 0.43427131457370854, "grad_norm": 1.178292989730835, "learning_rate": 4.405100145612031e-06, "loss": 0.3927, "step": 7238 }, { "epoch": 0.43433131337373254, "grad_norm": 1.5682694911956787, "learning_rate": 4.404443098083318e-06, "loss": 0.4301, "step": 7239 }, { "epoch": 0.43439131217375654, "grad_norm": 1.3518668413162231, "learning_rate": 4.403786016397912e-06, "loss": 0.4357, "step": 7240 }, { "epoch": 0.43445131097378054, "grad_norm": 1.2276313304901123, "learning_rate": 4.403128900580624e-06, "loss": 0.4352, "step": 7241 }, { "epoch": 0.43451130977380453, "grad_norm": 1.3800711631774902, "learning_rate": 4.402471750656272e-06, "loss": 0.4415, "step": 7242 }, { "epoch": 0.43457130857382853, "grad_norm": 1.2519677877426147, "learning_rate": 4.401814566649676e-06, "loss": 0.4329, "step": 7243 }, { "epoch": 0.4346313073738525, "grad_norm": 1.2947808504104614, "learning_rate": 4.40115734858565e-06, "loss": 0.4002, "step": 7244 }, { "epoch": 0.4346913061738765, "grad_norm": 1.2723102569580078, "learning_rate": 4.4005000964890175e-06, "loss": 0.399, "step": 7245 }, { "epoch": 0.4347513049739005, "grad_norm": 1.2720279693603516, "learning_rate": 4.3998428103845986e-06, "loss": 0.4065, "step": 7246 }, { "epoch": 0.4348113037739245, "grad_norm": 1.1915340423583984, "learning_rate": 4.399185490297217e-06, "loss": 0.3659, "step": 7247 }, { "epoch": 0.4348713025739485, "grad_norm": 1.3803433179855347, "learning_rate": 4.398528136251696e-06, "loss": 0.4445, "step": 7248 }, { "epoch": 0.4349313013739725, "grad_norm": 1.1540318727493286, "learning_rate": 4.397870748272861e-06, "loss": 0.3432, "step": 7249 }, { "epoch": 0.4349913001739965, "grad_norm": 1.1396228075027466, "learning_rate": 4.397213326385538e-06, "loss": 0.4164, "step": 7250 }, { "epoch": 0.4350512989740205, "grad_norm": 1.3838919401168823, "learning_rate": 4.396555870614556e-06, "loss": 0.455, "step": 7251 }, { "epoch": 0.4351112977740445, "grad_norm": 1.2949414253234863, "learning_rate": 4.395898380984744e-06, "loss": 0.4467, "step": 7252 }, { "epoch": 0.4351712965740685, "grad_norm": 1.2835330963134766, "learning_rate": 4.3952408575209305e-06, "loss": 0.4391, "step": 7253 }, { "epoch": 0.4352312953740925, "grad_norm": 1.2234792709350586, "learning_rate": 4.394583300247948e-06, "loss": 0.437, "step": 7254 }, { "epoch": 0.4352912941741165, "grad_norm": 1.2573298215866089, "learning_rate": 4.393925709190632e-06, "loss": 0.4405, "step": 7255 }, { "epoch": 0.4353512929741405, "grad_norm": 1.3115018606185913, "learning_rate": 4.393268084373814e-06, "loss": 0.3983, "step": 7256 }, { "epoch": 0.43541129177416454, "grad_norm": 1.3013379573822021, "learning_rate": 4.39261042582233e-06, "loss": 0.4328, "step": 7257 }, { "epoch": 0.43547129057418854, "grad_norm": 1.2953858375549316, "learning_rate": 4.391952733561017e-06, "loss": 0.3885, "step": 7258 }, { "epoch": 0.43553128937421254, "grad_norm": 1.1218407154083252, "learning_rate": 4.391295007614712e-06, "loss": 0.4122, "step": 7259 }, { "epoch": 0.43559128817423654, "grad_norm": 1.1667815446853638, "learning_rate": 4.3906372480082556e-06, "loss": 0.4031, "step": 7260 }, { "epoch": 0.43565128697426053, "grad_norm": 1.363015055656433, "learning_rate": 4.38997945476649e-06, "loss": 0.3936, "step": 7261 }, { "epoch": 0.43571128577428453, "grad_norm": 1.2968573570251465, "learning_rate": 4.389321627914253e-06, "loss": 0.4668, "step": 7262 }, { "epoch": 0.4357712845743085, "grad_norm": 1.248355507850647, "learning_rate": 4.388663767476391e-06, "loss": 0.4033, "step": 7263 }, { "epoch": 0.4358312833743325, "grad_norm": 1.370185136795044, "learning_rate": 4.388005873477746e-06, "loss": 0.4352, "step": 7264 }, { "epoch": 0.4358912821743565, "grad_norm": 1.2312768697738647, "learning_rate": 4.387347945943166e-06, "loss": 0.4577, "step": 7265 }, { "epoch": 0.4359512809743805, "grad_norm": 1.129370093345642, "learning_rate": 4.3866899848974974e-06, "loss": 0.4244, "step": 7266 }, { "epoch": 0.4360112797744045, "grad_norm": 1.2375761270523071, "learning_rate": 4.3860319903655864e-06, "loss": 0.4176, "step": 7267 }, { "epoch": 0.4360712785744285, "grad_norm": 1.2705134153366089, "learning_rate": 4.385373962372285e-06, "loss": 0.4303, "step": 7268 }, { "epoch": 0.4361312773744525, "grad_norm": 1.2279253005981445, "learning_rate": 4.384715900942442e-06, "loss": 0.4095, "step": 7269 }, { "epoch": 0.4361912761744765, "grad_norm": 1.2046905755996704, "learning_rate": 4.384057806100911e-06, "loss": 0.4, "step": 7270 }, { "epoch": 0.4362512749745005, "grad_norm": 1.3335150480270386, "learning_rate": 4.383399677872543e-06, "loss": 0.4492, "step": 7271 }, { "epoch": 0.4363112737745245, "grad_norm": 1.2897588014602661, "learning_rate": 4.382741516282196e-06, "loss": 0.4257, "step": 7272 }, { "epoch": 0.4363712725745485, "grad_norm": 1.2633473873138428, "learning_rate": 4.382083321354722e-06, "loss": 0.387, "step": 7273 }, { "epoch": 0.4364312713745725, "grad_norm": 1.2916325330734253, "learning_rate": 4.38142509311498e-06, "loss": 0.4275, "step": 7274 }, { "epoch": 0.4364912701745965, "grad_norm": 1.1963415145874023, "learning_rate": 4.3807668315878274e-06, "loss": 0.3975, "step": 7275 }, { "epoch": 0.4365512689746205, "grad_norm": 1.3685282468795776, "learning_rate": 4.3801085367981255e-06, "loss": 0.4234, "step": 7276 }, { "epoch": 0.4366112677746445, "grad_norm": 1.3516572713851929, "learning_rate": 4.379450208770734e-06, "loss": 0.4886, "step": 7277 }, { "epoch": 0.4366712665746685, "grad_norm": 1.3529329299926758, "learning_rate": 4.378791847530515e-06, "loss": 0.418, "step": 7278 }, { "epoch": 0.4367312653746925, "grad_norm": 1.2764356136322021, "learning_rate": 4.37813345310233e-06, "loss": 0.4063, "step": 7279 }, { "epoch": 0.43679126417471653, "grad_norm": 1.257632851600647, "learning_rate": 4.377475025511047e-06, "loss": 0.4579, "step": 7280 }, { "epoch": 0.43685126297474053, "grad_norm": 1.3591140508651733, "learning_rate": 4.376816564781529e-06, "loss": 0.4203, "step": 7281 }, { "epoch": 0.4369112617747645, "grad_norm": 1.1904635429382324, "learning_rate": 4.376158070938644e-06, "loss": 0.3838, "step": 7282 }, { "epoch": 0.4369712605747885, "grad_norm": 1.306659460067749, "learning_rate": 4.375499544007261e-06, "loss": 0.3971, "step": 7283 }, { "epoch": 0.4370312593748125, "grad_norm": 1.2607795000076294, "learning_rate": 4.374840984012249e-06, "loss": 0.3942, "step": 7284 }, { "epoch": 0.4370912581748365, "grad_norm": 1.2616873979568481, "learning_rate": 4.374182390978478e-06, "loss": 0.4212, "step": 7285 }, { "epoch": 0.4371512569748605, "grad_norm": 1.4283125400543213, "learning_rate": 4.373523764930823e-06, "loss": 0.4284, "step": 7286 }, { "epoch": 0.4372112557748845, "grad_norm": 1.2682148218154907, "learning_rate": 4.3728651058941534e-06, "loss": 0.3812, "step": 7287 }, { "epoch": 0.4372712545749085, "grad_norm": 1.3409146070480347, "learning_rate": 4.372206413893346e-06, "loss": 0.4162, "step": 7288 }, { "epoch": 0.4373312533749325, "grad_norm": 1.2479617595672607, "learning_rate": 4.371547688953276e-06, "loss": 0.3935, "step": 7289 }, { "epoch": 0.4373912521749565, "grad_norm": 1.294950008392334, "learning_rate": 4.3708889310988205e-06, "loss": 0.3603, "step": 7290 }, { "epoch": 0.4374512509749805, "grad_norm": 1.2996361255645752, "learning_rate": 4.370230140354858e-06, "loss": 0.4103, "step": 7291 }, { "epoch": 0.4375112497750045, "grad_norm": 1.3076560497283936, "learning_rate": 4.3695713167462676e-06, "loss": 0.3952, "step": 7292 }, { "epoch": 0.4375712485750285, "grad_norm": 1.2852662801742554, "learning_rate": 4.368912460297931e-06, "loss": 0.4401, "step": 7293 }, { "epoch": 0.4376312473750525, "grad_norm": 1.1808350086212158, "learning_rate": 4.368253571034728e-06, "loss": 0.3893, "step": 7294 }, { "epoch": 0.4376912461750765, "grad_norm": 1.2874321937561035, "learning_rate": 4.367594648981545e-06, "loss": 0.4265, "step": 7295 }, { "epoch": 0.4377512449751005, "grad_norm": 1.3696876764297485, "learning_rate": 4.366935694163264e-06, "loss": 0.4751, "step": 7296 }, { "epoch": 0.4378112437751245, "grad_norm": 1.1739294528961182, "learning_rate": 4.366276706604772e-06, "loss": 0.411, "step": 7297 }, { "epoch": 0.4378712425751485, "grad_norm": 1.3560316562652588, "learning_rate": 4.365617686330955e-06, "loss": 0.446, "step": 7298 }, { "epoch": 0.4379312413751725, "grad_norm": 1.300600290298462, "learning_rate": 4.3649586333667025e-06, "loss": 0.4358, "step": 7299 }, { "epoch": 0.4379912401751965, "grad_norm": 1.182896614074707, "learning_rate": 4.364299547736902e-06, "loss": 0.4204, "step": 7300 }, { "epoch": 0.43805123897522047, "grad_norm": 1.3852096796035767, "learning_rate": 4.363640429466447e-06, "loss": 0.3973, "step": 7301 }, { "epoch": 0.4381112377752445, "grad_norm": 1.3333178758621216, "learning_rate": 4.362981278580226e-06, "loss": 0.4429, "step": 7302 }, { "epoch": 0.4381712365752685, "grad_norm": 1.3432674407958984, "learning_rate": 4.362322095103135e-06, "loss": 0.4452, "step": 7303 }, { "epoch": 0.4382312353752925, "grad_norm": 1.2251346111297607, "learning_rate": 4.361662879060066e-06, "loss": 0.3936, "step": 7304 }, { "epoch": 0.4382912341753165, "grad_norm": 1.3073331117630005, "learning_rate": 4.361003630475918e-06, "loss": 0.416, "step": 7305 }, { "epoch": 0.4383512329753405, "grad_norm": 1.4911023378372192, "learning_rate": 4.360344349375582e-06, "loss": 0.4307, "step": 7306 }, { "epoch": 0.4384112317753645, "grad_norm": 1.2033843994140625, "learning_rate": 4.359685035783964e-06, "loss": 0.417, "step": 7307 }, { "epoch": 0.4384712305753885, "grad_norm": 1.1747031211853027, "learning_rate": 4.359025689725956e-06, "loss": 0.4642, "step": 7308 }, { "epoch": 0.4385312293754125, "grad_norm": 1.2555875778198242, "learning_rate": 4.358366311226462e-06, "loss": 0.4129, "step": 7309 }, { "epoch": 0.4385912281754365, "grad_norm": 1.0859819650650024, "learning_rate": 4.357706900310383e-06, "loss": 0.3946, "step": 7310 }, { "epoch": 0.4386512269754605, "grad_norm": 1.3790267705917358, "learning_rate": 4.357047457002622e-06, "loss": 0.4649, "step": 7311 }, { "epoch": 0.4387112257754845, "grad_norm": 1.2395662069320679, "learning_rate": 4.3563879813280845e-06, "loss": 0.3993, "step": 7312 }, { "epoch": 0.4387712245755085, "grad_norm": 1.2398734092712402, "learning_rate": 4.355728473311673e-06, "loss": 0.4201, "step": 7313 }, { "epoch": 0.4388312233755325, "grad_norm": 1.2709176540374756, "learning_rate": 4.355068932978297e-06, "loss": 0.4413, "step": 7314 }, { "epoch": 0.4388912221755565, "grad_norm": 1.2452174425125122, "learning_rate": 4.354409360352863e-06, "loss": 0.3718, "step": 7315 }, { "epoch": 0.4389512209755805, "grad_norm": 1.1749274730682373, "learning_rate": 4.353749755460279e-06, "loss": 0.4373, "step": 7316 }, { "epoch": 0.4390112197756045, "grad_norm": 1.2430322170257568, "learning_rate": 4.353090118325457e-06, "loss": 0.4101, "step": 7317 }, { "epoch": 0.4390712185756285, "grad_norm": 1.2133749723434448, "learning_rate": 4.352430448973309e-06, "loss": 0.4083, "step": 7318 }, { "epoch": 0.43913121737565247, "grad_norm": 1.2983955144882202, "learning_rate": 4.351770747428745e-06, "loss": 0.4615, "step": 7319 }, { "epoch": 0.43919121617567647, "grad_norm": 1.337288737297058, "learning_rate": 4.351111013716682e-06, "loss": 0.4898, "step": 7320 }, { "epoch": 0.43925121497570047, "grad_norm": 1.2801355123519897, "learning_rate": 4.350451247862032e-06, "loss": 0.4616, "step": 7321 }, { "epoch": 0.43931121377572446, "grad_norm": 1.4294486045837402, "learning_rate": 4.349791449889714e-06, "loss": 0.42, "step": 7322 }, { "epoch": 0.43937121257574846, "grad_norm": 1.2589247226715088, "learning_rate": 4.349131619824644e-06, "loss": 0.4137, "step": 7323 }, { "epoch": 0.43943121137577246, "grad_norm": 1.1718322038650513, "learning_rate": 4.348471757691742e-06, "loss": 0.37, "step": 7324 }, { "epoch": 0.4394912101757965, "grad_norm": 1.1936661005020142, "learning_rate": 4.347811863515927e-06, "loss": 0.4182, "step": 7325 }, { "epoch": 0.4395512089758205, "grad_norm": 1.3118189573287964, "learning_rate": 4.347151937322122e-06, "loss": 0.377, "step": 7326 }, { "epoch": 0.4396112077758445, "grad_norm": 1.3246018886566162, "learning_rate": 4.346491979135245e-06, "loss": 0.421, "step": 7327 }, { "epoch": 0.4396712065758685, "grad_norm": 1.0752307176589966, "learning_rate": 4.345831988980225e-06, "loss": 0.3729, "step": 7328 }, { "epoch": 0.4397312053758925, "grad_norm": 1.2989099025726318, "learning_rate": 4.345171966881982e-06, "loss": 0.415, "step": 7329 }, { "epoch": 0.4397912041759165, "grad_norm": 1.218541145324707, "learning_rate": 4.344511912865447e-06, "loss": 0.4206, "step": 7330 }, { "epoch": 0.4398512029759405, "grad_norm": 1.1991877555847168, "learning_rate": 4.343851826955543e-06, "loss": 0.3932, "step": 7331 }, { "epoch": 0.4399112017759645, "grad_norm": 1.2379978895187378, "learning_rate": 4.343191709177202e-06, "loss": 0.4248, "step": 7332 }, { "epoch": 0.4399712005759885, "grad_norm": 1.1436773538589478, "learning_rate": 4.34253155955535e-06, "loss": 0.3887, "step": 7333 }, { "epoch": 0.4400311993760125, "grad_norm": 1.2225114107131958, "learning_rate": 4.34187137811492e-06, "loss": 0.4416, "step": 7334 }, { "epoch": 0.4400911981760365, "grad_norm": 1.3560420274734497, "learning_rate": 4.341211164880843e-06, "loss": 0.4072, "step": 7335 }, { "epoch": 0.4401511969760605, "grad_norm": 1.3778784275054932, "learning_rate": 4.340550919878053e-06, "loss": 0.4421, "step": 7336 }, { "epoch": 0.4402111957760845, "grad_norm": 1.2732561826705933, "learning_rate": 4.339890643131486e-06, "loss": 0.4536, "step": 7337 }, { "epoch": 0.44027119457610847, "grad_norm": 1.3420114517211914, "learning_rate": 4.339230334666074e-06, "loss": 0.4306, "step": 7338 }, { "epoch": 0.44033119337613247, "grad_norm": 1.2757823467254639, "learning_rate": 4.3385699945067566e-06, "loss": 0.4327, "step": 7339 }, { "epoch": 0.44039119217615647, "grad_norm": 1.1605618000030518, "learning_rate": 4.337909622678471e-06, "loss": 0.4091, "step": 7340 }, { "epoch": 0.44045119097618046, "grad_norm": 1.22830331325531, "learning_rate": 4.337249219206156e-06, "loss": 0.4088, "step": 7341 }, { "epoch": 0.44051118977620446, "grad_norm": 1.2669261693954468, "learning_rate": 4.336588784114753e-06, "loss": 0.4033, "step": 7342 }, { "epoch": 0.44057118857622846, "grad_norm": 1.264644742012024, "learning_rate": 4.335928317429203e-06, "loss": 0.4323, "step": 7343 }, { "epoch": 0.44063118737625245, "grad_norm": 1.3290154933929443, "learning_rate": 4.335267819174448e-06, "loss": 0.391, "step": 7344 }, { "epoch": 0.44069118617627645, "grad_norm": 1.2817397117614746, "learning_rate": 4.334607289375434e-06, "loss": 0.3942, "step": 7345 }, { "epoch": 0.44075118497630045, "grad_norm": 1.2385987043380737, "learning_rate": 4.3339467280571055e-06, "loss": 0.3927, "step": 7346 }, { "epoch": 0.4408111837763245, "grad_norm": 1.354921817779541, "learning_rate": 4.333286135244408e-06, "loss": 0.4224, "step": 7347 }, { "epoch": 0.4408711825763485, "grad_norm": 1.3449229001998901, "learning_rate": 4.332625510962289e-06, "loss": 0.4259, "step": 7348 }, { "epoch": 0.4409311813763725, "grad_norm": 1.2670048475265503, "learning_rate": 4.331964855235699e-06, "loss": 0.3742, "step": 7349 }, { "epoch": 0.4409911801763965, "grad_norm": 1.232125997543335, "learning_rate": 4.331304168089586e-06, "loss": 0.4287, "step": 7350 }, { "epoch": 0.4410511789764205, "grad_norm": 1.3006221055984497, "learning_rate": 4.330643449548903e-06, "loss": 0.4028, "step": 7351 }, { "epoch": 0.4411111777764445, "grad_norm": 1.2184232473373413, "learning_rate": 4.3299826996385985e-06, "loss": 0.4341, "step": 7352 }, { "epoch": 0.4411711765764685, "grad_norm": 1.278022289276123, "learning_rate": 4.329321918383632e-06, "loss": 0.4506, "step": 7353 }, { "epoch": 0.4412311753764925, "grad_norm": 1.3972619771957397, "learning_rate": 4.328661105808953e-06, "loss": 0.4644, "step": 7354 }, { "epoch": 0.4412911741765165, "grad_norm": 1.1925793886184692, "learning_rate": 4.328000261939521e-06, "loss": 0.4231, "step": 7355 }, { "epoch": 0.4413511729765405, "grad_norm": 1.4240072965621948, "learning_rate": 4.327339386800291e-06, "loss": 0.4425, "step": 7356 }, { "epoch": 0.44141117177656447, "grad_norm": 1.2344889640808105, "learning_rate": 4.326678480416222e-06, "loss": 0.4252, "step": 7357 }, { "epoch": 0.44147117057658847, "grad_norm": 1.3492380380630493, "learning_rate": 4.3260175428122725e-06, "loss": 0.4474, "step": 7358 }, { "epoch": 0.44153116937661246, "grad_norm": 1.2070916891098022, "learning_rate": 4.325356574013403e-06, "loss": 0.3779, "step": 7359 }, { "epoch": 0.44159116817663646, "grad_norm": 1.2132922410964966, "learning_rate": 4.324695574044577e-06, "loss": 0.3977, "step": 7360 }, { "epoch": 0.44165116697666046, "grad_norm": 1.2584842443466187, "learning_rate": 4.324034542930757e-06, "loss": 0.4141, "step": 7361 }, { "epoch": 0.44171116577668446, "grad_norm": 1.2900030612945557, "learning_rate": 4.323373480696905e-06, "loss": 0.4441, "step": 7362 }, { "epoch": 0.44177116457670845, "grad_norm": 1.2383980751037598, "learning_rate": 4.3227123873679895e-06, "loss": 0.3818, "step": 7363 }, { "epoch": 0.44183116337673245, "grad_norm": 1.2848052978515625, "learning_rate": 4.3220512629689745e-06, "loss": 0.3784, "step": 7364 }, { "epoch": 0.44189116217675645, "grad_norm": 1.2197233438491821, "learning_rate": 4.321390107524829e-06, "loss": 0.4026, "step": 7365 }, { "epoch": 0.44195116097678044, "grad_norm": 1.3773170709609985, "learning_rate": 4.3207289210605215e-06, "loss": 0.462, "step": 7366 }, { "epoch": 0.44201115977680444, "grad_norm": 1.3417260646820068, "learning_rate": 4.320067703601021e-06, "loss": 0.4086, "step": 7367 }, { "epoch": 0.44207115857682844, "grad_norm": 1.2464910745620728, "learning_rate": 4.319406455171301e-06, "loss": 0.3951, "step": 7368 }, { "epoch": 0.4421311573768525, "grad_norm": 1.3208985328674316, "learning_rate": 4.318745175796331e-06, "loss": 0.3977, "step": 7369 }, { "epoch": 0.4421911561768765, "grad_norm": 1.2461618185043335, "learning_rate": 4.3180838655010865e-06, "loss": 0.3878, "step": 7370 }, { "epoch": 0.4422511549769005, "grad_norm": 1.2673017978668213, "learning_rate": 4.317422524310541e-06, "loss": 0.4406, "step": 7371 }, { "epoch": 0.4423111537769245, "grad_norm": 1.1865283250808716, "learning_rate": 4.316761152249673e-06, "loss": 0.4138, "step": 7372 }, { "epoch": 0.4423711525769485, "grad_norm": 1.3624613285064697, "learning_rate": 4.316099749343455e-06, "loss": 0.4473, "step": 7373 }, { "epoch": 0.4424311513769725, "grad_norm": 1.210033655166626, "learning_rate": 4.315438315616869e-06, "loss": 0.3838, "step": 7374 }, { "epoch": 0.4424911501769965, "grad_norm": 1.23308265209198, "learning_rate": 4.314776851094892e-06, "loss": 0.4067, "step": 7375 }, { "epoch": 0.44255114897702047, "grad_norm": 1.317202091217041, "learning_rate": 4.314115355802505e-06, "loss": 0.3966, "step": 7376 }, { "epoch": 0.44261114777704447, "grad_norm": 1.4913071393966675, "learning_rate": 4.313453829764692e-06, "loss": 0.4539, "step": 7377 }, { "epoch": 0.44267114657706846, "grad_norm": 1.2257438898086548, "learning_rate": 4.312792273006433e-06, "loss": 0.3859, "step": 7378 }, { "epoch": 0.44273114537709246, "grad_norm": 1.3593769073486328, "learning_rate": 4.312130685552713e-06, "loss": 0.4245, "step": 7379 }, { "epoch": 0.44279114417711646, "grad_norm": 1.2541749477386475, "learning_rate": 4.311469067428517e-06, "loss": 0.3965, "step": 7380 }, { "epoch": 0.44285114297714046, "grad_norm": 1.3820462226867676, "learning_rate": 4.3108074186588314e-06, "loss": 0.4296, "step": 7381 }, { "epoch": 0.44291114177716445, "grad_norm": 1.12641441822052, "learning_rate": 4.310145739268643e-06, "loss": 0.4353, "step": 7382 }, { "epoch": 0.44297114057718845, "grad_norm": 1.2927383184432983, "learning_rate": 4.309484029282942e-06, "loss": 0.4242, "step": 7383 }, { "epoch": 0.44303113937721245, "grad_norm": 1.1410480737686157, "learning_rate": 4.308822288726717e-06, "loss": 0.4083, "step": 7384 }, { "epoch": 0.44309113817723644, "grad_norm": 1.1282764673233032, "learning_rate": 4.308160517624957e-06, "loss": 0.3501, "step": 7385 }, { "epoch": 0.44315113697726044, "grad_norm": 1.2665618658065796, "learning_rate": 4.3074987160026595e-06, "loss": 0.4169, "step": 7386 }, { "epoch": 0.44321113577728444, "grad_norm": 1.3162713050842285, "learning_rate": 4.306836883884813e-06, "loss": 0.4191, "step": 7387 }, { "epoch": 0.44327113457730843, "grad_norm": 1.2256156206130981, "learning_rate": 4.306175021296413e-06, "loss": 0.407, "step": 7388 }, { "epoch": 0.44333113337733243, "grad_norm": 1.2178664207458496, "learning_rate": 4.305513128262456e-06, "loss": 0.4011, "step": 7389 }, { "epoch": 0.44339113217735643, "grad_norm": 1.1517523527145386, "learning_rate": 4.304851204807938e-06, "loss": 0.3981, "step": 7390 }, { "epoch": 0.4434511309773804, "grad_norm": 1.3221341371536255, "learning_rate": 4.304189250957857e-06, "loss": 0.3974, "step": 7391 }, { "epoch": 0.4435111297774045, "grad_norm": 1.426912784576416, "learning_rate": 4.303527266737212e-06, "loss": 0.4489, "step": 7392 }, { "epoch": 0.4435711285774285, "grad_norm": 1.2828443050384521, "learning_rate": 4.302865252171001e-06, "loss": 0.4053, "step": 7393 }, { "epoch": 0.44363112737745247, "grad_norm": 1.3660829067230225, "learning_rate": 4.302203207284229e-06, "loss": 0.4876, "step": 7394 }, { "epoch": 0.44369112617747647, "grad_norm": 1.2015149593353271, "learning_rate": 4.301541132101897e-06, "loss": 0.4477, "step": 7395 }, { "epoch": 0.44375112497750047, "grad_norm": 1.1946144104003906, "learning_rate": 4.300879026649008e-06, "loss": 0.3726, "step": 7396 }, { "epoch": 0.44381112377752446, "grad_norm": 1.39069664478302, "learning_rate": 4.3002168909505675e-06, "loss": 0.4718, "step": 7397 }, { "epoch": 0.44387112257754846, "grad_norm": 1.2478089332580566, "learning_rate": 4.299554725031579e-06, "loss": 0.4255, "step": 7398 }, { "epoch": 0.44393112137757246, "grad_norm": 1.3629004955291748, "learning_rate": 4.2988925289170525e-06, "loss": 0.486, "step": 7399 }, { "epoch": 0.44399112017759645, "grad_norm": 1.302047848701477, "learning_rate": 4.298230302631995e-06, "loss": 0.4472, "step": 7400 }, { "epoch": 0.44405111897762045, "grad_norm": 1.1143501996994019, "learning_rate": 4.297568046201416e-06, "loss": 0.4345, "step": 7401 }, { "epoch": 0.44411111777764445, "grad_norm": 1.2877192497253418, "learning_rate": 4.2969057596503245e-06, "loss": 0.4221, "step": 7402 }, { "epoch": 0.44417111657766845, "grad_norm": 1.1584503650665283, "learning_rate": 4.296243443003734e-06, "loss": 0.3923, "step": 7403 }, { "epoch": 0.44423111537769244, "grad_norm": 1.2986671924591064, "learning_rate": 4.2955810962866555e-06, "loss": 0.3864, "step": 7404 }, { "epoch": 0.44429111417771644, "grad_norm": 1.1931750774383545, "learning_rate": 4.294918719524104e-06, "loss": 0.423, "step": 7405 }, { "epoch": 0.44435111297774044, "grad_norm": 1.1577476263046265, "learning_rate": 4.294256312741095e-06, "loss": 0.3819, "step": 7406 }, { "epoch": 0.44441111177776443, "grad_norm": 1.2462037801742554, "learning_rate": 4.2935938759626425e-06, "loss": 0.3997, "step": 7407 }, { "epoch": 0.44447111057778843, "grad_norm": 1.402134656906128, "learning_rate": 4.2929314092137665e-06, "loss": 0.4301, "step": 7408 }, { "epoch": 0.44453110937781243, "grad_norm": 1.3816579580307007, "learning_rate": 4.292268912519482e-06, "loss": 0.4342, "step": 7409 }, { "epoch": 0.4445911081778364, "grad_norm": 1.3273667097091675, "learning_rate": 4.291606385904811e-06, "loss": 0.3983, "step": 7410 }, { "epoch": 0.4446511069778604, "grad_norm": 1.3468955755233765, "learning_rate": 4.290943829394774e-06, "loss": 0.4437, "step": 7411 }, { "epoch": 0.4447111057778844, "grad_norm": 1.3803200721740723, "learning_rate": 4.290281243014391e-06, "loss": 0.4404, "step": 7412 }, { "epoch": 0.4447711045779084, "grad_norm": 1.0720314979553223, "learning_rate": 4.289618626788686e-06, "loss": 0.3828, "step": 7413 }, { "epoch": 0.44483110337793247, "grad_norm": 1.2909950017929077, "learning_rate": 4.288955980742684e-06, "loss": 0.4182, "step": 7414 }, { "epoch": 0.44489110217795647, "grad_norm": 1.2002211809158325, "learning_rate": 4.288293304901408e-06, "loss": 0.422, "step": 7415 }, { "epoch": 0.44495110097798046, "grad_norm": 1.3040322065353394, "learning_rate": 4.287630599289887e-06, "loss": 0.4305, "step": 7416 }, { "epoch": 0.44501109977800446, "grad_norm": 1.332910418510437, "learning_rate": 4.286967863933144e-06, "loss": 0.4221, "step": 7417 }, { "epoch": 0.44507109857802846, "grad_norm": 1.240993618965149, "learning_rate": 4.286305098856212e-06, "loss": 0.4345, "step": 7418 }, { "epoch": 0.44513109737805245, "grad_norm": 1.1076430082321167, "learning_rate": 4.285642304084118e-06, "loss": 0.3722, "step": 7419 }, { "epoch": 0.44519109617807645, "grad_norm": 1.3099161386489868, "learning_rate": 4.2849794796418945e-06, "loss": 0.4163, "step": 7420 }, { "epoch": 0.44525109497810045, "grad_norm": 1.360174536705017, "learning_rate": 4.284316625554572e-06, "loss": 0.3748, "step": 7421 }, { "epoch": 0.44531109377812444, "grad_norm": 1.216883897781372, "learning_rate": 4.283653741847184e-06, "loss": 0.4553, "step": 7422 }, { "epoch": 0.44537109257814844, "grad_norm": 1.2418440580368042, "learning_rate": 4.282990828544763e-06, "loss": 0.4262, "step": 7423 }, { "epoch": 0.44543109137817244, "grad_norm": 1.2551723718643188, "learning_rate": 4.2823278856723475e-06, "loss": 0.4017, "step": 7424 }, { "epoch": 0.44549109017819644, "grad_norm": 1.297607183456421, "learning_rate": 4.2816649132549714e-06, "loss": 0.3982, "step": 7425 }, { "epoch": 0.44555108897822043, "grad_norm": 1.2569730281829834, "learning_rate": 4.281001911317674e-06, "loss": 0.4483, "step": 7426 }, { "epoch": 0.44561108777824443, "grad_norm": 1.2078849077224731, "learning_rate": 4.280338879885491e-06, "loss": 0.4464, "step": 7427 }, { "epoch": 0.4456710865782684, "grad_norm": 1.2721885442733765, "learning_rate": 4.2796758189834646e-06, "loss": 0.41, "step": 7428 }, { "epoch": 0.4457310853782924, "grad_norm": 1.2176011800765991, "learning_rate": 4.279012728636634e-06, "loss": 0.3648, "step": 7429 }, { "epoch": 0.4457910841783164, "grad_norm": 1.2971314191818237, "learning_rate": 4.278349608870042e-06, "loss": 0.411, "step": 7430 }, { "epoch": 0.4458510829783404, "grad_norm": 1.3028661012649536, "learning_rate": 4.277686459708731e-06, "loss": 0.4451, "step": 7431 }, { "epoch": 0.4459110817783644, "grad_norm": 1.1556612253189087, "learning_rate": 4.277023281177747e-06, "loss": 0.4246, "step": 7432 }, { "epoch": 0.4459710805783884, "grad_norm": 1.170055866241455, "learning_rate": 4.276360073302133e-06, "loss": 0.4177, "step": 7433 }, { "epoch": 0.4460310793784124, "grad_norm": 1.3697514533996582, "learning_rate": 4.275696836106936e-06, "loss": 0.396, "step": 7434 }, { "epoch": 0.4460910781784364, "grad_norm": 1.3041383028030396, "learning_rate": 4.275033569617204e-06, "loss": 0.4407, "step": 7435 }, { "epoch": 0.4461510769784604, "grad_norm": 1.1660292148590088, "learning_rate": 4.274370273857984e-06, "loss": 0.381, "step": 7436 }, { "epoch": 0.44621107577848446, "grad_norm": 1.456703782081604, "learning_rate": 4.273706948854329e-06, "loss": 0.4427, "step": 7437 }, { "epoch": 0.44627107457850845, "grad_norm": 1.2818011045455933, "learning_rate": 4.273043594631285e-06, "loss": 0.4143, "step": 7438 }, { "epoch": 0.44633107337853245, "grad_norm": 1.13911771774292, "learning_rate": 4.272380211213908e-06, "loss": 0.4361, "step": 7439 }, { "epoch": 0.44639107217855645, "grad_norm": 1.3176007270812988, "learning_rate": 4.271716798627248e-06, "loss": 0.4371, "step": 7440 }, { "epoch": 0.44645107097858044, "grad_norm": 1.2248109579086304, "learning_rate": 4.271053356896362e-06, "loss": 0.3919, "step": 7441 }, { "epoch": 0.44651106977860444, "grad_norm": 1.3818942308425903, "learning_rate": 4.270389886046302e-06, "loss": 0.4222, "step": 7442 }, { "epoch": 0.44657106857862844, "grad_norm": 1.2634855508804321, "learning_rate": 4.269726386102128e-06, "loss": 0.4811, "step": 7443 }, { "epoch": 0.44663106737865244, "grad_norm": 1.2000287771224976, "learning_rate": 4.269062857088893e-06, "loss": 0.4034, "step": 7444 }, { "epoch": 0.44669106617867643, "grad_norm": 1.3114606142044067, "learning_rate": 4.26839929903166e-06, "loss": 0.4401, "step": 7445 }, { "epoch": 0.44675106497870043, "grad_norm": 1.3059450387954712, "learning_rate": 4.267735711955484e-06, "loss": 0.4247, "step": 7446 }, { "epoch": 0.4468110637787244, "grad_norm": 1.291343331336975, "learning_rate": 4.26707209588543e-06, "loss": 0.439, "step": 7447 }, { "epoch": 0.4468710625787484, "grad_norm": 1.3234394788742065, "learning_rate": 4.266408450846556e-06, "loss": 0.3954, "step": 7448 }, { "epoch": 0.4469310613787724, "grad_norm": 1.2768625020980835, "learning_rate": 4.265744776863928e-06, "loss": 0.3954, "step": 7449 }, { "epoch": 0.4469910601787964, "grad_norm": 1.2867839336395264, "learning_rate": 4.2650810739626075e-06, "loss": 0.455, "step": 7450 }, { "epoch": 0.4470510589788204, "grad_norm": 1.39531672000885, "learning_rate": 4.2644173421676615e-06, "loss": 0.4302, "step": 7451 }, { "epoch": 0.4471110577788444, "grad_norm": 1.2999502420425415, "learning_rate": 4.263753581504153e-06, "loss": 0.4533, "step": 7452 }, { "epoch": 0.4471710565788684, "grad_norm": 1.2037642002105713, "learning_rate": 4.263089791997154e-06, "loss": 0.3763, "step": 7453 }, { "epoch": 0.4472310553788924, "grad_norm": 1.2119483947753906, "learning_rate": 4.262425973671729e-06, "loss": 0.4254, "step": 7454 }, { "epoch": 0.4472910541789164, "grad_norm": 1.1684067249298096, "learning_rate": 4.261762126552949e-06, "loss": 0.4072, "step": 7455 }, { "epoch": 0.4473510529789404, "grad_norm": 1.1904610395431519, "learning_rate": 4.261098250665883e-06, "loss": 0.3434, "step": 7456 }, { "epoch": 0.4474110517789644, "grad_norm": 1.2528566122055054, "learning_rate": 4.260434346035606e-06, "loss": 0.4065, "step": 7457 }, { "epoch": 0.4474710505789884, "grad_norm": 1.3127890825271606, "learning_rate": 4.259770412687185e-06, "loss": 0.3957, "step": 7458 }, { "epoch": 0.44753104937901245, "grad_norm": 1.2411401271820068, "learning_rate": 4.259106450645699e-06, "loss": 0.4237, "step": 7459 }, { "epoch": 0.44759104817903644, "grad_norm": 1.1129658222198486, "learning_rate": 4.258442459936221e-06, "loss": 0.3761, "step": 7460 }, { "epoch": 0.44765104697906044, "grad_norm": 1.2373106479644775, "learning_rate": 4.257778440583827e-06, "loss": 0.3986, "step": 7461 }, { "epoch": 0.44771104577908444, "grad_norm": 1.3001657724380493, "learning_rate": 4.257114392613592e-06, "loss": 0.3694, "step": 7462 }, { "epoch": 0.44777104457910843, "grad_norm": 1.25166654586792, "learning_rate": 4.256450316050596e-06, "loss": 0.4114, "step": 7463 }, { "epoch": 0.44783104337913243, "grad_norm": 1.4956482648849487, "learning_rate": 4.25578621091992e-06, "loss": 0.4325, "step": 7464 }, { "epoch": 0.44789104217915643, "grad_norm": 1.390878438949585, "learning_rate": 4.25512207724664e-06, "loss": 0.4565, "step": 7465 }, { "epoch": 0.4479510409791804, "grad_norm": 1.1481784582138062, "learning_rate": 4.254457915055841e-06, "loss": 0.3932, "step": 7466 }, { "epoch": 0.4480110397792044, "grad_norm": 1.134543538093567, "learning_rate": 4.2537937243726025e-06, "loss": 0.3692, "step": 7467 }, { "epoch": 0.4480710385792284, "grad_norm": 1.2855665683746338, "learning_rate": 4.253129505222011e-06, "loss": 0.3842, "step": 7468 }, { "epoch": 0.4481310373792524, "grad_norm": 1.1574361324310303, "learning_rate": 4.252465257629147e-06, "loss": 0.3356, "step": 7469 }, { "epoch": 0.4481910361792764, "grad_norm": 1.273590326309204, "learning_rate": 4.2518009816191e-06, "loss": 0.4089, "step": 7470 }, { "epoch": 0.4482510349793004, "grad_norm": 1.3900741338729858, "learning_rate": 4.251136677216955e-06, "loss": 0.464, "step": 7471 }, { "epoch": 0.4483110337793244, "grad_norm": 1.207834005355835, "learning_rate": 4.2504723444478e-06, "loss": 0.4126, "step": 7472 }, { "epoch": 0.4483710325793484, "grad_norm": 1.243080735206604, "learning_rate": 4.249807983336723e-06, "loss": 0.4054, "step": 7473 }, { "epoch": 0.4484310313793724, "grad_norm": 1.1728674173355103, "learning_rate": 4.249143593908816e-06, "loss": 0.3843, "step": 7474 }, { "epoch": 0.4484910301793964, "grad_norm": 1.312483787536621, "learning_rate": 4.248479176189166e-06, "loss": 0.4707, "step": 7475 }, { "epoch": 0.4485510289794204, "grad_norm": 1.1664000749588013, "learning_rate": 4.24781473020287e-06, "loss": 0.4053, "step": 7476 }, { "epoch": 0.4486110277794444, "grad_norm": 1.294724702835083, "learning_rate": 4.247150255975018e-06, "loss": 0.3793, "step": 7477 }, { "epoch": 0.4486710265794684, "grad_norm": 1.293521761894226, "learning_rate": 4.246485753530705e-06, "loss": 0.4432, "step": 7478 }, { "epoch": 0.4487310253794924, "grad_norm": 1.280900478363037, "learning_rate": 4.245821222895026e-06, "loss": 0.4116, "step": 7479 }, { "epoch": 0.4487910241795164, "grad_norm": 1.2726751565933228, "learning_rate": 4.245156664093078e-06, "loss": 0.4418, "step": 7480 }, { "epoch": 0.4488510229795404, "grad_norm": 1.2555532455444336, "learning_rate": 4.244492077149956e-06, "loss": 0.4049, "step": 7481 }, { "epoch": 0.44891102177956443, "grad_norm": 1.2288213968276978, "learning_rate": 4.24382746209076e-06, "loss": 0.3981, "step": 7482 }, { "epoch": 0.44897102057958843, "grad_norm": 1.2725703716278076, "learning_rate": 4.243162818940592e-06, "loss": 0.3848, "step": 7483 }, { "epoch": 0.44903101937961243, "grad_norm": 1.203579068183899, "learning_rate": 4.242498147724548e-06, "loss": 0.454, "step": 7484 }, { "epoch": 0.4490910181796364, "grad_norm": 1.300033450126648, "learning_rate": 4.241833448467733e-06, "loss": 0.3967, "step": 7485 }, { "epoch": 0.4491510169796604, "grad_norm": 1.2228357791900635, "learning_rate": 4.241168721195248e-06, "loss": 0.3907, "step": 7486 }, { "epoch": 0.4492110157796844, "grad_norm": 1.2502349615097046, "learning_rate": 4.240503965932197e-06, "loss": 0.4136, "step": 7487 }, { "epoch": 0.4492710145797084, "grad_norm": 1.1884369850158691, "learning_rate": 4.239839182703685e-06, "loss": 0.4608, "step": 7488 }, { "epoch": 0.4493310133797324, "grad_norm": 1.4761877059936523, "learning_rate": 4.239174371534818e-06, "loss": 0.4515, "step": 7489 }, { "epoch": 0.4493910121797564, "grad_norm": 1.3344935178756714, "learning_rate": 4.2385095324507024e-06, "loss": 0.4149, "step": 7490 }, { "epoch": 0.4494510109797804, "grad_norm": 1.157394289970398, "learning_rate": 4.237844665476448e-06, "loss": 0.3546, "step": 7491 }, { "epoch": 0.4495110097798044, "grad_norm": 1.3723313808441162, "learning_rate": 4.23717977063716e-06, "loss": 0.4499, "step": 7492 }, { "epoch": 0.4495710085798284, "grad_norm": 1.2210358381271362, "learning_rate": 4.236514847957951e-06, "loss": 0.3826, "step": 7493 }, { "epoch": 0.4496310073798524, "grad_norm": 1.323914647102356, "learning_rate": 4.235849897463931e-06, "loss": 0.4103, "step": 7494 }, { "epoch": 0.4496910061798764, "grad_norm": 1.3074736595153809, "learning_rate": 4.235184919180215e-06, "loss": 0.3951, "step": 7495 }, { "epoch": 0.4497510049799004, "grad_norm": 1.319399356842041, "learning_rate": 4.234519913131913e-06, "loss": 0.4685, "step": 7496 }, { "epoch": 0.4498110037799244, "grad_norm": 1.2109417915344238, "learning_rate": 4.233854879344141e-06, "loss": 0.3537, "step": 7497 }, { "epoch": 0.4498710025799484, "grad_norm": 1.3536763191223145, "learning_rate": 4.233189817842013e-06, "loss": 0.437, "step": 7498 }, { "epoch": 0.4499310013799724, "grad_norm": 1.3459535837173462, "learning_rate": 4.2325247286506454e-06, "loss": 0.3934, "step": 7499 }, { "epoch": 0.4499910001799964, "grad_norm": 1.2023531198501587, "learning_rate": 4.231859611795157e-06, "loss": 0.3924, "step": 7500 }, { "epoch": 0.4500509989800204, "grad_norm": 1.1507484912872314, "learning_rate": 4.231194467300665e-06, "loss": 0.3708, "step": 7501 }, { "epoch": 0.4501109977800444, "grad_norm": 1.2163670063018799, "learning_rate": 4.23052929519229e-06, "loss": 0.37, "step": 7502 }, { "epoch": 0.45017099658006837, "grad_norm": 1.1034619808197021, "learning_rate": 4.2298640954951516e-06, "loss": 0.3978, "step": 7503 }, { "epoch": 0.4502309953800924, "grad_norm": 1.3116717338562012, "learning_rate": 4.229198868234371e-06, "loss": 0.4418, "step": 7504 }, { "epoch": 0.4502909941801164, "grad_norm": 1.1385692358016968, "learning_rate": 4.228533613435072e-06, "loss": 0.3635, "step": 7505 }, { "epoch": 0.4503509929801404, "grad_norm": 1.332765817642212, "learning_rate": 4.227868331122377e-06, "loss": 0.4401, "step": 7506 }, { "epoch": 0.4504109917801644, "grad_norm": 1.1098378896713257, "learning_rate": 4.2272030213214115e-06, "loss": 0.387, "step": 7507 }, { "epoch": 0.4504709905801884, "grad_norm": 1.1988909244537354, "learning_rate": 4.226537684057302e-06, "loss": 0.4102, "step": 7508 }, { "epoch": 0.4505309893802124, "grad_norm": 1.181193232536316, "learning_rate": 4.225872319355173e-06, "loss": 0.386, "step": 7509 }, { "epoch": 0.4505909881802364, "grad_norm": 1.2572425603866577, "learning_rate": 4.225206927240154e-06, "loss": 0.4462, "step": 7510 }, { "epoch": 0.4506509869802604, "grad_norm": 1.2393349409103394, "learning_rate": 4.2245415077373725e-06, "loss": 0.3752, "step": 7511 }, { "epoch": 0.4507109857802844, "grad_norm": 1.31586492061615, "learning_rate": 4.223876060871961e-06, "loss": 0.4526, "step": 7512 }, { "epoch": 0.4507709845803084, "grad_norm": 1.2983225584030151, "learning_rate": 4.223210586669047e-06, "loss": 0.4202, "step": 7513 }, { "epoch": 0.4508309833803324, "grad_norm": 1.2316806316375732, "learning_rate": 4.222545085153765e-06, "loss": 0.365, "step": 7514 }, { "epoch": 0.4508909821803564, "grad_norm": 1.2914661169052124, "learning_rate": 4.221879556351248e-06, "loss": 0.4511, "step": 7515 }, { "epoch": 0.4509509809803804, "grad_norm": 1.3497081995010376, "learning_rate": 4.221214000286627e-06, "loss": 0.3947, "step": 7516 }, { "epoch": 0.4510109797804044, "grad_norm": 1.16147780418396, "learning_rate": 4.22054841698504e-06, "loss": 0.3581, "step": 7517 }, { "epoch": 0.4510709785804284, "grad_norm": 1.2877341508865356, "learning_rate": 4.219882806471622e-06, "loss": 0.4521, "step": 7518 }, { "epoch": 0.4511309773804524, "grad_norm": 1.2851791381835938, "learning_rate": 4.2192171687715104e-06, "loss": 0.4006, "step": 7519 }, { "epoch": 0.4511909761804764, "grad_norm": 1.3274550437927246, "learning_rate": 4.218551503909843e-06, "loss": 0.4513, "step": 7520 }, { "epoch": 0.4512509749805004, "grad_norm": 1.3039820194244385, "learning_rate": 4.217885811911758e-06, "loss": 0.4541, "step": 7521 }, { "epoch": 0.45131097378052437, "grad_norm": 1.1834993362426758, "learning_rate": 4.217220092802398e-06, "loss": 0.4367, "step": 7522 }, { "epoch": 0.45137097258054837, "grad_norm": 1.1408756971359253, "learning_rate": 4.216554346606901e-06, "loss": 0.3716, "step": 7523 }, { "epoch": 0.45143097138057237, "grad_norm": 1.3110008239746094, "learning_rate": 4.215888573350413e-06, "loss": 0.4026, "step": 7524 }, { "epoch": 0.45149097018059636, "grad_norm": 1.2558866739273071, "learning_rate": 4.2152227730580745e-06, "loss": 0.4079, "step": 7525 }, { "epoch": 0.4515509689806204, "grad_norm": 1.3361979722976685, "learning_rate": 4.214556945755031e-06, "loss": 0.4287, "step": 7526 }, { "epoch": 0.4516109677806444, "grad_norm": 1.253225564956665, "learning_rate": 4.213891091466426e-06, "loss": 0.4174, "step": 7527 }, { "epoch": 0.4516709665806684, "grad_norm": 1.25124990940094, "learning_rate": 4.2132252102174074e-06, "loss": 0.3719, "step": 7528 }, { "epoch": 0.4517309653806924, "grad_norm": 1.3251922130584717, "learning_rate": 4.2125593020331225e-06, "loss": 0.3895, "step": 7529 }, { "epoch": 0.4517909641807164, "grad_norm": 1.2565133571624756, "learning_rate": 4.2118933669387185e-06, "loss": 0.39, "step": 7530 }, { "epoch": 0.4518509629807404, "grad_norm": 1.197537899017334, "learning_rate": 4.2112274049593464e-06, "loss": 0.4111, "step": 7531 }, { "epoch": 0.4519109617807644, "grad_norm": 1.094895601272583, "learning_rate": 4.210561416120153e-06, "loss": 0.4258, "step": 7532 }, { "epoch": 0.4519709605807884, "grad_norm": 1.2284647226333618, "learning_rate": 4.209895400446295e-06, "loss": 0.4126, "step": 7533 }, { "epoch": 0.4520309593808124, "grad_norm": 1.3140335083007812, "learning_rate": 4.20922935796292e-06, "loss": 0.3955, "step": 7534 }, { "epoch": 0.4520909581808364, "grad_norm": 1.2138477563858032, "learning_rate": 4.208563288695186e-06, "loss": 0.4156, "step": 7535 }, { "epoch": 0.4521509569808604, "grad_norm": 1.241522192955017, "learning_rate": 4.207897192668242e-06, "loss": 0.382, "step": 7536 }, { "epoch": 0.4522109557808844, "grad_norm": 1.3616734743118286, "learning_rate": 4.207231069907248e-06, "loss": 0.4497, "step": 7537 }, { "epoch": 0.4522709545809084, "grad_norm": 1.3891047239303589, "learning_rate": 4.206564920437357e-06, "loss": 0.4431, "step": 7538 }, { "epoch": 0.4523309533809324, "grad_norm": 1.3070205450057983, "learning_rate": 4.20589874428373e-06, "loss": 0.4328, "step": 7539 }, { "epoch": 0.4523909521809564, "grad_norm": 1.3934866189956665, "learning_rate": 4.205232541471521e-06, "loss": 0.4321, "step": 7540 }, { "epoch": 0.45245095098098037, "grad_norm": 1.296224594116211, "learning_rate": 4.204566312025895e-06, "loss": 0.392, "step": 7541 }, { "epoch": 0.45251094978100437, "grad_norm": 1.197989583015442, "learning_rate": 4.203900055972007e-06, "loss": 0.3539, "step": 7542 }, { "epoch": 0.45257094858102836, "grad_norm": 1.2714816331863403, "learning_rate": 4.203233773335022e-06, "loss": 0.4402, "step": 7543 }, { "epoch": 0.45263094738105236, "grad_norm": 1.3226546049118042, "learning_rate": 4.202567464140101e-06, "loss": 0.3832, "step": 7544 }, { "epoch": 0.45269094618107636, "grad_norm": 1.2964104413986206, "learning_rate": 4.201901128412409e-06, "loss": 0.4069, "step": 7545 }, { "epoch": 0.45275094498110036, "grad_norm": 1.173964262008667, "learning_rate": 4.201234766177107e-06, "loss": 0.3759, "step": 7546 }, { "epoch": 0.45281094378112435, "grad_norm": 1.408733606338501, "learning_rate": 4.200568377459364e-06, "loss": 0.4625, "step": 7547 }, { "epoch": 0.45287094258114835, "grad_norm": 1.2882652282714844, "learning_rate": 4.199901962284345e-06, "loss": 0.4064, "step": 7548 }, { "epoch": 0.4529309413811724, "grad_norm": 1.197381615638733, "learning_rate": 4.199235520677218e-06, "loss": 0.4323, "step": 7549 }, { "epoch": 0.4529909401811964, "grad_norm": 1.3145904541015625, "learning_rate": 4.19856905266315e-06, "loss": 0.3527, "step": 7550 }, { "epoch": 0.4530509389812204, "grad_norm": 1.303025484085083, "learning_rate": 4.197902558267312e-06, "loss": 0.44, "step": 7551 }, { "epoch": 0.4531109377812444, "grad_norm": 1.2564164400100708, "learning_rate": 4.1972360375148745e-06, "loss": 0.4028, "step": 7552 }, { "epoch": 0.4531709365812684, "grad_norm": 1.2500958442687988, "learning_rate": 4.196569490431008e-06, "loss": 0.4347, "step": 7553 }, { "epoch": 0.4532309353812924, "grad_norm": 1.2730587720870972, "learning_rate": 4.195902917040886e-06, "loss": 0.4512, "step": 7554 }, { "epoch": 0.4532909341813164, "grad_norm": 1.215038537979126, "learning_rate": 4.19523631736968e-06, "loss": 0.3782, "step": 7555 }, { "epoch": 0.4533509329813404, "grad_norm": 1.2971489429473877, "learning_rate": 4.194569691442567e-06, "loss": 0.405, "step": 7556 }, { "epoch": 0.4534109317813644, "grad_norm": 1.1688241958618164, "learning_rate": 4.19390303928472e-06, "loss": 0.376, "step": 7557 }, { "epoch": 0.4534709305813884, "grad_norm": 1.2085788249969482, "learning_rate": 4.193236360921318e-06, "loss": 0.4358, "step": 7558 }, { "epoch": 0.4535309293814124, "grad_norm": 1.2357581853866577, "learning_rate": 4.192569656377537e-06, "loss": 0.3687, "step": 7559 }, { "epoch": 0.45359092818143637, "grad_norm": 1.2559587955474854, "learning_rate": 4.1919029256785555e-06, "loss": 0.4367, "step": 7560 }, { "epoch": 0.45365092698146037, "grad_norm": 1.2619414329528809, "learning_rate": 4.191236168849552e-06, "loss": 0.4368, "step": 7561 }, { "epoch": 0.45371092578148436, "grad_norm": 1.3329191207885742, "learning_rate": 4.190569385915708e-06, "loss": 0.4531, "step": 7562 }, { "epoch": 0.45377092458150836, "grad_norm": 1.2164721488952637, "learning_rate": 4.189902576902204e-06, "loss": 0.3797, "step": 7563 }, { "epoch": 0.45383092338153236, "grad_norm": 1.2819784879684448, "learning_rate": 4.189235741834224e-06, "loss": 0.4696, "step": 7564 }, { "epoch": 0.45389092218155636, "grad_norm": 1.2125391960144043, "learning_rate": 4.188568880736949e-06, "loss": 0.4165, "step": 7565 }, { "epoch": 0.45395092098158035, "grad_norm": 1.166181206703186, "learning_rate": 4.187901993635565e-06, "loss": 0.4698, "step": 7566 }, { "epoch": 0.45401091978160435, "grad_norm": 1.24530827999115, "learning_rate": 4.187235080555256e-06, "loss": 0.4581, "step": 7567 }, { "epoch": 0.45407091858162835, "grad_norm": 1.338752031326294, "learning_rate": 4.186568141521211e-06, "loss": 0.423, "step": 7568 }, { "epoch": 0.45413091738165234, "grad_norm": 1.116832971572876, "learning_rate": 4.185901176558613e-06, "loss": 0.4512, "step": 7569 }, { "epoch": 0.45419091618167634, "grad_norm": 1.2308954000473022, "learning_rate": 4.185234185692654e-06, "loss": 0.3921, "step": 7570 }, { "epoch": 0.4542509149817004, "grad_norm": 1.2472734451293945, "learning_rate": 4.184567168948521e-06, "loss": 0.4276, "step": 7571 }, { "epoch": 0.4543109137817244, "grad_norm": 1.3200451135635376, "learning_rate": 4.1839001263514066e-06, "loss": 0.4128, "step": 7572 }, { "epoch": 0.4543709125817484, "grad_norm": 1.2407439947128296, "learning_rate": 4.183233057926498e-06, "loss": 0.4314, "step": 7573 }, { "epoch": 0.4544309113817724, "grad_norm": 1.320799708366394, "learning_rate": 4.182565963698992e-06, "loss": 0.4593, "step": 7574 }, { "epoch": 0.4544909101817964, "grad_norm": 1.2078003883361816, "learning_rate": 4.1818988436940764e-06, "loss": 0.3942, "step": 7575 }, { "epoch": 0.4545509089818204, "grad_norm": 1.2700128555297852, "learning_rate": 4.18123169793695e-06, "loss": 0.4351, "step": 7576 }, { "epoch": 0.4546109077818444, "grad_norm": 1.1780891418457031, "learning_rate": 4.180564526452806e-06, "loss": 0.4456, "step": 7577 }, { "epoch": 0.4546709065818684, "grad_norm": 1.096773386001587, "learning_rate": 4.1798973292668395e-06, "loss": 0.4343, "step": 7578 }, { "epoch": 0.45473090538189237, "grad_norm": 1.3487924337387085, "learning_rate": 4.179230106404249e-06, "loss": 0.4372, "step": 7579 }, { "epoch": 0.45479090418191637, "grad_norm": 1.2814514636993408, "learning_rate": 4.178562857890232e-06, "loss": 0.3855, "step": 7580 }, { "epoch": 0.45485090298194036, "grad_norm": 1.2293264865875244, "learning_rate": 4.1778955837499876e-06, "loss": 0.3964, "step": 7581 }, { "epoch": 0.45491090178196436, "grad_norm": 1.3468866348266602, "learning_rate": 4.177228284008715e-06, "loss": 0.4402, "step": 7582 }, { "epoch": 0.45497090058198836, "grad_norm": 1.3219023942947388, "learning_rate": 4.176560958691617e-06, "loss": 0.4561, "step": 7583 }, { "epoch": 0.45503089938201235, "grad_norm": 1.2073606252670288, "learning_rate": 4.175893607823892e-06, "loss": 0.3957, "step": 7584 }, { "epoch": 0.45509089818203635, "grad_norm": 1.235475778579712, "learning_rate": 4.175226231430747e-06, "loss": 0.4016, "step": 7585 }, { "epoch": 0.45515089698206035, "grad_norm": 1.3471380472183228, "learning_rate": 4.174558829537382e-06, "loss": 0.4151, "step": 7586 }, { "epoch": 0.45521089578208435, "grad_norm": 1.177724003791809, "learning_rate": 4.1738914021690025e-06, "loss": 0.3765, "step": 7587 }, { "epoch": 0.45527089458210834, "grad_norm": 1.4518290758132935, "learning_rate": 4.173223949350816e-06, "loss": 0.4145, "step": 7588 }, { "epoch": 0.45533089338213234, "grad_norm": 1.251826286315918, "learning_rate": 4.172556471108029e-06, "loss": 0.3648, "step": 7589 }, { "epoch": 0.45539089218215634, "grad_norm": 1.332567811012268, "learning_rate": 4.171888967465846e-06, "loss": 0.451, "step": 7590 }, { "epoch": 0.45545089098218033, "grad_norm": 1.1941862106323242, "learning_rate": 4.171221438449481e-06, "loss": 0.4033, "step": 7591 }, { "epoch": 0.45551088978220433, "grad_norm": 1.2940932512283325, "learning_rate": 4.170553884084137e-06, "loss": 0.4197, "step": 7592 }, { "epoch": 0.45557088858222833, "grad_norm": 1.2047022581100464, "learning_rate": 4.16988630439503e-06, "loss": 0.4107, "step": 7593 }, { "epoch": 0.4556308873822524, "grad_norm": 1.3268760442733765, "learning_rate": 4.169218699407369e-06, "loss": 0.416, "step": 7594 }, { "epoch": 0.4556908861822764, "grad_norm": 1.2320975065231323, "learning_rate": 4.168551069146366e-06, "loss": 0.3993, "step": 7595 }, { "epoch": 0.4557508849823004, "grad_norm": 1.3683154582977295, "learning_rate": 4.167883413637236e-06, "loss": 0.4452, "step": 7596 }, { "epoch": 0.45581088378232437, "grad_norm": 1.4030567407608032, "learning_rate": 4.1672157329051915e-06, "loss": 0.4197, "step": 7597 }, { "epoch": 0.45587088258234837, "grad_norm": 1.3542267084121704, "learning_rate": 4.166548026975448e-06, "loss": 0.3797, "step": 7598 }, { "epoch": 0.45593088138237237, "grad_norm": 1.248740553855896, "learning_rate": 4.165880295873224e-06, "loss": 0.4371, "step": 7599 }, { "epoch": 0.45599088018239636, "grad_norm": 1.2432748079299927, "learning_rate": 4.165212539623734e-06, "loss": 0.3879, "step": 7600 }, { "epoch": 0.45605087898242036, "grad_norm": 1.2174022197723389, "learning_rate": 4.164544758252198e-06, "loss": 0.4242, "step": 7601 }, { "epoch": 0.45611087778244436, "grad_norm": 1.2230010032653809, "learning_rate": 4.163876951783832e-06, "loss": 0.4243, "step": 7602 }, { "epoch": 0.45617087658246835, "grad_norm": 1.27876877784729, "learning_rate": 4.16320912024386e-06, "loss": 0.4366, "step": 7603 }, { "epoch": 0.45623087538249235, "grad_norm": 1.1763105392456055, "learning_rate": 4.162541263657501e-06, "loss": 0.41, "step": 7604 }, { "epoch": 0.45629087418251635, "grad_norm": 1.359188437461853, "learning_rate": 4.161873382049976e-06, "loss": 0.4069, "step": 7605 }, { "epoch": 0.45635087298254035, "grad_norm": 1.5626786947250366, "learning_rate": 4.16120547544651e-06, "loss": 0.4125, "step": 7606 }, { "epoch": 0.45641087178256434, "grad_norm": 1.3424752950668335, "learning_rate": 4.160537543872325e-06, "loss": 0.4561, "step": 7607 }, { "epoch": 0.45647087058258834, "grad_norm": 1.280523419380188, "learning_rate": 4.159869587352647e-06, "loss": 0.4421, "step": 7608 }, { "epoch": 0.45653086938261234, "grad_norm": 1.305795431137085, "learning_rate": 4.1592016059127e-06, "loss": 0.4392, "step": 7609 }, { "epoch": 0.45659086818263633, "grad_norm": 1.1842223405838013, "learning_rate": 4.158533599577712e-06, "loss": 0.3982, "step": 7610 }, { "epoch": 0.45665086698266033, "grad_norm": 1.2492057085037231, "learning_rate": 4.15786556837291e-06, "loss": 0.4197, "step": 7611 }, { "epoch": 0.4567108657826843, "grad_norm": 1.2046914100646973, "learning_rate": 4.1571975123235235e-06, "loss": 0.3585, "step": 7612 }, { "epoch": 0.4567708645827083, "grad_norm": 1.2321562767028809, "learning_rate": 4.15652943145478e-06, "loss": 0.3796, "step": 7613 }, { "epoch": 0.4568308633827323, "grad_norm": 1.391931414604187, "learning_rate": 4.155861325791912e-06, "loss": 0.4129, "step": 7614 }, { "epoch": 0.4568908621827563, "grad_norm": 1.3630260229110718, "learning_rate": 4.1551931953601495e-06, "loss": 0.4758, "step": 7615 }, { "epoch": 0.45695086098278037, "grad_norm": 1.3060262203216553, "learning_rate": 4.154525040184724e-06, "loss": 0.4307, "step": 7616 }, { "epoch": 0.45701085978280437, "grad_norm": 1.1823419332504272, "learning_rate": 4.153856860290871e-06, "loss": 0.3754, "step": 7617 }, { "epoch": 0.45707085858282837, "grad_norm": 1.161393404006958, "learning_rate": 4.153188655703825e-06, "loss": 0.3899, "step": 7618 }, { "epoch": 0.45713085738285236, "grad_norm": 1.2829866409301758, "learning_rate": 4.152520426448816e-06, "loss": 0.4455, "step": 7619 }, { "epoch": 0.45719085618287636, "grad_norm": 1.3089275360107422, "learning_rate": 4.151852172551085e-06, "loss": 0.4665, "step": 7620 }, { "epoch": 0.45725085498290036, "grad_norm": 1.1739170551300049, "learning_rate": 4.151183894035868e-06, "loss": 0.3607, "step": 7621 }, { "epoch": 0.45731085378292435, "grad_norm": 1.1671587228775024, "learning_rate": 4.150515590928401e-06, "loss": 0.359, "step": 7622 }, { "epoch": 0.45737085258294835, "grad_norm": 1.2110968828201294, "learning_rate": 4.149847263253925e-06, "loss": 0.4287, "step": 7623 }, { "epoch": 0.45743085138297235, "grad_norm": 1.32047438621521, "learning_rate": 4.149178911037677e-06, "loss": 0.3987, "step": 7624 }, { "epoch": 0.45749085018299634, "grad_norm": 1.4057965278625488, "learning_rate": 4.1485105343049e-06, "loss": 0.4063, "step": 7625 }, { "epoch": 0.45755084898302034, "grad_norm": 1.2483922243118286, "learning_rate": 4.147842133080835e-06, "loss": 0.3884, "step": 7626 }, { "epoch": 0.45761084778304434, "grad_norm": 1.1868445873260498, "learning_rate": 4.147173707390724e-06, "loss": 0.3755, "step": 7627 }, { "epoch": 0.45767084658306834, "grad_norm": 1.2176250219345093, "learning_rate": 4.14650525725981e-06, "loss": 0.436, "step": 7628 }, { "epoch": 0.45773084538309233, "grad_norm": 1.3893135786056519, "learning_rate": 4.145836782713339e-06, "loss": 0.4114, "step": 7629 }, { "epoch": 0.45779084418311633, "grad_norm": 1.307724118232727, "learning_rate": 4.145168283776554e-06, "loss": 0.4451, "step": 7630 }, { "epoch": 0.4578508429831403, "grad_norm": 1.4151508808135986, "learning_rate": 4.144499760474703e-06, "loss": 0.4296, "step": 7631 }, { "epoch": 0.4579108417831643, "grad_norm": 1.2602040767669678, "learning_rate": 4.143831212833032e-06, "loss": 0.398, "step": 7632 }, { "epoch": 0.4579708405831883, "grad_norm": 1.2249408960342407, "learning_rate": 4.14316264087679e-06, "loss": 0.3764, "step": 7633 }, { "epoch": 0.4580308393832123, "grad_norm": 1.2329907417297363, "learning_rate": 4.142494044631225e-06, "loss": 0.4246, "step": 7634 }, { "epoch": 0.4580908381832363, "grad_norm": 1.2119324207305908, "learning_rate": 4.141825424121586e-06, "loss": 0.4194, "step": 7635 }, { "epoch": 0.4581508369832603, "grad_norm": 1.3597654104232788, "learning_rate": 4.141156779373127e-06, "loss": 0.4343, "step": 7636 }, { "epoch": 0.4582108357832843, "grad_norm": 1.179743766784668, "learning_rate": 4.140488110411096e-06, "loss": 0.4166, "step": 7637 }, { "epoch": 0.4582708345833083, "grad_norm": 1.3724726438522339, "learning_rate": 4.139819417260747e-06, "loss": 0.3987, "step": 7638 }, { "epoch": 0.45833083338333236, "grad_norm": 1.2753571271896362, "learning_rate": 4.1391506999473345e-06, "loss": 0.4213, "step": 7639 }, { "epoch": 0.45839083218335636, "grad_norm": 1.3951507806777954, "learning_rate": 4.138481958496111e-06, "loss": 0.4248, "step": 7640 }, { "epoch": 0.45845083098338035, "grad_norm": 1.5357943773269653, "learning_rate": 4.137813192932335e-06, "loss": 0.4481, "step": 7641 }, { "epoch": 0.45851082978340435, "grad_norm": 1.275272011756897, "learning_rate": 4.137144403281259e-06, "loss": 0.3832, "step": 7642 }, { "epoch": 0.45857082858342835, "grad_norm": 1.2488523721694946, "learning_rate": 4.136475589568142e-06, "loss": 0.4001, "step": 7643 }, { "epoch": 0.45863082738345234, "grad_norm": 1.1072427034378052, "learning_rate": 4.135806751818241e-06, "loss": 0.3984, "step": 7644 }, { "epoch": 0.45869082618347634, "grad_norm": 1.382400631904602, "learning_rate": 4.135137890056816e-06, "loss": 0.4367, "step": 7645 }, { "epoch": 0.45875082498350034, "grad_norm": 1.1773631572723389, "learning_rate": 4.134469004309127e-06, "loss": 0.4319, "step": 7646 }, { "epoch": 0.45881082378352434, "grad_norm": 1.3392584323883057, "learning_rate": 4.133800094600434e-06, "loss": 0.4396, "step": 7647 }, { "epoch": 0.45887082258354833, "grad_norm": 1.2719539403915405, "learning_rate": 4.133131160955999e-06, "loss": 0.4256, "step": 7648 }, { "epoch": 0.45893082138357233, "grad_norm": 1.2311075925827026, "learning_rate": 4.132462203401085e-06, "loss": 0.3928, "step": 7649 }, { "epoch": 0.4589908201835963, "grad_norm": 1.290774941444397, "learning_rate": 4.131793221960955e-06, "loss": 0.4019, "step": 7650 }, { "epoch": 0.4590508189836203, "grad_norm": 1.2499873638153076, "learning_rate": 4.131124216660874e-06, "loss": 0.4102, "step": 7651 }, { "epoch": 0.4591108177836443, "grad_norm": 1.1928192377090454, "learning_rate": 4.130455187526106e-06, "loss": 0.3727, "step": 7652 }, { "epoch": 0.4591708165836683, "grad_norm": 1.1537455320358276, "learning_rate": 4.129786134581917e-06, "loss": 0.3924, "step": 7653 }, { "epoch": 0.4592308153836923, "grad_norm": 1.2408270835876465, "learning_rate": 4.129117057853577e-06, "loss": 0.4161, "step": 7654 }, { "epoch": 0.4592908141837163, "grad_norm": 1.2145569324493408, "learning_rate": 4.128447957366351e-06, "loss": 0.3921, "step": 7655 }, { "epoch": 0.4593508129837403, "grad_norm": 1.3046448230743408, "learning_rate": 4.127778833145509e-06, "loss": 0.3885, "step": 7656 }, { "epoch": 0.4594108117837643, "grad_norm": 1.2653318643569946, "learning_rate": 4.12710968521632e-06, "loss": 0.4362, "step": 7657 }, { "epoch": 0.4594708105837883, "grad_norm": 1.2580033540725708, "learning_rate": 4.126440513604056e-06, "loss": 0.4145, "step": 7658 }, { "epoch": 0.4595308093838123, "grad_norm": 1.2130156755447388, "learning_rate": 4.125771318333987e-06, "loss": 0.4204, "step": 7659 }, { "epoch": 0.4595908081838363, "grad_norm": 1.4070261716842651, "learning_rate": 4.125102099431388e-06, "loss": 0.4144, "step": 7660 }, { "epoch": 0.45965080698386035, "grad_norm": 1.2985260486602783, "learning_rate": 4.124432856921528e-06, "loss": 0.4558, "step": 7661 }, { "epoch": 0.45971080578388435, "grad_norm": 1.2172117233276367, "learning_rate": 4.123763590829686e-06, "loss": 0.3725, "step": 7662 }, { "epoch": 0.45977080458390834, "grad_norm": 1.3952741622924805, "learning_rate": 4.123094301181134e-06, "loss": 0.3747, "step": 7663 }, { "epoch": 0.45983080338393234, "grad_norm": 1.3673794269561768, "learning_rate": 4.12242498800115e-06, "loss": 0.4493, "step": 7664 }, { "epoch": 0.45989080218395634, "grad_norm": 1.3588377237319946, "learning_rate": 4.121755651315009e-06, "loss": 0.4196, "step": 7665 }, { "epoch": 0.45995080098398033, "grad_norm": 1.310662031173706, "learning_rate": 4.121086291147991e-06, "loss": 0.4245, "step": 7666 }, { "epoch": 0.46001079978400433, "grad_norm": 1.3078136444091797, "learning_rate": 4.120416907525372e-06, "loss": 0.4182, "step": 7667 }, { "epoch": 0.46007079858402833, "grad_norm": 1.2134783267974854, "learning_rate": 4.119747500472434e-06, "loss": 0.3644, "step": 7668 }, { "epoch": 0.4601307973840523, "grad_norm": 1.2771072387695312, "learning_rate": 4.119078070014456e-06, "loss": 0.4282, "step": 7669 }, { "epoch": 0.4601907961840763, "grad_norm": 1.2982169389724731, "learning_rate": 4.11840861617672e-06, "loss": 0.4061, "step": 7670 }, { "epoch": 0.4602507949841003, "grad_norm": 1.3012244701385498, "learning_rate": 4.117739138984509e-06, "loss": 0.4122, "step": 7671 }, { "epoch": 0.4603107937841243, "grad_norm": 1.3767856359481812, "learning_rate": 4.117069638463104e-06, "loss": 0.434, "step": 7672 }, { "epoch": 0.4603707925841483, "grad_norm": 1.2747265100479126, "learning_rate": 4.11640011463779e-06, "loss": 0.408, "step": 7673 }, { "epoch": 0.4604307913841723, "grad_norm": 1.1581342220306396, "learning_rate": 4.115730567533852e-06, "loss": 0.3757, "step": 7674 }, { "epoch": 0.4604907901841963, "grad_norm": 1.2139314413070679, "learning_rate": 4.115060997176576e-06, "loss": 0.4281, "step": 7675 }, { "epoch": 0.4605507889842203, "grad_norm": 1.2089967727661133, "learning_rate": 4.114391403591249e-06, "loss": 0.4228, "step": 7676 }, { "epoch": 0.4606107877842443, "grad_norm": 1.3999830484390259, "learning_rate": 4.113721786803157e-06, "loss": 0.4598, "step": 7677 }, { "epoch": 0.4606707865842683, "grad_norm": 1.2924264669418335, "learning_rate": 4.113052146837589e-06, "loss": 0.4526, "step": 7678 }, { "epoch": 0.4607307853842923, "grad_norm": 1.313284158706665, "learning_rate": 4.112382483719836e-06, "loss": 0.4377, "step": 7679 }, { "epoch": 0.4607907841843163, "grad_norm": 1.344231367111206, "learning_rate": 4.111712797475184e-06, "loss": 0.4348, "step": 7680 }, { "epoch": 0.4608507829843403, "grad_norm": 1.3699400424957275, "learning_rate": 4.111043088128928e-06, "loss": 0.3897, "step": 7681 }, { "epoch": 0.4609107817843643, "grad_norm": 1.206560730934143, "learning_rate": 4.110373355706359e-06, "loss": 0.4429, "step": 7682 }, { "epoch": 0.46097078058438834, "grad_norm": 1.3088382482528687, "learning_rate": 4.109703600232769e-06, "loss": 0.4038, "step": 7683 }, { "epoch": 0.46103077938441234, "grad_norm": 1.2192133665084839, "learning_rate": 4.1090338217334515e-06, "loss": 0.3884, "step": 7684 }, { "epoch": 0.46109077818443633, "grad_norm": 1.1752972602844238, "learning_rate": 4.108364020233702e-06, "loss": 0.3968, "step": 7685 }, { "epoch": 0.46115077698446033, "grad_norm": 1.1936872005462646, "learning_rate": 4.1076941957588135e-06, "loss": 0.398, "step": 7686 }, { "epoch": 0.46121077578448433, "grad_norm": 1.4468998908996582, "learning_rate": 4.1070243483340855e-06, "loss": 0.4347, "step": 7687 }, { "epoch": 0.4612707745845083, "grad_norm": 1.3129773139953613, "learning_rate": 4.106354477984813e-06, "loss": 0.4042, "step": 7688 }, { "epoch": 0.4613307733845323, "grad_norm": 1.3761954307556152, "learning_rate": 4.105684584736294e-06, "loss": 0.3785, "step": 7689 }, { "epoch": 0.4613907721845563, "grad_norm": 1.1756336688995361, "learning_rate": 4.105014668613828e-06, "loss": 0.4018, "step": 7690 }, { "epoch": 0.4614507709845803, "grad_norm": 1.1313120126724243, "learning_rate": 4.104344729642715e-06, "loss": 0.4049, "step": 7691 }, { "epoch": 0.4615107697846043, "grad_norm": 1.3578953742980957, "learning_rate": 4.103674767848253e-06, "loss": 0.4233, "step": 7692 }, { "epoch": 0.4615707685846283, "grad_norm": 1.1775126457214355, "learning_rate": 4.103004783255747e-06, "loss": 0.422, "step": 7693 }, { "epoch": 0.4616307673846523, "grad_norm": 1.306885838508606, "learning_rate": 4.102334775890498e-06, "loss": 0.4152, "step": 7694 }, { "epoch": 0.4616907661846763, "grad_norm": 1.309962511062622, "learning_rate": 4.101664745777808e-06, "loss": 0.4079, "step": 7695 }, { "epoch": 0.4617507649847003, "grad_norm": 1.3676244020462036, "learning_rate": 4.1009946929429815e-06, "loss": 0.5067, "step": 7696 }, { "epoch": 0.4618107637847243, "grad_norm": 1.2300118207931519, "learning_rate": 4.1003246174113245e-06, "loss": 0.4054, "step": 7697 }, { "epoch": 0.4618707625847483, "grad_norm": 1.2830437421798706, "learning_rate": 4.099654519208141e-06, "loss": 0.4386, "step": 7698 }, { "epoch": 0.4619307613847723, "grad_norm": 1.1778757572174072, "learning_rate": 4.098984398358738e-06, "loss": 0.4164, "step": 7699 }, { "epoch": 0.4619907601847963, "grad_norm": 1.324028491973877, "learning_rate": 4.0983142548884245e-06, "loss": 0.3888, "step": 7700 }, { "epoch": 0.4620507589848203, "grad_norm": 1.2819889783859253, "learning_rate": 4.0976440888225065e-06, "loss": 0.4234, "step": 7701 }, { "epoch": 0.4621107577848443, "grad_norm": 1.2227438688278198, "learning_rate": 4.096973900186295e-06, "loss": 0.4452, "step": 7702 }, { "epoch": 0.4621707565848683, "grad_norm": 1.344131588935852, "learning_rate": 4.096303689005098e-06, "loss": 0.4745, "step": 7703 }, { "epoch": 0.4622307553848923, "grad_norm": 1.3638559579849243, "learning_rate": 4.095633455304227e-06, "loss": 0.4186, "step": 7704 }, { "epoch": 0.4622907541849163, "grad_norm": 1.2965730428695679, "learning_rate": 4.094963199108996e-06, "loss": 0.4085, "step": 7705 }, { "epoch": 0.4623507529849403, "grad_norm": 1.2967150211334229, "learning_rate": 4.094292920444714e-06, "loss": 0.4069, "step": 7706 }, { "epoch": 0.4624107517849643, "grad_norm": 1.324371337890625, "learning_rate": 4.0936226193366965e-06, "loss": 0.4328, "step": 7707 }, { "epoch": 0.4624707505849883, "grad_norm": 1.3155641555786133, "learning_rate": 4.092952295810258e-06, "loss": 0.3906, "step": 7708 }, { "epoch": 0.4625307493850123, "grad_norm": 1.291174054145813, "learning_rate": 4.0922819498907104e-06, "loss": 0.4235, "step": 7709 }, { "epoch": 0.4625907481850363, "grad_norm": 1.3374998569488525, "learning_rate": 4.091611581603375e-06, "loss": 0.4124, "step": 7710 }, { "epoch": 0.4626507469850603, "grad_norm": 1.2948379516601562, "learning_rate": 4.090941190973563e-06, "loss": 0.4031, "step": 7711 }, { "epoch": 0.4627107457850843, "grad_norm": 1.2181099653244019, "learning_rate": 4.090270778026597e-06, "loss": 0.3801, "step": 7712 }, { "epoch": 0.4627707445851083, "grad_norm": 1.1387747526168823, "learning_rate": 4.089600342787791e-06, "loss": 0.3834, "step": 7713 }, { "epoch": 0.4628307433851323, "grad_norm": 1.223989725112915, "learning_rate": 4.0889298852824675e-06, "loss": 0.4126, "step": 7714 }, { "epoch": 0.4628907421851563, "grad_norm": 1.2069553136825562, "learning_rate": 4.088259405535944e-06, "loss": 0.4244, "step": 7715 }, { "epoch": 0.4629507409851803, "grad_norm": 1.4659990072250366, "learning_rate": 4.087588903573544e-06, "loss": 0.4334, "step": 7716 }, { "epoch": 0.4630107397852043, "grad_norm": 1.3552908897399902, "learning_rate": 4.086918379420587e-06, "loss": 0.4611, "step": 7717 }, { "epoch": 0.4630707385852283, "grad_norm": 1.2641605138778687, "learning_rate": 4.086247833102399e-06, "loss": 0.386, "step": 7718 }, { "epoch": 0.4631307373852523, "grad_norm": 1.1717129945755005, "learning_rate": 4.085577264644299e-06, "loss": 0.4083, "step": 7719 }, { "epoch": 0.4631907361852763, "grad_norm": 1.181190013885498, "learning_rate": 4.084906674071616e-06, "loss": 0.4459, "step": 7720 }, { "epoch": 0.4632507349853003, "grad_norm": 1.1699748039245605, "learning_rate": 4.084236061409671e-06, "loss": 0.4049, "step": 7721 }, { "epoch": 0.4633107337853243, "grad_norm": 1.2088297605514526, "learning_rate": 4.083565426683791e-06, "loss": 0.402, "step": 7722 }, { "epoch": 0.4633707325853483, "grad_norm": 1.2055716514587402, "learning_rate": 4.0828947699193055e-06, "loss": 0.4432, "step": 7723 }, { "epoch": 0.4634307313853723, "grad_norm": 1.2829418182373047, "learning_rate": 4.082224091141539e-06, "loss": 0.4577, "step": 7724 }, { "epoch": 0.46349073018539627, "grad_norm": 1.144033670425415, "learning_rate": 4.081553390375822e-06, "loss": 0.4192, "step": 7725 }, { "epoch": 0.46355072898542027, "grad_norm": 1.393157720565796, "learning_rate": 4.080882667647482e-06, "loss": 0.3873, "step": 7726 }, { "epoch": 0.46361072778544427, "grad_norm": 1.2790735960006714, "learning_rate": 4.08021192298185e-06, "loss": 0.3923, "step": 7727 }, { "epoch": 0.4636707265854683, "grad_norm": 1.1852232217788696, "learning_rate": 4.079541156404258e-06, "loss": 0.4082, "step": 7728 }, { "epoch": 0.4637307253854923, "grad_norm": 1.2893515825271606, "learning_rate": 4.078870367940037e-06, "loss": 0.397, "step": 7729 }, { "epoch": 0.4637907241855163, "grad_norm": 1.2407863140106201, "learning_rate": 4.078199557614518e-06, "loss": 0.4354, "step": 7730 }, { "epoch": 0.4638507229855403, "grad_norm": 1.2772026062011719, "learning_rate": 4.077528725453038e-06, "loss": 0.4113, "step": 7731 }, { "epoch": 0.4639107217855643, "grad_norm": 1.2609528303146362, "learning_rate": 4.076857871480929e-06, "loss": 0.3922, "step": 7732 }, { "epoch": 0.4639707205855883, "grad_norm": 1.2198426723480225, "learning_rate": 4.076186995723526e-06, "loss": 0.4579, "step": 7733 }, { "epoch": 0.4640307193856123, "grad_norm": 1.1787172555923462, "learning_rate": 4.075516098206164e-06, "loss": 0.3926, "step": 7734 }, { "epoch": 0.4640907181856363, "grad_norm": 1.3295915126800537, "learning_rate": 4.074845178954182e-06, "loss": 0.3987, "step": 7735 }, { "epoch": 0.4641507169856603, "grad_norm": 1.3177858591079712, "learning_rate": 4.074174237992917e-06, "loss": 0.4287, "step": 7736 }, { "epoch": 0.4642107157856843, "grad_norm": 1.2043516635894775, "learning_rate": 4.073503275347706e-06, "loss": 0.3865, "step": 7737 }, { "epoch": 0.4642707145857083, "grad_norm": 1.2696388959884644, "learning_rate": 4.07283229104389e-06, "loss": 0.4203, "step": 7738 }, { "epoch": 0.4643307133857323, "grad_norm": 1.258575677871704, "learning_rate": 4.072161285106808e-06, "loss": 0.41, "step": 7739 }, { "epoch": 0.4643907121857563, "grad_norm": 1.3738903999328613, "learning_rate": 4.0714902575618e-06, "loss": 0.4004, "step": 7740 }, { "epoch": 0.4644507109857803, "grad_norm": 1.200161099433899, "learning_rate": 4.0708192084342105e-06, "loss": 0.4374, "step": 7741 }, { "epoch": 0.4645107097858043, "grad_norm": 1.316063642501831, "learning_rate": 4.070148137749379e-06, "loss": 0.4275, "step": 7742 }, { "epoch": 0.4645707085858283, "grad_norm": 1.1900960206985474, "learning_rate": 4.06947704553265e-06, "loss": 0.3979, "step": 7743 }, { "epoch": 0.46463070738585227, "grad_norm": 1.4066025018692017, "learning_rate": 4.0688059318093674e-06, "loss": 0.4092, "step": 7744 }, { "epoch": 0.46469070618587627, "grad_norm": 1.2755475044250488, "learning_rate": 4.068134796604877e-06, "loss": 0.4279, "step": 7745 }, { "epoch": 0.46475070498590026, "grad_norm": 1.214789628982544, "learning_rate": 4.067463639944524e-06, "loss": 0.4118, "step": 7746 }, { "epoch": 0.46481070378592426, "grad_norm": 1.3649733066558838, "learning_rate": 4.066792461853654e-06, "loss": 0.4505, "step": 7747 }, { "epoch": 0.46487070258594826, "grad_norm": 1.190732479095459, "learning_rate": 4.066121262357615e-06, "loss": 0.4455, "step": 7748 }, { "epoch": 0.46493070138597226, "grad_norm": 1.2172046899795532, "learning_rate": 4.065450041481755e-06, "loss": 0.3932, "step": 7749 }, { "epoch": 0.46499070018599625, "grad_norm": 1.2230473756790161, "learning_rate": 4.064778799251423e-06, "loss": 0.4194, "step": 7750 }, { "epoch": 0.4650506989860203, "grad_norm": 1.2062296867370605, "learning_rate": 4.064107535691968e-06, "loss": 0.3761, "step": 7751 }, { "epoch": 0.4651106977860443, "grad_norm": 1.1354726552963257, "learning_rate": 4.063436250828744e-06, "loss": 0.3673, "step": 7752 }, { "epoch": 0.4651706965860683, "grad_norm": 1.3696579933166504, "learning_rate": 4.062764944687097e-06, "loss": 0.4689, "step": 7753 }, { "epoch": 0.4652306953860923, "grad_norm": 1.477494478225708, "learning_rate": 4.062093617292384e-06, "loss": 0.4458, "step": 7754 }, { "epoch": 0.4652906941861163, "grad_norm": 1.1818313598632812, "learning_rate": 4.061422268669955e-06, "loss": 0.3876, "step": 7755 }, { "epoch": 0.4653506929861403, "grad_norm": 1.2892574071884155, "learning_rate": 4.060750898845166e-06, "loss": 0.4443, "step": 7756 }, { "epoch": 0.4654106917861643, "grad_norm": 1.2246813774108887, "learning_rate": 4.0600795078433685e-06, "loss": 0.4207, "step": 7757 }, { "epoch": 0.4654706905861883, "grad_norm": 1.2675526142120361, "learning_rate": 4.059408095689921e-06, "loss": 0.412, "step": 7758 }, { "epoch": 0.4655306893862123, "grad_norm": 1.2993969917297363, "learning_rate": 4.058736662410178e-06, "loss": 0.415, "step": 7759 }, { "epoch": 0.4655906881862363, "grad_norm": 1.1417138576507568, "learning_rate": 4.058065208029498e-06, "loss": 0.3749, "step": 7760 }, { "epoch": 0.4656506869862603, "grad_norm": 1.199306607246399, "learning_rate": 4.0573937325732355e-06, "loss": 0.4207, "step": 7761 }, { "epoch": 0.4657106857862843, "grad_norm": 1.248761534690857, "learning_rate": 4.0567222360667515e-06, "loss": 0.415, "step": 7762 }, { "epoch": 0.46577068458630827, "grad_norm": 1.2856502532958984, "learning_rate": 4.056050718535406e-06, "loss": 0.4087, "step": 7763 }, { "epoch": 0.46583068338633227, "grad_norm": 1.4309213161468506, "learning_rate": 4.055379180004558e-06, "loss": 0.4762, "step": 7764 }, { "epoch": 0.46589068218635626, "grad_norm": 1.3499318361282349, "learning_rate": 4.0547076204995695e-06, "loss": 0.3949, "step": 7765 }, { "epoch": 0.46595068098638026, "grad_norm": 1.4329942464828491, "learning_rate": 4.054036040045801e-06, "loss": 0.4699, "step": 7766 }, { "epoch": 0.46601067978640426, "grad_norm": 1.2737858295440674, "learning_rate": 4.053364438668614e-06, "loss": 0.4435, "step": 7767 }, { "epoch": 0.46607067858642826, "grad_norm": 1.2883403301239014, "learning_rate": 4.052692816393374e-06, "loss": 0.4397, "step": 7768 }, { "epoch": 0.46613067738645225, "grad_norm": 1.226344108581543, "learning_rate": 4.0520211732454455e-06, "loss": 0.4174, "step": 7769 }, { "epoch": 0.46619067618647625, "grad_norm": 1.3744597434997559, "learning_rate": 4.051349509250191e-06, "loss": 0.3749, "step": 7770 }, { "epoch": 0.46625067498650025, "grad_norm": 1.3024951219558716, "learning_rate": 4.050677824432979e-06, "loss": 0.4314, "step": 7771 }, { "epoch": 0.46631067378652424, "grad_norm": 1.324105143547058, "learning_rate": 4.050006118819173e-06, "loss": 0.4415, "step": 7772 }, { "epoch": 0.4663706725865483, "grad_norm": 1.0718239545822144, "learning_rate": 4.049334392434141e-06, "loss": 0.3434, "step": 7773 }, { "epoch": 0.4664306713865723, "grad_norm": 1.2814037799835205, "learning_rate": 4.048662645303253e-06, "loss": 0.418, "step": 7774 }, { "epoch": 0.4664906701865963, "grad_norm": 1.292592167854309, "learning_rate": 4.047990877451877e-06, "loss": 0.4479, "step": 7775 }, { "epoch": 0.4665506689866203, "grad_norm": 1.3635656833648682, "learning_rate": 4.047319088905382e-06, "loss": 0.4092, "step": 7776 }, { "epoch": 0.4666106677866443, "grad_norm": 1.209674596786499, "learning_rate": 4.046647279689139e-06, "loss": 0.3864, "step": 7777 }, { "epoch": 0.4666706665866683, "grad_norm": 1.413802981376648, "learning_rate": 4.045975449828517e-06, "loss": 0.4341, "step": 7778 }, { "epoch": 0.4667306653866923, "grad_norm": 1.3187297582626343, "learning_rate": 4.045303599348891e-06, "loss": 0.4632, "step": 7779 }, { "epoch": 0.4667906641867163, "grad_norm": 1.184729814529419, "learning_rate": 4.044631728275631e-06, "loss": 0.3992, "step": 7780 }, { "epoch": 0.4668506629867403, "grad_norm": 1.272701382637024, "learning_rate": 4.043959836634114e-06, "loss": 0.4369, "step": 7781 }, { "epoch": 0.46691066178676427, "grad_norm": 1.4653735160827637, "learning_rate": 4.043287924449711e-06, "loss": 0.4406, "step": 7782 }, { "epoch": 0.46697066058678827, "grad_norm": 1.4347898960113525, "learning_rate": 4.042615991747799e-06, "loss": 0.3731, "step": 7783 }, { "epoch": 0.46703065938681226, "grad_norm": 1.1330608129501343, "learning_rate": 4.041944038553753e-06, "loss": 0.3994, "step": 7784 }, { "epoch": 0.46709065818683626, "grad_norm": 1.2705503702163696, "learning_rate": 4.04127206489295e-06, "loss": 0.4302, "step": 7785 }, { "epoch": 0.46715065698686026, "grad_norm": 1.318629264831543, "learning_rate": 4.040600070790766e-06, "loss": 0.3845, "step": 7786 }, { "epoch": 0.46721065578688425, "grad_norm": 1.2416349649429321, "learning_rate": 4.039928056272581e-06, "loss": 0.4199, "step": 7787 }, { "epoch": 0.46727065458690825, "grad_norm": 1.237642765045166, "learning_rate": 4.039256021363773e-06, "loss": 0.3838, "step": 7788 }, { "epoch": 0.46733065338693225, "grad_norm": 1.2605693340301514, "learning_rate": 4.038583966089722e-06, "loss": 0.397, "step": 7789 }, { "epoch": 0.46739065218695625, "grad_norm": 1.2038640975952148, "learning_rate": 4.037911890475809e-06, "loss": 0.4038, "step": 7790 }, { "epoch": 0.46745065098698024, "grad_norm": 1.1399989128112793, "learning_rate": 4.037239794547414e-06, "loss": 0.3849, "step": 7791 }, { "epoch": 0.46751064978700424, "grad_norm": 1.2141979932785034, "learning_rate": 4.0365676783299205e-06, "loss": 0.4181, "step": 7792 }, { "epoch": 0.46757064858702824, "grad_norm": 1.2562555074691772, "learning_rate": 4.035895541848709e-06, "loss": 0.4223, "step": 7793 }, { "epoch": 0.46763064738705223, "grad_norm": 1.2111260890960693, "learning_rate": 4.035223385129165e-06, "loss": 0.4373, "step": 7794 }, { "epoch": 0.46769064618707623, "grad_norm": 1.3012574911117554, "learning_rate": 4.034551208196672e-06, "loss": 0.4048, "step": 7795 }, { "epoch": 0.4677506449871003, "grad_norm": 1.1708166599273682, "learning_rate": 4.0338790110766165e-06, "loss": 0.3768, "step": 7796 }, { "epoch": 0.4678106437871243, "grad_norm": 1.4088199138641357, "learning_rate": 4.0332067937943815e-06, "loss": 0.4619, "step": 7797 }, { "epoch": 0.4678706425871483, "grad_norm": 1.153076410293579, "learning_rate": 4.032534556375357e-06, "loss": 0.3534, "step": 7798 }, { "epoch": 0.4679306413871723, "grad_norm": 1.198192834854126, "learning_rate": 4.031862298844927e-06, "loss": 0.4061, "step": 7799 }, { "epoch": 0.46799064018719627, "grad_norm": 1.3676573038101196, "learning_rate": 4.031190021228481e-06, "loss": 0.4511, "step": 7800 }, { "epoch": 0.46805063898722027, "grad_norm": 1.3943768739700317, "learning_rate": 4.030517723551409e-06, "loss": 0.4491, "step": 7801 }, { "epoch": 0.46811063778724427, "grad_norm": 1.2371922731399536, "learning_rate": 4.029845405839099e-06, "loss": 0.4423, "step": 7802 }, { "epoch": 0.46817063658726826, "grad_norm": 1.4561433792114258, "learning_rate": 4.029173068116941e-06, "loss": 0.426, "step": 7803 }, { "epoch": 0.46823063538729226, "grad_norm": 1.2908437252044678, "learning_rate": 4.028500710410329e-06, "loss": 0.4339, "step": 7804 }, { "epoch": 0.46829063418731626, "grad_norm": 1.4270542860031128, "learning_rate": 4.0278283327446526e-06, "loss": 0.4653, "step": 7805 }, { "epoch": 0.46835063298734025, "grad_norm": 1.23714017868042, "learning_rate": 4.027155935145304e-06, "loss": 0.3896, "step": 7806 }, { "epoch": 0.46841063178736425, "grad_norm": 1.2299636602401733, "learning_rate": 4.026483517637678e-06, "loss": 0.4096, "step": 7807 }, { "epoch": 0.46847063058738825, "grad_norm": 1.2208341360092163, "learning_rate": 4.0258110802471685e-06, "loss": 0.407, "step": 7808 }, { "epoch": 0.46853062938741225, "grad_norm": 1.265910029411316, "learning_rate": 4.025138622999169e-06, "loss": 0.3874, "step": 7809 }, { "epoch": 0.46859062818743624, "grad_norm": 1.3274052143096924, "learning_rate": 4.024466145919078e-06, "loss": 0.4179, "step": 7810 }, { "epoch": 0.46865062698746024, "grad_norm": 1.333332896232605, "learning_rate": 4.0237936490322885e-06, "loss": 0.4366, "step": 7811 }, { "epoch": 0.46871062578748424, "grad_norm": 1.2354947328567505, "learning_rate": 4.0231211323642e-06, "loss": 0.4335, "step": 7812 }, { "epoch": 0.46877062458750823, "grad_norm": 1.2486035823822021, "learning_rate": 4.022448595940209e-06, "loss": 0.3921, "step": 7813 }, { "epoch": 0.46883062338753223, "grad_norm": 1.1766334772109985, "learning_rate": 4.0217760397857164e-06, "loss": 0.4285, "step": 7814 }, { "epoch": 0.4688906221875562, "grad_norm": 1.2018016576766968, "learning_rate": 4.021103463926119e-06, "loss": 0.3529, "step": 7815 }, { "epoch": 0.4689506209875802, "grad_norm": 1.351528525352478, "learning_rate": 4.020430868386817e-06, "loss": 0.4337, "step": 7816 }, { "epoch": 0.4690106197876042, "grad_norm": 1.2559984922409058, "learning_rate": 4.019758253193213e-06, "loss": 0.4517, "step": 7817 }, { "epoch": 0.4690706185876283, "grad_norm": 1.2420814037322998, "learning_rate": 4.019085618370708e-06, "loss": 0.4469, "step": 7818 }, { "epoch": 0.46913061738765227, "grad_norm": 1.2742997407913208, "learning_rate": 4.0184129639447035e-06, "loss": 0.4502, "step": 7819 }, { "epoch": 0.46919061618767627, "grad_norm": 1.2220187187194824, "learning_rate": 4.017740289940604e-06, "loss": 0.4108, "step": 7820 }, { "epoch": 0.46925061498770027, "grad_norm": 1.1283105611801147, "learning_rate": 4.0170675963838125e-06, "loss": 0.4039, "step": 7821 }, { "epoch": 0.46931061378772426, "grad_norm": 1.2558550834655762, "learning_rate": 4.016394883299733e-06, "loss": 0.432, "step": 7822 }, { "epoch": 0.46937061258774826, "grad_norm": 1.2390235662460327, "learning_rate": 4.015722150713772e-06, "loss": 0.4179, "step": 7823 }, { "epoch": 0.46943061138777226, "grad_norm": 1.357145071029663, "learning_rate": 4.015049398651335e-06, "loss": 0.4316, "step": 7824 }, { "epoch": 0.46949061018779625, "grad_norm": 1.3585902452468872, "learning_rate": 4.014376627137829e-06, "loss": 0.3683, "step": 7825 }, { "epoch": 0.46955060898782025, "grad_norm": 1.2742831707000732, "learning_rate": 4.013703836198661e-06, "loss": 0.3797, "step": 7826 }, { "epoch": 0.46961060778784425, "grad_norm": 1.4120900630950928, "learning_rate": 4.013031025859238e-06, "loss": 0.4374, "step": 7827 }, { "epoch": 0.46967060658786824, "grad_norm": 1.220969557762146, "learning_rate": 4.012358196144972e-06, "loss": 0.3896, "step": 7828 }, { "epoch": 0.46973060538789224, "grad_norm": 1.5057439804077148, "learning_rate": 4.011685347081272e-06, "loss": 0.4097, "step": 7829 }, { "epoch": 0.46979060418791624, "grad_norm": 1.2269468307495117, "learning_rate": 4.011012478693546e-06, "loss": 0.4024, "step": 7830 }, { "epoch": 0.46985060298794024, "grad_norm": 1.143083095550537, "learning_rate": 4.010339591007209e-06, "loss": 0.3959, "step": 7831 }, { "epoch": 0.46991060178796423, "grad_norm": 1.1879510879516602, "learning_rate": 4.009666684047668e-06, "loss": 0.4187, "step": 7832 }, { "epoch": 0.46997060058798823, "grad_norm": 1.1247000694274902, "learning_rate": 4.00899375784034e-06, "loss": 0.404, "step": 7833 }, { "epoch": 0.4700305993880122, "grad_norm": 1.3416575193405151, "learning_rate": 4.008320812410637e-06, "loss": 0.4262, "step": 7834 }, { "epoch": 0.4700905981880362, "grad_norm": 1.2885912656784058, "learning_rate": 4.007647847783972e-06, "loss": 0.4258, "step": 7835 }, { "epoch": 0.4701505969880602, "grad_norm": 1.3366906642913818, "learning_rate": 4.006974863985762e-06, "loss": 0.3963, "step": 7836 }, { "epoch": 0.4702105957880842, "grad_norm": 1.1824781894683838, "learning_rate": 4.00630186104142e-06, "loss": 0.4349, "step": 7837 }, { "epoch": 0.4702705945881082, "grad_norm": 1.425450325012207, "learning_rate": 4.005628838976363e-06, "loss": 0.4275, "step": 7838 }, { "epoch": 0.4703305933881322, "grad_norm": 1.2555264234542847, "learning_rate": 4.004955797816009e-06, "loss": 0.4041, "step": 7839 }, { "epoch": 0.47039059218815626, "grad_norm": 1.3650776147842407, "learning_rate": 4.004282737585775e-06, "loss": 0.3948, "step": 7840 }, { "epoch": 0.47045059098818026, "grad_norm": 1.1824039220809937, "learning_rate": 4.00360965831108e-06, "loss": 0.4019, "step": 7841 }, { "epoch": 0.47051058978820426, "grad_norm": 1.4490147829055786, "learning_rate": 4.002936560017342e-06, "loss": 0.4268, "step": 7842 }, { "epoch": 0.47057058858822826, "grad_norm": 1.3009414672851562, "learning_rate": 4.002263442729982e-06, "loss": 0.4213, "step": 7843 }, { "epoch": 0.47063058738825225, "grad_norm": 1.3361403942108154, "learning_rate": 4.001590306474419e-06, "loss": 0.4664, "step": 7844 }, { "epoch": 0.47069058618827625, "grad_norm": 1.3302030563354492, "learning_rate": 4.000917151276076e-06, "loss": 0.4003, "step": 7845 }, { "epoch": 0.47075058498830025, "grad_norm": 1.312644362449646, "learning_rate": 4.0002439771603755e-06, "loss": 0.4758, "step": 7846 }, { "epoch": 0.47081058378832424, "grad_norm": 1.2797696590423584, "learning_rate": 3.9995707841527375e-06, "loss": 0.4043, "step": 7847 }, { "epoch": 0.47087058258834824, "grad_norm": 1.2030850648880005, "learning_rate": 3.998897572278588e-06, "loss": 0.4311, "step": 7848 }, { "epoch": 0.47093058138837224, "grad_norm": 1.2057489156723022, "learning_rate": 3.99822434156335e-06, "loss": 0.4479, "step": 7849 }, { "epoch": 0.47099058018839624, "grad_norm": 1.3016622066497803, "learning_rate": 3.997551092032447e-06, "loss": 0.413, "step": 7850 }, { "epoch": 0.47105057898842023, "grad_norm": 1.3088688850402832, "learning_rate": 3.996877823711307e-06, "loss": 0.4224, "step": 7851 }, { "epoch": 0.47111057778844423, "grad_norm": 1.3252310752868652, "learning_rate": 3.996204536625357e-06, "loss": 0.4499, "step": 7852 }, { "epoch": 0.4711705765884682, "grad_norm": 1.1878827810287476, "learning_rate": 3.995531230800021e-06, "loss": 0.4491, "step": 7853 }, { "epoch": 0.4712305753884922, "grad_norm": 1.2108708620071411, "learning_rate": 3.994857906260729e-06, "loss": 0.4014, "step": 7854 }, { "epoch": 0.4712905741885162, "grad_norm": 1.430246353149414, "learning_rate": 3.994184563032907e-06, "loss": 0.4871, "step": 7855 }, { "epoch": 0.4713505729885402, "grad_norm": 1.4148006439208984, "learning_rate": 3.993511201141986e-06, "loss": 0.4527, "step": 7856 }, { "epoch": 0.4714105717885642, "grad_norm": 1.2964589595794678, "learning_rate": 3.992837820613395e-06, "loss": 0.4025, "step": 7857 }, { "epoch": 0.4714705705885882, "grad_norm": 1.321449875831604, "learning_rate": 3.992164421472566e-06, "loss": 0.3935, "step": 7858 }, { "epoch": 0.4715305693886122, "grad_norm": 1.1786072254180908, "learning_rate": 3.9914910037449285e-06, "loss": 0.3689, "step": 7859 }, { "epoch": 0.4715905681886362, "grad_norm": 1.3034095764160156, "learning_rate": 3.990817567455915e-06, "loss": 0.3949, "step": 7860 }, { "epoch": 0.4716505669886602, "grad_norm": 1.280686378479004, "learning_rate": 3.990144112630958e-06, "loss": 0.468, "step": 7861 }, { "epoch": 0.4717105657886842, "grad_norm": 1.5109302997589111, "learning_rate": 3.989470639295491e-06, "loss": 0.4325, "step": 7862 }, { "epoch": 0.47177056458870825, "grad_norm": 1.322790265083313, "learning_rate": 3.988797147474949e-06, "loss": 0.3967, "step": 7863 }, { "epoch": 0.47183056338873225, "grad_norm": 1.3040211200714111, "learning_rate": 3.988123637194764e-06, "loss": 0.3902, "step": 7864 }, { "epoch": 0.47189056218875625, "grad_norm": 1.3443371057510376, "learning_rate": 3.987450108480374e-06, "loss": 0.4346, "step": 7865 }, { "epoch": 0.47195056098878024, "grad_norm": 1.2516896724700928, "learning_rate": 3.986776561357215e-06, "loss": 0.4396, "step": 7866 }, { "epoch": 0.47201055978880424, "grad_norm": 1.1020234823226929, "learning_rate": 3.9861029958507216e-06, "loss": 0.3865, "step": 7867 }, { "epoch": 0.47207055858882824, "grad_norm": 1.2577972412109375, "learning_rate": 3.985429411986332e-06, "loss": 0.4475, "step": 7868 }, { "epoch": 0.47213055738885223, "grad_norm": 1.252329707145691, "learning_rate": 3.984755809789488e-06, "loss": 0.3974, "step": 7869 }, { "epoch": 0.47219055618887623, "grad_norm": 1.3365795612335205, "learning_rate": 3.984082189285623e-06, "loss": 0.4266, "step": 7870 }, { "epoch": 0.47225055498890023, "grad_norm": 1.2511197328567505, "learning_rate": 3.983408550500181e-06, "loss": 0.4166, "step": 7871 }, { "epoch": 0.4723105537889242, "grad_norm": 1.3536988496780396, "learning_rate": 3.982734893458599e-06, "loss": 0.4062, "step": 7872 }, { "epoch": 0.4723705525889482, "grad_norm": 1.1335681676864624, "learning_rate": 3.98206121818632e-06, "loss": 0.3752, "step": 7873 }, { "epoch": 0.4724305513889722, "grad_norm": 1.329195499420166, "learning_rate": 3.981387524708785e-06, "loss": 0.4167, "step": 7874 }, { "epoch": 0.4724905501889962, "grad_norm": 1.2489053010940552, "learning_rate": 3.980713813051437e-06, "loss": 0.3854, "step": 7875 }, { "epoch": 0.4725505489890202, "grad_norm": 1.278303861618042, "learning_rate": 3.980040083239718e-06, "loss": 0.3968, "step": 7876 }, { "epoch": 0.4726105477890442, "grad_norm": 1.3126097917556763, "learning_rate": 3.979366335299072e-06, "loss": 0.3895, "step": 7877 }, { "epoch": 0.4726705465890682, "grad_norm": 1.2770825624465942, "learning_rate": 3.978692569254943e-06, "loss": 0.4002, "step": 7878 }, { "epoch": 0.4727305453890922, "grad_norm": 1.2726874351501465, "learning_rate": 3.978018785132777e-06, "loss": 0.4448, "step": 7879 }, { "epoch": 0.4727905441891162, "grad_norm": 1.3155885934829712, "learning_rate": 3.977344982958019e-06, "loss": 0.4707, "step": 7880 }, { "epoch": 0.4728505429891402, "grad_norm": 1.1949018239974976, "learning_rate": 3.976671162756117e-06, "loss": 0.4106, "step": 7881 }, { "epoch": 0.4729105417891642, "grad_norm": 1.3237395286560059, "learning_rate": 3.975997324552515e-06, "loss": 0.4116, "step": 7882 }, { "epoch": 0.4729705405891882, "grad_norm": 1.200024962425232, "learning_rate": 3.975323468372665e-06, "loss": 0.3939, "step": 7883 }, { "epoch": 0.4730305393892122, "grad_norm": 1.213831901550293, "learning_rate": 3.974649594242012e-06, "loss": 0.3833, "step": 7884 }, { "epoch": 0.47309053818923624, "grad_norm": 1.270362138748169, "learning_rate": 3.973975702186006e-06, "loss": 0.437, "step": 7885 }, { "epoch": 0.47315053698926024, "grad_norm": 1.3850399255752563, "learning_rate": 3.973301792230098e-06, "loss": 0.4346, "step": 7886 }, { "epoch": 0.47321053578928424, "grad_norm": 1.3694583177566528, "learning_rate": 3.972627864399737e-06, "loss": 0.3884, "step": 7887 }, { "epoch": 0.47327053458930823, "grad_norm": 1.4825408458709717, "learning_rate": 3.971953918720375e-06, "loss": 0.4279, "step": 7888 }, { "epoch": 0.47333053338933223, "grad_norm": 1.1737626791000366, "learning_rate": 3.971279955217463e-06, "loss": 0.4282, "step": 7889 }, { "epoch": 0.47339053218935623, "grad_norm": 1.4075090885162354, "learning_rate": 3.970605973916455e-06, "loss": 0.4421, "step": 7890 }, { "epoch": 0.4734505309893802, "grad_norm": 1.265276312828064, "learning_rate": 3.969931974842803e-06, "loss": 0.4371, "step": 7891 }, { "epoch": 0.4735105297894042, "grad_norm": 1.1881991624832153, "learning_rate": 3.9692579580219615e-06, "loss": 0.3714, "step": 7892 }, { "epoch": 0.4735705285894282, "grad_norm": 1.301741361618042, "learning_rate": 3.968583923479385e-06, "loss": 0.4324, "step": 7893 }, { "epoch": 0.4736305273894522, "grad_norm": 1.226029634475708, "learning_rate": 3.967909871240529e-06, "loss": 0.4384, "step": 7894 }, { "epoch": 0.4736905261894762, "grad_norm": 1.2603187561035156, "learning_rate": 3.967235801330848e-06, "loss": 0.3691, "step": 7895 }, { "epoch": 0.4737505249895002, "grad_norm": 1.2783639430999756, "learning_rate": 3.966561713775801e-06, "loss": 0.4013, "step": 7896 }, { "epoch": 0.4738105237895242, "grad_norm": 1.32949960231781, "learning_rate": 3.9658876086008415e-06, "loss": 0.4202, "step": 7897 }, { "epoch": 0.4738705225895482, "grad_norm": 1.2824573516845703, "learning_rate": 3.965213485831432e-06, "loss": 0.417, "step": 7898 }, { "epoch": 0.4739305213895722, "grad_norm": 1.2611329555511475, "learning_rate": 3.964539345493027e-06, "loss": 0.3777, "step": 7899 }, { "epoch": 0.4739905201895962, "grad_norm": 1.3533389568328857, "learning_rate": 3.963865187611088e-06, "loss": 0.398, "step": 7900 }, { "epoch": 0.4740505189896202, "grad_norm": 1.208619475364685, "learning_rate": 3.963191012211074e-06, "loss": 0.3733, "step": 7901 }, { "epoch": 0.4741105177896442, "grad_norm": 1.3462650775909424, "learning_rate": 3.962516819318446e-06, "loss": 0.4411, "step": 7902 }, { "epoch": 0.4741705165896682, "grad_norm": 1.2476415634155273, "learning_rate": 3.961842608958664e-06, "loss": 0.3488, "step": 7903 }, { "epoch": 0.4742305153896922, "grad_norm": 1.3620413541793823, "learning_rate": 3.961168381157192e-06, "loss": 0.3743, "step": 7904 }, { "epoch": 0.4742905141897162, "grad_norm": 1.1603307723999023, "learning_rate": 3.960494135939491e-06, "loss": 0.4141, "step": 7905 }, { "epoch": 0.4743505129897402, "grad_norm": 1.3147696256637573, "learning_rate": 3.959819873331025e-06, "loss": 0.4347, "step": 7906 }, { "epoch": 0.4744105117897642, "grad_norm": 1.4104341268539429, "learning_rate": 3.959145593357256e-06, "loss": 0.3557, "step": 7907 }, { "epoch": 0.47447051058978823, "grad_norm": 1.116996169090271, "learning_rate": 3.9584712960436505e-06, "loss": 0.3617, "step": 7908 }, { "epoch": 0.4745305093898122, "grad_norm": 1.1898671388626099, "learning_rate": 3.957796981415673e-06, "loss": 0.4201, "step": 7909 }, { "epoch": 0.4745905081898362, "grad_norm": 1.2918860912322998, "learning_rate": 3.95712264949879e-06, "loss": 0.4386, "step": 7910 }, { "epoch": 0.4746505069898602, "grad_norm": 1.0912742614746094, "learning_rate": 3.956448300318466e-06, "loss": 0.3964, "step": 7911 }, { "epoch": 0.4747105057898842, "grad_norm": 1.1673235893249512, "learning_rate": 3.9557739339001695e-06, "loss": 0.4265, "step": 7912 }, { "epoch": 0.4747705045899082, "grad_norm": 1.2600687742233276, "learning_rate": 3.955099550269368e-06, "loss": 0.444, "step": 7913 }, { "epoch": 0.4748305033899322, "grad_norm": 1.3324053287506104, "learning_rate": 3.95442514945153e-06, "loss": 0.4177, "step": 7914 }, { "epoch": 0.4748905021899562, "grad_norm": 1.2164404392242432, "learning_rate": 3.9537507314721246e-06, "loss": 0.3872, "step": 7915 }, { "epoch": 0.4749505009899802, "grad_norm": 1.5804609060287476, "learning_rate": 3.953076296356622e-06, "loss": 0.4028, "step": 7916 }, { "epoch": 0.4750104997900042, "grad_norm": 1.302038311958313, "learning_rate": 3.9524018441304905e-06, "loss": 0.4835, "step": 7917 }, { "epoch": 0.4750704985900282, "grad_norm": 1.2308474779129028, "learning_rate": 3.951727374819204e-06, "loss": 0.3741, "step": 7918 }, { "epoch": 0.4751304973900522, "grad_norm": 1.2685805559158325, "learning_rate": 3.9510528884482316e-06, "loss": 0.3505, "step": 7919 }, { "epoch": 0.4751904961900762, "grad_norm": 1.252705693244934, "learning_rate": 3.9503783850430455e-06, "loss": 0.3838, "step": 7920 }, { "epoch": 0.4752504949901002, "grad_norm": 1.3122715950012207, "learning_rate": 3.949703864629122e-06, "loss": 0.4377, "step": 7921 }, { "epoch": 0.4753104937901242, "grad_norm": 1.2241730690002441, "learning_rate": 3.949029327231931e-06, "loss": 0.3696, "step": 7922 }, { "epoch": 0.4753704925901482, "grad_norm": 1.145850419998169, "learning_rate": 3.948354772876949e-06, "loss": 0.4189, "step": 7923 }, { "epoch": 0.4754304913901722, "grad_norm": 1.228716254234314, "learning_rate": 3.947680201589649e-06, "loss": 0.4166, "step": 7924 }, { "epoch": 0.4754904901901962, "grad_norm": 1.188133716583252, "learning_rate": 3.947005613395509e-06, "loss": 0.398, "step": 7925 }, { "epoch": 0.4755504889902202, "grad_norm": 1.2368279695510864, "learning_rate": 3.946331008320001e-06, "loss": 0.3901, "step": 7926 }, { "epoch": 0.4756104877902442, "grad_norm": 1.381489872932434, "learning_rate": 3.945656386388606e-06, "loss": 0.4403, "step": 7927 }, { "epoch": 0.47567048659026817, "grad_norm": 1.257758617401123, "learning_rate": 3.9449817476268e-06, "loss": 0.3593, "step": 7928 }, { "epoch": 0.47573048539029217, "grad_norm": 1.3342845439910889, "learning_rate": 3.94430709206006e-06, "loss": 0.4307, "step": 7929 }, { "epoch": 0.4757904841903162, "grad_norm": 1.3180737495422363, "learning_rate": 3.943632419713865e-06, "loss": 0.4314, "step": 7930 }, { "epoch": 0.4758504829903402, "grad_norm": 1.2425988912582397, "learning_rate": 3.942957730613696e-06, "loss": 0.4236, "step": 7931 }, { "epoch": 0.4759104817903642, "grad_norm": 1.3224929571151733, "learning_rate": 3.942283024785029e-06, "loss": 0.4503, "step": 7932 }, { "epoch": 0.4759704805903882, "grad_norm": 1.295821189880371, "learning_rate": 3.94160830225335e-06, "loss": 0.3958, "step": 7933 }, { "epoch": 0.4760304793904122, "grad_norm": 1.2951875925064087, "learning_rate": 3.940933563044135e-06, "loss": 0.4295, "step": 7934 }, { "epoch": 0.4760904781904362, "grad_norm": 1.2127912044525146, "learning_rate": 3.940258807182871e-06, "loss": 0.3694, "step": 7935 }, { "epoch": 0.4761504769904602, "grad_norm": 1.3805633783340454, "learning_rate": 3.939584034695035e-06, "loss": 0.4272, "step": 7936 }, { "epoch": 0.4762104757904842, "grad_norm": 1.1841888427734375, "learning_rate": 3.938909245606116e-06, "loss": 0.3897, "step": 7937 }, { "epoch": 0.4762704745905082, "grad_norm": 1.1681281328201294, "learning_rate": 3.938234439941591e-06, "loss": 0.3528, "step": 7938 }, { "epoch": 0.4763304733905322, "grad_norm": 1.3381319046020508, "learning_rate": 3.93755961772695e-06, "loss": 0.4204, "step": 7939 }, { "epoch": 0.4763904721905562, "grad_norm": 1.3339365720748901, "learning_rate": 3.936884778987674e-06, "loss": 0.4256, "step": 7940 }, { "epoch": 0.4764504709905802, "grad_norm": 1.2071858644485474, "learning_rate": 3.936209923749252e-06, "loss": 0.4153, "step": 7941 }, { "epoch": 0.4765104697906042, "grad_norm": 1.2374424934387207, "learning_rate": 3.935535052037168e-06, "loss": 0.3863, "step": 7942 }, { "epoch": 0.4765704685906282, "grad_norm": 1.2056310176849365, "learning_rate": 3.934860163876909e-06, "loss": 0.4265, "step": 7943 }, { "epoch": 0.4766304673906522, "grad_norm": 1.1350724697113037, "learning_rate": 3.934185259293963e-06, "loss": 0.3662, "step": 7944 }, { "epoch": 0.4766904661906762, "grad_norm": 1.2985559701919556, "learning_rate": 3.933510338313818e-06, "loss": 0.4522, "step": 7945 }, { "epoch": 0.4767504649907002, "grad_norm": 1.157700538635254, "learning_rate": 3.932835400961963e-06, "loss": 0.4022, "step": 7946 }, { "epoch": 0.47681046379072417, "grad_norm": 1.3242005109786987, "learning_rate": 3.932160447263886e-06, "loss": 0.4156, "step": 7947 }, { "epoch": 0.47687046259074817, "grad_norm": 1.3587493896484375, "learning_rate": 3.931485477245079e-06, "loss": 0.4219, "step": 7948 }, { "epoch": 0.47693046139077216, "grad_norm": 1.3754056692123413, "learning_rate": 3.930810490931029e-06, "loss": 0.4321, "step": 7949 }, { "epoch": 0.47699046019079616, "grad_norm": 1.4205271005630493, "learning_rate": 3.9301354883472325e-06, "loss": 0.4308, "step": 7950 }, { "epoch": 0.47705045899082016, "grad_norm": 1.3183571100234985, "learning_rate": 3.929460469519176e-06, "loss": 0.3855, "step": 7951 }, { "epoch": 0.47711045779084416, "grad_norm": 1.3490597009658813, "learning_rate": 3.928785434472356e-06, "loss": 0.3736, "step": 7952 }, { "epoch": 0.4771704565908682, "grad_norm": 1.3035800457000732, "learning_rate": 3.928110383232262e-06, "loss": 0.3985, "step": 7953 }, { "epoch": 0.4772304553908922, "grad_norm": 1.2186965942382812, "learning_rate": 3.927435315824391e-06, "loss": 0.3985, "step": 7954 }, { "epoch": 0.4772904541909162, "grad_norm": 1.3473981618881226, "learning_rate": 3.9267602322742345e-06, "loss": 0.4184, "step": 7955 }, { "epoch": 0.4773504529909402, "grad_norm": 1.3495874404907227, "learning_rate": 3.926085132607288e-06, "loss": 0.4213, "step": 7956 }, { "epoch": 0.4774104517909642, "grad_norm": 1.28123939037323, "learning_rate": 3.925410016849048e-06, "loss": 0.373, "step": 7957 }, { "epoch": 0.4774704505909882, "grad_norm": 1.1980425119400024, "learning_rate": 3.9247348850250105e-06, "loss": 0.3634, "step": 7958 }, { "epoch": 0.4775304493910122, "grad_norm": 1.215943694114685, "learning_rate": 3.92405973716067e-06, "loss": 0.3874, "step": 7959 }, { "epoch": 0.4775904481910362, "grad_norm": 1.3057771921157837, "learning_rate": 3.923384573281526e-06, "loss": 0.4585, "step": 7960 }, { "epoch": 0.4776504469910602, "grad_norm": 1.3844060897827148, "learning_rate": 3.922709393413075e-06, "loss": 0.3921, "step": 7961 }, { "epoch": 0.4777104457910842, "grad_norm": 1.338574767112732, "learning_rate": 3.922034197580816e-06, "loss": 0.3748, "step": 7962 }, { "epoch": 0.4777704445911082, "grad_norm": 1.290452480316162, "learning_rate": 3.92135898581025e-06, "loss": 0.4198, "step": 7963 }, { "epoch": 0.4778304433911322, "grad_norm": 1.2567455768585205, "learning_rate": 3.920683758126873e-06, "loss": 0.4178, "step": 7964 }, { "epoch": 0.4778904421911562, "grad_norm": 1.3093584775924683, "learning_rate": 3.920008514556188e-06, "loss": 0.426, "step": 7965 }, { "epoch": 0.47795044099118017, "grad_norm": 1.17010498046875, "learning_rate": 3.919333255123695e-06, "loss": 0.3705, "step": 7966 }, { "epoch": 0.47801043979120417, "grad_norm": 1.2162485122680664, "learning_rate": 3.918657979854894e-06, "loss": 0.4276, "step": 7967 }, { "epoch": 0.47807043859122816, "grad_norm": 1.3124678134918213, "learning_rate": 3.9179826887752896e-06, "loss": 0.4319, "step": 7968 }, { "epoch": 0.47813043739125216, "grad_norm": 1.2965651750564575, "learning_rate": 3.917307381910383e-06, "loss": 0.4156, "step": 7969 }, { "epoch": 0.47819043619127616, "grad_norm": 1.2261064052581787, "learning_rate": 3.916632059285677e-06, "loss": 0.4219, "step": 7970 }, { "epoch": 0.47825043499130016, "grad_norm": 1.280090093612671, "learning_rate": 3.915956720926679e-06, "loss": 0.4092, "step": 7971 }, { "epoch": 0.47831043379132415, "grad_norm": 1.087274432182312, "learning_rate": 3.915281366858888e-06, "loss": 0.439, "step": 7972 }, { "epoch": 0.47837043259134815, "grad_norm": 1.4132405519485474, "learning_rate": 3.914605997107813e-06, "loss": 0.4547, "step": 7973 }, { "epoch": 0.47843043139137215, "grad_norm": 1.2809010744094849, "learning_rate": 3.913930611698958e-06, "loss": 0.419, "step": 7974 }, { "epoch": 0.4784904301913962, "grad_norm": 1.1535881757736206, "learning_rate": 3.91325521065783e-06, "loss": 0.3744, "step": 7975 }, { "epoch": 0.4785504289914202, "grad_norm": 1.228211760520935, "learning_rate": 3.9125797940099355e-06, "loss": 0.4229, "step": 7976 }, { "epoch": 0.4786104277914442, "grad_norm": 1.4633396863937378, "learning_rate": 3.911904361780782e-06, "loss": 0.4622, "step": 7977 }, { "epoch": 0.4786704265914682, "grad_norm": 1.2300626039505005, "learning_rate": 3.911228913995878e-06, "loss": 0.3753, "step": 7978 }, { "epoch": 0.4787304253914922, "grad_norm": 1.1439995765686035, "learning_rate": 3.91055345068073e-06, "loss": 0.4247, "step": 7979 }, { "epoch": 0.4787904241915162, "grad_norm": 1.2150198221206665, "learning_rate": 3.909877971860848e-06, "loss": 0.3746, "step": 7980 }, { "epoch": 0.4788504229915402, "grad_norm": 1.3705744743347168, "learning_rate": 3.909202477561746e-06, "loss": 0.3963, "step": 7981 }, { "epoch": 0.4789104217915642, "grad_norm": 1.218740463256836, "learning_rate": 3.908526967808927e-06, "loss": 0.4354, "step": 7982 }, { "epoch": 0.4789704205915882, "grad_norm": 1.3706926107406616, "learning_rate": 3.907851442627908e-06, "loss": 0.4256, "step": 7983 }, { "epoch": 0.47903041939161217, "grad_norm": 1.2260704040527344, "learning_rate": 3.907175902044196e-06, "loss": 0.3979, "step": 7984 }, { "epoch": 0.47909041819163617, "grad_norm": 1.3213330507278442, "learning_rate": 3.906500346083307e-06, "loss": 0.4147, "step": 7985 }, { "epoch": 0.47915041699166017, "grad_norm": 1.2031601667404175, "learning_rate": 3.905824774770752e-06, "loss": 0.3815, "step": 7986 }, { "epoch": 0.47921041579168416, "grad_norm": 1.3773576021194458, "learning_rate": 3.905149188132043e-06, "loss": 0.3993, "step": 7987 }, { "epoch": 0.47927041459170816, "grad_norm": 1.3710721731185913, "learning_rate": 3.904473586192697e-06, "loss": 0.478, "step": 7988 }, { "epoch": 0.47933041339173216, "grad_norm": 1.306216835975647, "learning_rate": 3.903797968978225e-06, "loss": 0.4424, "step": 7989 }, { "epoch": 0.47939041219175615, "grad_norm": 1.2272584438323975, "learning_rate": 3.903122336514143e-06, "loss": 0.4006, "step": 7990 }, { "epoch": 0.47945041099178015, "grad_norm": 1.348086953163147, "learning_rate": 3.902446688825967e-06, "loss": 0.4205, "step": 7991 }, { "epoch": 0.47951040979180415, "grad_norm": 1.214523434638977, "learning_rate": 3.901771025939214e-06, "loss": 0.4173, "step": 7992 }, { "epoch": 0.47957040859182815, "grad_norm": 1.2474547624588013, "learning_rate": 3.9010953478794e-06, "loss": 0.4088, "step": 7993 }, { "epoch": 0.47963040739185214, "grad_norm": 1.2748583555221558, "learning_rate": 3.900419654672041e-06, "loss": 0.3788, "step": 7994 }, { "epoch": 0.47969040619187614, "grad_norm": 1.2240790128707886, "learning_rate": 3.899743946342657e-06, "loss": 0.3979, "step": 7995 }, { "epoch": 0.47975040499190014, "grad_norm": 1.3861092329025269, "learning_rate": 3.899068222916764e-06, "loss": 0.4429, "step": 7996 }, { "epoch": 0.47981040379192413, "grad_norm": 1.3053100109100342, "learning_rate": 3.898392484419884e-06, "loss": 0.3594, "step": 7997 }, { "epoch": 0.4798704025919482, "grad_norm": 1.3481534719467163, "learning_rate": 3.897716730877534e-06, "loss": 0.4399, "step": 7998 }, { "epoch": 0.4799304013919722, "grad_norm": 1.272834300994873, "learning_rate": 3.8970409623152354e-06, "loss": 0.4604, "step": 7999 }, { "epoch": 0.4799904001919962, "grad_norm": 1.2386828660964966, "learning_rate": 3.896365178758509e-06, "loss": 0.4133, "step": 8000 }, { "epoch": 0.4800503989920202, "grad_norm": 1.2441883087158203, "learning_rate": 3.895689380232875e-06, "loss": 0.4299, "step": 8001 }, { "epoch": 0.4801103977920442, "grad_norm": 1.3317524194717407, "learning_rate": 3.895013566763855e-06, "loss": 0.3699, "step": 8002 }, { "epoch": 0.48017039659206817, "grad_norm": 1.6809536218643188, "learning_rate": 3.894337738376973e-06, "loss": 0.4295, "step": 8003 }, { "epoch": 0.48023039539209217, "grad_norm": 1.410904884338379, "learning_rate": 3.893661895097752e-06, "loss": 0.4684, "step": 8004 }, { "epoch": 0.48029039419211617, "grad_norm": 1.2211319208145142, "learning_rate": 3.892986036951714e-06, "loss": 0.4064, "step": 8005 }, { "epoch": 0.48035039299214016, "grad_norm": 1.236188292503357, "learning_rate": 3.892310163964384e-06, "loss": 0.4022, "step": 8006 }, { "epoch": 0.48041039179216416, "grad_norm": 1.251278042793274, "learning_rate": 3.8916342761612865e-06, "loss": 0.3905, "step": 8007 }, { "epoch": 0.48047039059218816, "grad_norm": 1.3089728355407715, "learning_rate": 3.890958373567946e-06, "loss": 0.4203, "step": 8008 }, { "epoch": 0.48053038939221215, "grad_norm": 1.2922359704971313, "learning_rate": 3.89028245620989e-06, "loss": 0.4186, "step": 8009 }, { "epoch": 0.48059038819223615, "grad_norm": 1.215376377105713, "learning_rate": 3.889606524112643e-06, "loss": 0.4421, "step": 8010 }, { "epoch": 0.48065038699226015, "grad_norm": 1.3029541969299316, "learning_rate": 3.888930577301734e-06, "loss": 0.4065, "step": 8011 }, { "epoch": 0.48071038579228415, "grad_norm": 1.32620370388031, "learning_rate": 3.888254615802687e-06, "loss": 0.4251, "step": 8012 }, { "epoch": 0.48077038459230814, "grad_norm": 1.2333050966262817, "learning_rate": 3.887578639641034e-06, "loss": 0.4233, "step": 8013 }, { "epoch": 0.48083038339233214, "grad_norm": 1.2580528259277344, "learning_rate": 3.886902648842299e-06, "loss": 0.4037, "step": 8014 }, { "epoch": 0.48089038219235614, "grad_norm": 1.1400949954986572, "learning_rate": 3.886226643432016e-06, "loss": 0.4027, "step": 8015 }, { "epoch": 0.48095038099238013, "grad_norm": 1.2839198112487793, "learning_rate": 3.885550623435711e-06, "loss": 0.4234, "step": 8016 }, { "epoch": 0.48101037979240413, "grad_norm": 1.2557474374771118, "learning_rate": 3.884874588878917e-06, "loss": 0.4295, "step": 8017 }, { "epoch": 0.4810703785924281, "grad_norm": 1.1761488914489746, "learning_rate": 3.884198539787162e-06, "loss": 0.4112, "step": 8018 }, { "epoch": 0.4811303773924521, "grad_norm": 1.3601680994033813, "learning_rate": 3.88352247618598e-06, "loss": 0.4216, "step": 8019 }, { "epoch": 0.4811903761924762, "grad_norm": 1.284745693206787, "learning_rate": 3.8828463981009e-06, "loss": 0.4357, "step": 8020 }, { "epoch": 0.4812503749925002, "grad_norm": 1.3351253271102905, "learning_rate": 3.882170305557456e-06, "loss": 0.4213, "step": 8021 }, { "epoch": 0.48131037379252417, "grad_norm": 1.2550232410430908, "learning_rate": 3.88149419858118e-06, "loss": 0.4614, "step": 8022 }, { "epoch": 0.48137037259254817, "grad_norm": 1.3095496892929077, "learning_rate": 3.880818077197607e-06, "loss": 0.4039, "step": 8023 }, { "epoch": 0.48143037139257217, "grad_norm": 1.2433816194534302, "learning_rate": 3.8801419414322705e-06, "loss": 0.3603, "step": 8024 }, { "epoch": 0.48149037019259616, "grad_norm": 1.413718342781067, "learning_rate": 3.879465791310705e-06, "loss": 0.4431, "step": 8025 }, { "epoch": 0.48155036899262016, "grad_norm": 1.2839713096618652, "learning_rate": 3.8787896268584445e-06, "loss": 0.383, "step": 8026 }, { "epoch": 0.48161036779264416, "grad_norm": 1.2445859909057617, "learning_rate": 3.878113448101026e-06, "loss": 0.397, "step": 8027 }, { "epoch": 0.48167036659266815, "grad_norm": 1.1912561655044556, "learning_rate": 3.877437255063985e-06, "loss": 0.359, "step": 8028 }, { "epoch": 0.48173036539269215, "grad_norm": 1.34518301486969, "learning_rate": 3.87676104777286e-06, "loss": 0.395, "step": 8029 }, { "epoch": 0.48179036419271615, "grad_norm": 1.3655972480773926, "learning_rate": 3.876084826253185e-06, "loss": 0.3978, "step": 8030 }, { "epoch": 0.48185036299274014, "grad_norm": 1.2642654180526733, "learning_rate": 3.8754085905304995e-06, "loss": 0.3807, "step": 8031 }, { "epoch": 0.48191036179276414, "grad_norm": 1.2451939582824707, "learning_rate": 3.874732340630343e-06, "loss": 0.3698, "step": 8032 }, { "epoch": 0.48197036059278814, "grad_norm": 1.5127079486846924, "learning_rate": 3.874056076578253e-06, "loss": 0.4417, "step": 8033 }, { "epoch": 0.48203035939281214, "grad_norm": 1.1083496809005737, "learning_rate": 3.873379798399769e-06, "loss": 0.3749, "step": 8034 }, { "epoch": 0.48209035819283613, "grad_norm": 1.2170990705490112, "learning_rate": 3.87270350612043e-06, "loss": 0.4086, "step": 8035 }, { "epoch": 0.48215035699286013, "grad_norm": 1.1272153854370117, "learning_rate": 3.872027199765779e-06, "loss": 0.4052, "step": 8036 }, { "epoch": 0.4822103557928841, "grad_norm": 1.2181510925292969, "learning_rate": 3.8713508793613545e-06, "loss": 0.3697, "step": 8037 }, { "epoch": 0.4822703545929081, "grad_norm": 1.323179841041565, "learning_rate": 3.8706745449327e-06, "loss": 0.4159, "step": 8038 }, { "epoch": 0.4823303533929321, "grad_norm": 1.3726599216461182, "learning_rate": 3.869998196505358e-06, "loss": 0.4782, "step": 8039 }, { "epoch": 0.4823903521929561, "grad_norm": 1.2978876829147339, "learning_rate": 3.869321834104868e-06, "loss": 0.3827, "step": 8040 }, { "epoch": 0.4824503509929801, "grad_norm": 1.3763983249664307, "learning_rate": 3.868645457756775e-06, "loss": 0.4555, "step": 8041 }, { "epoch": 0.48251034979300417, "grad_norm": 1.3019987344741821, "learning_rate": 3.867969067486624e-06, "loss": 0.405, "step": 8042 }, { "epoch": 0.48257034859302816, "grad_norm": 1.3690862655639648, "learning_rate": 3.867292663319955e-06, "loss": 0.4159, "step": 8043 }, { "epoch": 0.48263034739305216, "grad_norm": 1.2921584844589233, "learning_rate": 3.866616245282318e-06, "loss": 0.4564, "step": 8044 }, { "epoch": 0.48269034619307616, "grad_norm": 1.2543230056762695, "learning_rate": 3.865939813399255e-06, "loss": 0.3958, "step": 8045 }, { "epoch": 0.48275034499310016, "grad_norm": 1.2155784368515015, "learning_rate": 3.865263367696313e-06, "loss": 0.3699, "step": 8046 }, { "epoch": 0.48281034379312415, "grad_norm": 1.4532876014709473, "learning_rate": 3.864586908199037e-06, "loss": 0.4129, "step": 8047 }, { "epoch": 0.48287034259314815, "grad_norm": 1.1629993915557861, "learning_rate": 3.863910434932975e-06, "loss": 0.3884, "step": 8048 }, { "epoch": 0.48293034139317215, "grad_norm": 1.2517826557159424, "learning_rate": 3.863233947923673e-06, "loss": 0.3901, "step": 8049 }, { "epoch": 0.48299034019319614, "grad_norm": 1.1943142414093018, "learning_rate": 3.862557447196681e-06, "loss": 0.3752, "step": 8050 }, { "epoch": 0.48305033899322014, "grad_norm": 1.3072168827056885, "learning_rate": 3.861880932777544e-06, "loss": 0.4305, "step": 8051 }, { "epoch": 0.48311033779324414, "grad_norm": 1.2298754453659058, "learning_rate": 3.861204404691815e-06, "loss": 0.4251, "step": 8052 }, { "epoch": 0.48317033659326813, "grad_norm": 1.2261810302734375, "learning_rate": 3.86052786296504e-06, "loss": 0.4241, "step": 8053 }, { "epoch": 0.48323033539329213, "grad_norm": 1.1342471837997437, "learning_rate": 3.859851307622771e-06, "loss": 0.3793, "step": 8054 }, { "epoch": 0.48329033419331613, "grad_norm": 1.2188067436218262, "learning_rate": 3.859174738690558e-06, "loss": 0.3885, "step": 8055 }, { "epoch": 0.4833503329933401, "grad_norm": 1.2248831987380981, "learning_rate": 3.85849815619395e-06, "loss": 0.4401, "step": 8056 }, { "epoch": 0.4834103317933641, "grad_norm": 1.1065789461135864, "learning_rate": 3.857821560158501e-06, "loss": 0.406, "step": 8057 }, { "epoch": 0.4834703305933881, "grad_norm": 1.3281853199005127, "learning_rate": 3.857144950609761e-06, "loss": 0.4283, "step": 8058 }, { "epoch": 0.4835303293934121, "grad_norm": 1.2452795505523682, "learning_rate": 3.856468327573284e-06, "loss": 0.3721, "step": 8059 }, { "epoch": 0.4835903281934361, "grad_norm": 1.305781602859497, "learning_rate": 3.855791691074623e-06, "loss": 0.3965, "step": 8060 }, { "epoch": 0.4836503269934601, "grad_norm": 1.242201805114746, "learning_rate": 3.85511504113933e-06, "loss": 0.4369, "step": 8061 }, { "epoch": 0.4837103257934841, "grad_norm": 1.2692877054214478, "learning_rate": 3.854438377792959e-06, "loss": 0.406, "step": 8062 }, { "epoch": 0.4837703245935081, "grad_norm": 1.2957240343093872, "learning_rate": 3.853761701061067e-06, "loss": 0.405, "step": 8063 }, { "epoch": 0.4838303233935321, "grad_norm": 1.1060019731521606, "learning_rate": 3.8530850109692065e-06, "loss": 0.3857, "step": 8064 }, { "epoch": 0.48389032219355615, "grad_norm": 1.2822277545928955, "learning_rate": 3.852408307542934e-06, "loss": 0.3532, "step": 8065 }, { "epoch": 0.48395032099358015, "grad_norm": 1.1872658729553223, "learning_rate": 3.851731590807805e-06, "loss": 0.3713, "step": 8066 }, { "epoch": 0.48401031979360415, "grad_norm": 1.2018492221832275, "learning_rate": 3.851054860789376e-06, "loss": 0.3657, "step": 8067 }, { "epoch": 0.48407031859362815, "grad_norm": 1.3360230922698975, "learning_rate": 3.850378117513204e-06, "loss": 0.4171, "step": 8068 }, { "epoch": 0.48413031739365214, "grad_norm": 1.2378846406936646, "learning_rate": 3.849701361004848e-06, "loss": 0.3634, "step": 8069 }, { "epoch": 0.48419031619367614, "grad_norm": 1.353426218032837, "learning_rate": 3.849024591289864e-06, "loss": 0.3275, "step": 8070 }, { "epoch": 0.48425031499370014, "grad_norm": 1.326732873916626, "learning_rate": 3.8483478083938126e-06, "loss": 0.3857, "step": 8071 }, { "epoch": 0.48431031379372413, "grad_norm": 1.2465461492538452, "learning_rate": 3.8476710123422495e-06, "loss": 0.459, "step": 8072 }, { "epoch": 0.48437031259374813, "grad_norm": 1.3124366998672485, "learning_rate": 3.846994203160737e-06, "loss": 0.3621, "step": 8073 }, { "epoch": 0.48443031139377213, "grad_norm": 1.223624587059021, "learning_rate": 3.846317380874835e-06, "loss": 0.3822, "step": 8074 }, { "epoch": 0.4844903101937961, "grad_norm": 1.2685260772705078, "learning_rate": 3.845640545510104e-06, "loss": 0.4199, "step": 8075 }, { "epoch": 0.4845503089938201, "grad_norm": 1.3339718580245972, "learning_rate": 3.8449636970921025e-06, "loss": 0.3916, "step": 8076 }, { "epoch": 0.4846103077938441, "grad_norm": 1.339430570602417, "learning_rate": 3.844286835646395e-06, "loss": 0.3959, "step": 8077 }, { "epoch": 0.4846703065938681, "grad_norm": 1.3663148880004883, "learning_rate": 3.843609961198542e-06, "loss": 0.4559, "step": 8078 }, { "epoch": 0.4847303053938921, "grad_norm": 1.3448621034622192, "learning_rate": 3.842933073774107e-06, "loss": 0.3953, "step": 8079 }, { "epoch": 0.4847903041939161, "grad_norm": 1.279030203819275, "learning_rate": 3.842256173398651e-06, "loss": 0.4152, "step": 8080 }, { "epoch": 0.4848503029939401, "grad_norm": 1.3049969673156738, "learning_rate": 3.841579260097739e-06, "loss": 0.4464, "step": 8081 }, { "epoch": 0.4849103017939641, "grad_norm": 1.4851123094558716, "learning_rate": 3.840902333896934e-06, "loss": 0.3993, "step": 8082 }, { "epoch": 0.4849703005939881, "grad_norm": 1.2014482021331787, "learning_rate": 3.840225394821802e-06, "loss": 0.3603, "step": 8083 }, { "epoch": 0.4850302993940121, "grad_norm": 1.0907269716262817, "learning_rate": 3.839548442897906e-06, "loss": 0.4273, "step": 8084 }, { "epoch": 0.4850902981940361, "grad_norm": 1.2390226125717163, "learning_rate": 3.838871478150813e-06, "loss": 0.3899, "step": 8085 }, { "epoch": 0.4851502969940601, "grad_norm": 1.222649097442627, "learning_rate": 3.838194500606088e-06, "loss": 0.4166, "step": 8086 }, { "epoch": 0.48521029579408415, "grad_norm": 1.2602182626724243, "learning_rate": 3.837517510289298e-06, "loss": 0.4805, "step": 8087 }, { "epoch": 0.48527029459410814, "grad_norm": 1.247168779373169, "learning_rate": 3.8368405072260095e-06, "loss": 0.3645, "step": 8088 }, { "epoch": 0.48533029339413214, "grad_norm": 1.4670581817626953, "learning_rate": 3.836163491441788e-06, "loss": 0.4146, "step": 8089 }, { "epoch": 0.48539029219415614, "grad_norm": 1.1797070503234863, "learning_rate": 3.835486462962204e-06, "loss": 0.4218, "step": 8090 }, { "epoch": 0.48545029099418013, "grad_norm": 1.2975633144378662, "learning_rate": 3.834809421812825e-06, "loss": 0.3684, "step": 8091 }, { "epoch": 0.48551028979420413, "grad_norm": 1.3517711162567139, "learning_rate": 3.83413236801922e-06, "loss": 0.4264, "step": 8092 }, { "epoch": 0.4855702885942281, "grad_norm": 1.1739773750305176, "learning_rate": 3.833455301606957e-06, "loss": 0.4184, "step": 8093 }, { "epoch": 0.4856302873942521, "grad_norm": 1.1510674953460693, "learning_rate": 3.832778222601607e-06, "loss": 0.4326, "step": 8094 }, { "epoch": 0.4856902861942761, "grad_norm": 1.1774293184280396, "learning_rate": 3.832101131028738e-06, "loss": 0.3671, "step": 8095 }, { "epoch": 0.4857502849943001, "grad_norm": 1.1753864288330078, "learning_rate": 3.831424026913924e-06, "loss": 0.4291, "step": 8096 }, { "epoch": 0.4858102837943241, "grad_norm": 1.4079186916351318, "learning_rate": 3.830746910282733e-06, "loss": 0.4192, "step": 8097 }, { "epoch": 0.4858702825943481, "grad_norm": 1.2602776288986206, "learning_rate": 3.83006978116074e-06, "loss": 0.3794, "step": 8098 }, { "epoch": 0.4859302813943721, "grad_norm": 1.1733843088150024, "learning_rate": 3.829392639573514e-06, "loss": 0.3564, "step": 8099 }, { "epoch": 0.4859902801943961, "grad_norm": 1.301830530166626, "learning_rate": 3.82871548554663e-06, "loss": 0.4134, "step": 8100 }, { "epoch": 0.4860502789944201, "grad_norm": 1.480873465538025, "learning_rate": 3.828038319105658e-06, "loss": 0.4237, "step": 8101 }, { "epoch": 0.4861102777944441, "grad_norm": 1.323783278465271, "learning_rate": 3.827361140276174e-06, "loss": 0.4099, "step": 8102 }, { "epoch": 0.4861702765944681, "grad_norm": 1.3512951135635376, "learning_rate": 3.826683949083752e-06, "loss": 0.4824, "step": 8103 }, { "epoch": 0.4862302753944921, "grad_norm": 1.2950420379638672, "learning_rate": 3.8260067455539635e-06, "loss": 0.3718, "step": 8104 }, { "epoch": 0.4862902741945161, "grad_norm": 1.2641569375991821, "learning_rate": 3.825329529712386e-06, "loss": 0.4197, "step": 8105 }, { "epoch": 0.4863502729945401, "grad_norm": 1.373914122581482, "learning_rate": 3.824652301584596e-06, "loss": 0.3591, "step": 8106 }, { "epoch": 0.4864102717945641, "grad_norm": 1.2177294492721558, "learning_rate": 3.823975061196167e-06, "loss": 0.4087, "step": 8107 }, { "epoch": 0.4864702705945881, "grad_norm": 1.3031914234161377, "learning_rate": 3.8232978085726756e-06, "loss": 0.4059, "step": 8108 }, { "epoch": 0.4865302693946121, "grad_norm": 1.191499948501587, "learning_rate": 3.8226205437397e-06, "loss": 0.3934, "step": 8109 }, { "epoch": 0.48659026819463613, "grad_norm": 1.2755029201507568, "learning_rate": 3.8219432667228145e-06, "loss": 0.3688, "step": 8110 }, { "epoch": 0.48665026699466013, "grad_norm": 1.3635693788528442, "learning_rate": 3.8212659775476e-06, "loss": 0.434, "step": 8111 }, { "epoch": 0.4867102657946841, "grad_norm": 1.151928186416626, "learning_rate": 3.820588676239633e-06, "loss": 0.3434, "step": 8112 }, { "epoch": 0.4867702645947081, "grad_norm": 1.417306900024414, "learning_rate": 3.819911362824492e-06, "loss": 0.4119, "step": 8113 }, { "epoch": 0.4868302633947321, "grad_norm": 1.228804349899292, "learning_rate": 3.8192340373277555e-06, "loss": 0.3936, "step": 8114 }, { "epoch": 0.4868902621947561, "grad_norm": 1.2043187618255615, "learning_rate": 3.818556699775005e-06, "loss": 0.372, "step": 8115 }, { "epoch": 0.4869502609947801, "grad_norm": 1.308640480041504, "learning_rate": 3.817879350191819e-06, "loss": 0.4277, "step": 8116 }, { "epoch": 0.4870102597948041, "grad_norm": 1.2741938829421997, "learning_rate": 3.817201988603779e-06, "loss": 0.4062, "step": 8117 }, { "epoch": 0.4870702585948281, "grad_norm": 1.1866257190704346, "learning_rate": 3.816524615036464e-06, "loss": 0.4307, "step": 8118 }, { "epoch": 0.4871302573948521, "grad_norm": 1.2999765872955322, "learning_rate": 3.815847229515455e-06, "loss": 0.4187, "step": 8119 }, { "epoch": 0.4871902561948761, "grad_norm": 1.499635934829712, "learning_rate": 3.815169832066337e-06, "loss": 0.4412, "step": 8120 }, { "epoch": 0.4872502549949001, "grad_norm": 1.3009947538375854, "learning_rate": 3.814492422714691e-06, "loss": 0.4509, "step": 8121 }, { "epoch": 0.4873102537949241, "grad_norm": 1.283882737159729, "learning_rate": 3.8138150014860973e-06, "loss": 0.4299, "step": 8122 }, { "epoch": 0.4873702525949481, "grad_norm": 1.2735962867736816, "learning_rate": 3.8131375684061424e-06, "loss": 0.4304, "step": 8123 }, { "epoch": 0.4874302513949721, "grad_norm": 1.1933467388153076, "learning_rate": 3.812460123500407e-06, "loss": 0.3992, "step": 8124 }, { "epoch": 0.4874902501949961, "grad_norm": 1.2241290807724, "learning_rate": 3.8117826667944763e-06, "loss": 0.4138, "step": 8125 }, { "epoch": 0.4875502489950201, "grad_norm": 1.365374207496643, "learning_rate": 3.8111051983139356e-06, "loss": 0.3729, "step": 8126 }, { "epoch": 0.4876102477950441, "grad_norm": 1.3553118705749512, "learning_rate": 3.810427718084368e-06, "loss": 0.4331, "step": 8127 }, { "epoch": 0.4876702465950681, "grad_norm": 1.20175302028656, "learning_rate": 3.80975022613136e-06, "loss": 0.4185, "step": 8128 }, { "epoch": 0.4877302453950921, "grad_norm": 1.2424612045288086, "learning_rate": 3.809072722480498e-06, "loss": 0.4016, "step": 8129 }, { "epoch": 0.4877902441951161, "grad_norm": 1.219752311706543, "learning_rate": 3.8083952071573668e-06, "loss": 0.3851, "step": 8130 }, { "epoch": 0.48785024299514007, "grad_norm": 1.3417489528656006, "learning_rate": 3.807717680187553e-06, "loss": 0.4287, "step": 8131 }, { "epoch": 0.4879102417951641, "grad_norm": 1.1493666172027588, "learning_rate": 3.8070401415966457e-06, "loss": 0.3252, "step": 8132 }, { "epoch": 0.4879702405951881, "grad_norm": 1.0678483247756958, "learning_rate": 3.8063625914102304e-06, "loss": 0.3941, "step": 8133 }, { "epoch": 0.4880302393952121, "grad_norm": 1.3276071548461914, "learning_rate": 3.8056850296538964e-06, "loss": 0.4421, "step": 8134 }, { "epoch": 0.4880902381952361, "grad_norm": 1.367293357849121, "learning_rate": 3.805007456353231e-06, "loss": 0.4123, "step": 8135 }, { "epoch": 0.4881502369952601, "grad_norm": 1.2919325828552246, "learning_rate": 3.8043298715338237e-06, "loss": 0.4305, "step": 8136 }, { "epoch": 0.4882102357952841, "grad_norm": 1.313966155052185, "learning_rate": 3.8036522752212623e-06, "loss": 0.419, "step": 8137 }, { "epoch": 0.4882702345953081, "grad_norm": 1.2043936252593994, "learning_rate": 3.80297466744114e-06, "loss": 0.3632, "step": 8138 }, { "epoch": 0.4883302333953321, "grad_norm": 1.1791783571243286, "learning_rate": 3.8022970482190434e-06, "loss": 0.4176, "step": 8139 }, { "epoch": 0.4883902321953561, "grad_norm": 1.2201642990112305, "learning_rate": 3.801619417580566e-06, "loss": 0.3954, "step": 8140 }, { "epoch": 0.4884502309953801, "grad_norm": 1.3320012092590332, "learning_rate": 3.8009417755512964e-06, "loss": 0.4112, "step": 8141 }, { "epoch": 0.4885102297954041, "grad_norm": 1.3514055013656616, "learning_rate": 3.8002641221568272e-06, "loss": 0.4304, "step": 8142 }, { "epoch": 0.4885702285954281, "grad_norm": 1.2947827577590942, "learning_rate": 3.7995864574227485e-06, "loss": 0.4267, "step": 8143 }, { "epoch": 0.4886302273954521, "grad_norm": 1.258521318435669, "learning_rate": 3.798908781374657e-06, "loss": 0.3865, "step": 8144 }, { "epoch": 0.4886902261954761, "grad_norm": 1.3701621294021606, "learning_rate": 3.7982310940381413e-06, "loss": 0.4879, "step": 8145 }, { "epoch": 0.4887502249955001, "grad_norm": 1.2883434295654297, "learning_rate": 3.7975533954387964e-06, "loss": 0.4262, "step": 8146 }, { "epoch": 0.4888102237955241, "grad_norm": 1.3001335859298706, "learning_rate": 3.7968756856022144e-06, "loss": 0.4216, "step": 8147 }, { "epoch": 0.4888702225955481, "grad_norm": 1.2579909563064575, "learning_rate": 3.796197964553991e-06, "loss": 0.4215, "step": 8148 }, { "epoch": 0.4889302213955721, "grad_norm": 1.3584208488464355, "learning_rate": 3.7955202323197202e-06, "loss": 0.4628, "step": 8149 }, { "epoch": 0.48899022019559607, "grad_norm": 1.2530698776245117, "learning_rate": 3.7948424889249964e-06, "loss": 0.4268, "step": 8150 }, { "epoch": 0.48905021899562007, "grad_norm": 1.0984951257705688, "learning_rate": 3.7941647343954147e-06, "loss": 0.3285, "step": 8151 }, { "epoch": 0.48911021779564406, "grad_norm": 1.2431979179382324, "learning_rate": 3.7934869687565723e-06, "loss": 0.405, "step": 8152 }, { "epoch": 0.48917021659566806, "grad_norm": 1.4118316173553467, "learning_rate": 3.7928091920340624e-06, "loss": 0.4139, "step": 8153 }, { "epoch": 0.48923021539569206, "grad_norm": 1.2881866693496704, "learning_rate": 3.792131404253485e-06, "loss": 0.3866, "step": 8154 }, { "epoch": 0.4892902141957161, "grad_norm": 1.240920901298523, "learning_rate": 3.791453605440435e-06, "loss": 0.4218, "step": 8155 }, { "epoch": 0.4893502129957401, "grad_norm": 1.2526124715805054, "learning_rate": 3.7907757956205094e-06, "loss": 0.4606, "step": 8156 }, { "epoch": 0.4894102117957641, "grad_norm": 1.1738399267196655, "learning_rate": 3.790097974819308e-06, "loss": 0.4201, "step": 8157 }, { "epoch": 0.4894702105957881, "grad_norm": 1.2179832458496094, "learning_rate": 3.7894201430624263e-06, "loss": 0.3833, "step": 8158 }, { "epoch": 0.4895302093958121, "grad_norm": 1.2600266933441162, "learning_rate": 3.788742300375466e-06, "loss": 0.4007, "step": 8159 }, { "epoch": 0.4895902081958361, "grad_norm": 1.1517971754074097, "learning_rate": 3.7880644467840228e-06, "loss": 0.362, "step": 8160 }, { "epoch": 0.4896502069958601, "grad_norm": 1.2353061437606812, "learning_rate": 3.7873865823136997e-06, "loss": 0.3999, "step": 8161 }, { "epoch": 0.4897102057958841, "grad_norm": 1.4078598022460938, "learning_rate": 3.786708706990093e-06, "loss": 0.3723, "step": 8162 }, { "epoch": 0.4897702045959081, "grad_norm": 1.3271287679672241, "learning_rate": 3.786030820838806e-06, "loss": 0.3788, "step": 8163 }, { "epoch": 0.4898302033959321, "grad_norm": 1.3009356260299683, "learning_rate": 3.7853529238854376e-06, "loss": 0.3952, "step": 8164 }, { "epoch": 0.4898902021959561, "grad_norm": 1.179713487625122, "learning_rate": 3.7846750161555897e-06, "loss": 0.4301, "step": 8165 }, { "epoch": 0.4899502009959801, "grad_norm": 1.250077724456787, "learning_rate": 3.7839970976748625e-06, "loss": 0.3964, "step": 8166 }, { "epoch": 0.4900101997960041, "grad_norm": 1.3699718713760376, "learning_rate": 3.7833191684688593e-06, "loss": 0.4245, "step": 8167 }, { "epoch": 0.4900701985960281, "grad_norm": 1.3774734735488892, "learning_rate": 3.782641228563182e-06, "loss": 0.4754, "step": 8168 }, { "epoch": 0.49013019739605207, "grad_norm": 1.241585612297058, "learning_rate": 3.7819632779834333e-06, "loss": 0.4344, "step": 8169 }, { "epoch": 0.49019019619607607, "grad_norm": 1.2170921564102173, "learning_rate": 3.781285316755216e-06, "loss": 0.4257, "step": 8170 }, { "epoch": 0.49025019499610006, "grad_norm": 1.2987116575241089, "learning_rate": 3.7806073449041342e-06, "loss": 0.4532, "step": 8171 }, { "epoch": 0.49031019379612406, "grad_norm": 1.279405951499939, "learning_rate": 3.77992936245579e-06, "loss": 0.3957, "step": 8172 }, { "epoch": 0.49037019259614806, "grad_norm": 1.377852439880371, "learning_rate": 3.779251369435791e-06, "loss": 0.3898, "step": 8173 }, { "epoch": 0.49043019139617205, "grad_norm": 1.3121105432510376, "learning_rate": 3.7785733658697385e-06, "loss": 0.4024, "step": 8174 }, { "epoch": 0.49049019019619605, "grad_norm": 1.3827753067016602, "learning_rate": 3.777895351783241e-06, "loss": 0.373, "step": 8175 }, { "epoch": 0.49055018899622005, "grad_norm": 1.2984933853149414, "learning_rate": 3.7772173272019003e-06, "loss": 0.3628, "step": 8176 }, { "epoch": 0.4906101877962441, "grad_norm": 1.2010002136230469, "learning_rate": 3.776539292151326e-06, "loss": 0.3882, "step": 8177 }, { "epoch": 0.4906701865962681, "grad_norm": 1.2860876321792603, "learning_rate": 3.7758612466571213e-06, "loss": 0.4041, "step": 8178 }, { "epoch": 0.4907301853962921, "grad_norm": 1.4100722074508667, "learning_rate": 3.775183190744893e-06, "loss": 0.4141, "step": 8179 }, { "epoch": 0.4907901841963161, "grad_norm": 1.4365391731262207, "learning_rate": 3.7745051244402517e-06, "loss": 0.3799, "step": 8180 }, { "epoch": 0.4908501829963401, "grad_norm": 1.3433387279510498, "learning_rate": 3.7738270477688007e-06, "loss": 0.3555, "step": 8181 }, { "epoch": 0.4909101817963641, "grad_norm": 1.0826855897903442, "learning_rate": 3.7731489607561515e-06, "loss": 0.3852, "step": 8182 }, { "epoch": 0.4909701805963881, "grad_norm": 1.135379672050476, "learning_rate": 3.7724708634279085e-06, "loss": 0.39, "step": 8183 }, { "epoch": 0.4910301793964121, "grad_norm": 1.2853593826293945, "learning_rate": 3.7717927558096827e-06, "loss": 0.4244, "step": 8184 }, { "epoch": 0.4910901781964361, "grad_norm": 1.3878710269927979, "learning_rate": 3.7711146379270837e-06, "loss": 0.4188, "step": 8185 }, { "epoch": 0.4911501769964601, "grad_norm": 1.34322190284729, "learning_rate": 3.7704365098057203e-06, "loss": 0.4176, "step": 8186 }, { "epoch": 0.49121017579648407, "grad_norm": 1.128114938735962, "learning_rate": 3.7697583714712012e-06, "loss": 0.3972, "step": 8187 }, { "epoch": 0.49127017459650807, "grad_norm": 1.201033115386963, "learning_rate": 3.769080222949138e-06, "loss": 0.3889, "step": 8188 }, { "epoch": 0.49133017339653207, "grad_norm": 1.3853752613067627, "learning_rate": 3.7684020642651387e-06, "loss": 0.4457, "step": 8189 }, { "epoch": 0.49139017219655606, "grad_norm": 1.2576863765716553, "learning_rate": 3.767723895444819e-06, "loss": 0.3949, "step": 8190 }, { "epoch": 0.49145017099658006, "grad_norm": 1.154932975769043, "learning_rate": 3.7670457165137855e-06, "loss": 0.3733, "step": 8191 }, { "epoch": 0.49151016979660406, "grad_norm": 1.3906567096710205, "learning_rate": 3.7663675274976536e-06, "loss": 0.4289, "step": 8192 }, { "epoch": 0.49157016859662805, "grad_norm": 1.298570990562439, "learning_rate": 3.765689328422033e-06, "loss": 0.4456, "step": 8193 }, { "epoch": 0.49163016739665205, "grad_norm": 1.2322101593017578, "learning_rate": 3.765011119312537e-06, "loss": 0.3927, "step": 8194 }, { "epoch": 0.49169016619667605, "grad_norm": 1.338395118713379, "learning_rate": 3.7643329001947774e-06, "loss": 0.3802, "step": 8195 }, { "epoch": 0.49175016499670005, "grad_norm": 1.4075980186462402, "learning_rate": 3.7636546710943705e-06, "loss": 0.4554, "step": 8196 }, { "epoch": 0.49181016379672404, "grad_norm": 1.2430503368377686, "learning_rate": 3.762976432036926e-06, "loss": 0.3805, "step": 8197 }, { "epoch": 0.49187016259674804, "grad_norm": 1.2373300790786743, "learning_rate": 3.762298183048061e-06, "loss": 0.3868, "step": 8198 }, { "epoch": 0.4919301613967721, "grad_norm": 1.3635188341140747, "learning_rate": 3.761619924153388e-06, "loss": 0.4151, "step": 8199 }, { "epoch": 0.4919901601967961, "grad_norm": 1.4062966108322144, "learning_rate": 3.7609416553785232e-06, "loss": 0.4329, "step": 8200 }, { "epoch": 0.4920501589968201, "grad_norm": 1.3530793190002441, "learning_rate": 3.7602633767490803e-06, "loss": 0.4585, "step": 8201 }, { "epoch": 0.4921101577968441, "grad_norm": 1.2853994369506836, "learning_rate": 3.759585088290675e-06, "loss": 0.3404, "step": 8202 }, { "epoch": 0.4921701565968681, "grad_norm": 1.2741854190826416, "learning_rate": 3.758906790028925e-06, "loss": 0.4172, "step": 8203 }, { "epoch": 0.4922301553968921, "grad_norm": 1.3503570556640625, "learning_rate": 3.7582284819894435e-06, "loss": 0.43, "step": 8204 }, { "epoch": 0.4922901541969161, "grad_norm": 1.2461820840835571, "learning_rate": 3.7575501641978503e-06, "loss": 0.3662, "step": 8205 }, { "epoch": 0.49235015299694007, "grad_norm": 1.24001145362854, "learning_rate": 3.7568718366797595e-06, "loss": 0.4228, "step": 8206 }, { "epoch": 0.49241015179696407, "grad_norm": 1.162475347518921, "learning_rate": 3.7561934994607897e-06, "loss": 0.402, "step": 8207 }, { "epoch": 0.49247015059698807, "grad_norm": 1.2602115869522095, "learning_rate": 3.755515152566559e-06, "loss": 0.3663, "step": 8208 }, { "epoch": 0.49253014939701206, "grad_norm": 1.1925073862075806, "learning_rate": 3.754836796022685e-06, "loss": 0.3838, "step": 8209 }, { "epoch": 0.49259014819703606, "grad_norm": 1.248556137084961, "learning_rate": 3.7541584298547865e-06, "loss": 0.4392, "step": 8210 }, { "epoch": 0.49265014699706006, "grad_norm": 1.2330876588821411, "learning_rate": 3.7534800540884828e-06, "loss": 0.3637, "step": 8211 }, { "epoch": 0.49271014579708405, "grad_norm": 1.351862907409668, "learning_rate": 3.752801668749391e-06, "loss": 0.4643, "step": 8212 }, { "epoch": 0.49277014459710805, "grad_norm": 1.3404161930084229, "learning_rate": 3.752123273863132e-06, "loss": 0.4237, "step": 8213 }, { "epoch": 0.49283014339713205, "grad_norm": 1.0952107906341553, "learning_rate": 3.751444869455326e-06, "loss": 0.3853, "step": 8214 }, { "epoch": 0.49289014219715604, "grad_norm": 1.2887835502624512, "learning_rate": 3.750766455551593e-06, "loss": 0.3921, "step": 8215 }, { "epoch": 0.49295014099718004, "grad_norm": 1.3167784214019775, "learning_rate": 3.750088032177553e-06, "loss": 0.4265, "step": 8216 }, { "epoch": 0.49301013979720404, "grad_norm": 1.3299516439437866, "learning_rate": 3.749409599358828e-06, "loss": 0.3938, "step": 8217 }, { "epoch": 0.49307013859722804, "grad_norm": 1.2119271755218506, "learning_rate": 3.7487311571210385e-06, "loss": 0.3722, "step": 8218 }, { "epoch": 0.49313013739725203, "grad_norm": 1.2340182065963745, "learning_rate": 3.7480527054898066e-06, "loss": 0.3449, "step": 8219 }, { "epoch": 0.49319013619727603, "grad_norm": 1.4146687984466553, "learning_rate": 3.747374244490753e-06, "loss": 0.4437, "step": 8220 }, { "epoch": 0.4932501349973, "grad_norm": 1.2497698068618774, "learning_rate": 3.746695774149503e-06, "loss": 0.4363, "step": 8221 }, { "epoch": 0.4933101337973241, "grad_norm": 1.089775562286377, "learning_rate": 3.746017294491677e-06, "loss": 0.4159, "step": 8222 }, { "epoch": 0.4933701325973481, "grad_norm": 1.2255187034606934, "learning_rate": 3.7453388055428986e-06, "loss": 0.436, "step": 8223 }, { "epoch": 0.4934301313973721, "grad_norm": 1.349544882774353, "learning_rate": 3.7446603073287907e-06, "loss": 0.3742, "step": 8224 }, { "epoch": 0.49349013019739607, "grad_norm": 1.262972116470337, "learning_rate": 3.743981799874978e-06, "loss": 0.3574, "step": 8225 }, { "epoch": 0.49355012899742007, "grad_norm": 1.2769291400909424, "learning_rate": 3.7433032832070843e-06, "loss": 0.42, "step": 8226 }, { "epoch": 0.49361012779744406, "grad_norm": 1.2300126552581787, "learning_rate": 3.7426247573507338e-06, "loss": 0.4077, "step": 8227 }, { "epoch": 0.49367012659746806, "grad_norm": 1.1852906942367554, "learning_rate": 3.7419462223315527e-06, "loss": 0.3995, "step": 8228 }, { "epoch": 0.49373012539749206, "grad_norm": 1.245489239692688, "learning_rate": 3.741267678175164e-06, "loss": 0.3571, "step": 8229 }, { "epoch": 0.49379012419751606, "grad_norm": 1.2861707210540771, "learning_rate": 3.740589124907194e-06, "loss": 0.4059, "step": 8230 }, { "epoch": 0.49385012299754005, "grad_norm": 1.1503632068634033, "learning_rate": 3.7399105625532696e-06, "loss": 0.4011, "step": 8231 }, { "epoch": 0.49391012179756405, "grad_norm": 1.3644812107086182, "learning_rate": 3.7392319911390166e-06, "loss": 0.3986, "step": 8232 }, { "epoch": 0.49397012059758805, "grad_norm": 1.1913893222808838, "learning_rate": 3.7385534106900602e-06, "loss": 0.4003, "step": 8233 }, { "epoch": 0.49403011939761204, "grad_norm": 1.1945362091064453, "learning_rate": 3.7378748212320293e-06, "loss": 0.3763, "step": 8234 }, { "epoch": 0.49409011819763604, "grad_norm": 1.330208659172058, "learning_rate": 3.73719622279055e-06, "loss": 0.3737, "step": 8235 }, { "epoch": 0.49415011699766004, "grad_norm": 1.2726658582687378, "learning_rate": 3.736517615391249e-06, "loss": 0.3873, "step": 8236 }, { "epoch": 0.49421011579768404, "grad_norm": 1.2282501459121704, "learning_rate": 3.735838999059756e-06, "loss": 0.427, "step": 8237 }, { "epoch": 0.49427011459770803, "grad_norm": 1.2971261739730835, "learning_rate": 3.7351603738216994e-06, "loss": 0.3996, "step": 8238 }, { "epoch": 0.49433011339773203, "grad_norm": 1.264167070388794, "learning_rate": 3.7344817397027052e-06, "loss": 0.4351, "step": 8239 }, { "epoch": 0.494390112197756, "grad_norm": 1.2341830730438232, "learning_rate": 3.7338030967284055e-06, "loss": 0.4221, "step": 8240 }, { "epoch": 0.49445011099778, "grad_norm": 1.2145320177078247, "learning_rate": 3.733124444924427e-06, "loss": 0.4175, "step": 8241 }, { "epoch": 0.494510109797804, "grad_norm": 1.1730799674987793, "learning_rate": 3.7324457843164004e-06, "loss": 0.4108, "step": 8242 }, { "epoch": 0.494570108597828, "grad_norm": 1.3101816177368164, "learning_rate": 3.7317671149299553e-06, "loss": 0.3732, "step": 8243 }, { "epoch": 0.49463010739785207, "grad_norm": 1.2274420261383057, "learning_rate": 3.731088436790723e-06, "loss": 0.4038, "step": 8244 }, { "epoch": 0.49469010619787607, "grad_norm": 1.1511894464492798, "learning_rate": 3.730409749924333e-06, "loss": 0.3979, "step": 8245 }, { "epoch": 0.49475010499790006, "grad_norm": 1.4182192087173462, "learning_rate": 3.729731054356417e-06, "loss": 0.4434, "step": 8246 }, { "epoch": 0.49481010379792406, "grad_norm": 1.2436761856079102, "learning_rate": 3.729052350112605e-06, "loss": 0.3912, "step": 8247 }, { "epoch": 0.49487010259794806, "grad_norm": 1.2323225736618042, "learning_rate": 3.7283736372185287e-06, "loss": 0.3673, "step": 8248 }, { "epoch": 0.49493010139797206, "grad_norm": 1.4869639873504639, "learning_rate": 3.7276949156998226e-06, "loss": 0.3799, "step": 8249 }, { "epoch": 0.49499010019799605, "grad_norm": 1.5032247304916382, "learning_rate": 3.727016185582115e-06, "loss": 0.4217, "step": 8250 }, { "epoch": 0.49505009899802005, "grad_norm": 1.2875796556472778, "learning_rate": 3.7263374468910417e-06, "loss": 0.4464, "step": 8251 }, { "epoch": 0.49511009779804405, "grad_norm": 1.1881850957870483, "learning_rate": 3.7256586996522335e-06, "loss": 0.422, "step": 8252 }, { "epoch": 0.49517009659806804, "grad_norm": 1.2467422485351562, "learning_rate": 3.7249799438913244e-06, "loss": 0.37, "step": 8253 }, { "epoch": 0.49523009539809204, "grad_norm": 1.320794701576233, "learning_rate": 3.724301179633947e-06, "loss": 0.4048, "step": 8254 }, { "epoch": 0.49529009419811604, "grad_norm": 1.4186904430389404, "learning_rate": 3.723622406905737e-06, "loss": 0.3859, "step": 8255 }, { "epoch": 0.49535009299814003, "grad_norm": 1.3026570081710815, "learning_rate": 3.7229436257323275e-06, "loss": 0.4, "step": 8256 }, { "epoch": 0.49541009179816403, "grad_norm": 1.2119823694229126, "learning_rate": 3.722264836139353e-06, "loss": 0.3649, "step": 8257 }, { "epoch": 0.49547009059818803, "grad_norm": 1.3917649984359741, "learning_rate": 3.7215860381524473e-06, "loss": 0.4391, "step": 8258 }, { "epoch": 0.495530089398212, "grad_norm": 1.206824779510498, "learning_rate": 3.7209072317972485e-06, "loss": 0.4239, "step": 8259 }, { "epoch": 0.495590088198236, "grad_norm": 1.3505710363388062, "learning_rate": 3.7202284170993875e-06, "loss": 0.4001, "step": 8260 }, { "epoch": 0.49565008699826, "grad_norm": 1.2819242477416992, "learning_rate": 3.719549594084504e-06, "loss": 0.3817, "step": 8261 }, { "epoch": 0.495710085798284, "grad_norm": 1.269901990890503, "learning_rate": 3.7188707627782324e-06, "loss": 0.397, "step": 8262 }, { "epoch": 0.495770084598308, "grad_norm": 1.3252826929092407, "learning_rate": 3.7181919232062096e-06, "loss": 0.4283, "step": 8263 }, { "epoch": 0.495830083398332, "grad_norm": 1.1482402086257935, "learning_rate": 3.717513075394071e-06, "loss": 0.3953, "step": 8264 }, { "epoch": 0.495890082198356, "grad_norm": 1.3199564218521118, "learning_rate": 3.716834219367456e-06, "loss": 0.3844, "step": 8265 }, { "epoch": 0.49595008099838, "grad_norm": 1.4617037773132324, "learning_rate": 3.7161553551519976e-06, "loss": 0.4889, "step": 8266 }, { "epoch": 0.49601007979840406, "grad_norm": 1.272139310836792, "learning_rate": 3.7154764827733392e-06, "loss": 0.4379, "step": 8267 }, { "epoch": 0.49607007859842805, "grad_norm": 1.1897878646850586, "learning_rate": 3.7147976022571147e-06, "loss": 0.3958, "step": 8268 }, { "epoch": 0.49613007739845205, "grad_norm": 1.3877865076065063, "learning_rate": 3.714118713628963e-06, "loss": 0.3894, "step": 8269 }, { "epoch": 0.49619007619847605, "grad_norm": 1.3574522733688354, "learning_rate": 3.7134398169145236e-06, "loss": 0.3819, "step": 8270 }, { "epoch": 0.49625007499850005, "grad_norm": 1.2724272012710571, "learning_rate": 3.7127609121394343e-06, "loss": 0.3736, "step": 8271 }, { "epoch": 0.49631007379852404, "grad_norm": 1.4284183979034424, "learning_rate": 3.7120819993293353e-06, "loss": 0.4597, "step": 8272 }, { "epoch": 0.49637007259854804, "grad_norm": 1.3087462186813354, "learning_rate": 3.7114030785098652e-06, "loss": 0.4265, "step": 8273 }, { "epoch": 0.49643007139857204, "grad_norm": 1.2599526643753052, "learning_rate": 3.710724149706664e-06, "loss": 0.4109, "step": 8274 }, { "epoch": 0.49649007019859603, "grad_norm": 1.2546296119689941, "learning_rate": 3.710045212945371e-06, "loss": 0.3959, "step": 8275 }, { "epoch": 0.49655006899862003, "grad_norm": 1.3507243394851685, "learning_rate": 3.7093662682516277e-06, "loss": 0.4294, "step": 8276 }, { "epoch": 0.49661006779864403, "grad_norm": 1.2630270719528198, "learning_rate": 3.708687315651074e-06, "loss": 0.3735, "step": 8277 }, { "epoch": 0.496670066598668, "grad_norm": 1.1841150522232056, "learning_rate": 3.708008355169352e-06, "loss": 0.3821, "step": 8278 }, { "epoch": 0.496730065398692, "grad_norm": 1.3166420459747314, "learning_rate": 3.707329386832102e-06, "loss": 0.4291, "step": 8279 }, { "epoch": 0.496790064198716, "grad_norm": 1.3249406814575195, "learning_rate": 3.706650410664966e-06, "loss": 0.4056, "step": 8280 }, { "epoch": 0.49685006299874, "grad_norm": 1.2196897268295288, "learning_rate": 3.705971426693584e-06, "loss": 0.3913, "step": 8281 }, { "epoch": 0.496910061798764, "grad_norm": 1.3082232475280762, "learning_rate": 3.7052924349436013e-06, "loss": 0.3911, "step": 8282 }, { "epoch": 0.496970060598788, "grad_norm": 1.2057265043258667, "learning_rate": 3.704613435440657e-06, "loss": 0.3815, "step": 8283 }, { "epoch": 0.497030059398812, "grad_norm": 1.3061351776123047, "learning_rate": 3.7039344282103964e-06, "loss": 0.406, "step": 8284 }, { "epoch": 0.497090058198836, "grad_norm": 1.1874845027923584, "learning_rate": 3.7032554132784618e-06, "loss": 0.4167, "step": 8285 }, { "epoch": 0.49715005699886, "grad_norm": 1.4031898975372314, "learning_rate": 3.7025763906704956e-06, "loss": 0.3667, "step": 8286 }, { "epoch": 0.497210055798884, "grad_norm": 1.25217604637146, "learning_rate": 3.7018973604121423e-06, "loss": 0.3795, "step": 8287 }, { "epoch": 0.497270054598908, "grad_norm": 1.148942470550537, "learning_rate": 3.701218322529046e-06, "loss": 0.3815, "step": 8288 }, { "epoch": 0.49733005339893205, "grad_norm": 1.3132678270339966, "learning_rate": 3.7005392770468494e-06, "loss": 0.3745, "step": 8289 }, { "epoch": 0.49739005219895605, "grad_norm": 1.3386775255203247, "learning_rate": 3.6998602239911985e-06, "loss": 0.432, "step": 8290 }, { "epoch": 0.49745005099898004, "grad_norm": 1.219801425933838, "learning_rate": 3.6991811633877374e-06, "loss": 0.3943, "step": 8291 }, { "epoch": 0.49751004979900404, "grad_norm": 1.151161551475525, "learning_rate": 3.6985020952621126e-06, "loss": 0.3573, "step": 8292 }, { "epoch": 0.49757004859902804, "grad_norm": 1.2705693244934082, "learning_rate": 3.6978230196399657e-06, "loss": 0.3742, "step": 8293 }, { "epoch": 0.49763004739905203, "grad_norm": 1.2957347631454468, "learning_rate": 3.6971439365469464e-06, "loss": 0.4244, "step": 8294 }, { "epoch": 0.49769004619907603, "grad_norm": 1.398598551750183, "learning_rate": 3.6964648460086976e-06, "loss": 0.4258, "step": 8295 }, { "epoch": 0.4977500449991, "grad_norm": 1.2168357372283936, "learning_rate": 3.6957857480508675e-06, "loss": 0.4331, "step": 8296 }, { "epoch": 0.497810043799124, "grad_norm": 1.4828038215637207, "learning_rate": 3.695106642699102e-06, "loss": 0.4509, "step": 8297 }, { "epoch": 0.497870042599148, "grad_norm": 1.308118224143982, "learning_rate": 3.694427529979047e-06, "loss": 0.4456, "step": 8298 }, { "epoch": 0.497930041399172, "grad_norm": 1.056221604347229, "learning_rate": 3.693748409916349e-06, "loss": 0.3685, "step": 8299 }, { "epoch": 0.497990040199196, "grad_norm": 1.2595731019973755, "learning_rate": 3.6930692825366576e-06, "loss": 0.4056, "step": 8300 }, { "epoch": 0.49805003899922, "grad_norm": 1.3872698545455933, "learning_rate": 3.692390147865619e-06, "loss": 0.4578, "step": 8301 }, { "epoch": 0.498110037799244, "grad_norm": 1.2393330335617065, "learning_rate": 3.69171100592888e-06, "loss": 0.5091, "step": 8302 }, { "epoch": 0.498170036599268, "grad_norm": 1.2708230018615723, "learning_rate": 3.6910318567520914e-06, "loss": 0.3715, "step": 8303 }, { "epoch": 0.498230035399292, "grad_norm": 1.3213948011398315, "learning_rate": 3.690352700360898e-06, "loss": 0.4235, "step": 8304 }, { "epoch": 0.498290034199316, "grad_norm": 1.2555707693099976, "learning_rate": 3.689673536780952e-06, "loss": 0.392, "step": 8305 }, { "epoch": 0.49835003299934, "grad_norm": 1.229536533355713, "learning_rate": 3.688994366037899e-06, "loss": 0.4075, "step": 8306 }, { "epoch": 0.498410031799364, "grad_norm": 1.2527570724487305, "learning_rate": 3.6883151881573914e-06, "loss": 0.4134, "step": 8307 }, { "epoch": 0.498470030599388, "grad_norm": 1.4386837482452393, "learning_rate": 3.6876360031650765e-06, "loss": 0.4173, "step": 8308 }, { "epoch": 0.498530029399412, "grad_norm": 1.515480875968933, "learning_rate": 3.6869568110866046e-06, "loss": 0.4241, "step": 8309 }, { "epoch": 0.498590028199436, "grad_norm": 1.2270029783248901, "learning_rate": 3.6862776119476253e-06, "loss": 0.3991, "step": 8310 }, { "epoch": 0.49865002699946, "grad_norm": 1.2940526008605957, "learning_rate": 3.6855984057737904e-06, "loss": 0.3942, "step": 8311 }, { "epoch": 0.49871002579948404, "grad_norm": 1.2646358013153076, "learning_rate": 3.6849191925907473e-06, "loss": 0.4238, "step": 8312 }, { "epoch": 0.49877002459950803, "grad_norm": 1.2990713119506836, "learning_rate": 3.684239972424151e-06, "loss": 0.3988, "step": 8313 }, { "epoch": 0.49883002339953203, "grad_norm": 1.204577922821045, "learning_rate": 3.683560745299649e-06, "loss": 0.3976, "step": 8314 }, { "epoch": 0.498890022199556, "grad_norm": 1.3404134511947632, "learning_rate": 3.6828815112428946e-06, "loss": 0.3772, "step": 8315 }, { "epoch": 0.49895002099958, "grad_norm": 1.198927402496338, "learning_rate": 3.6822022702795383e-06, "loss": 0.4319, "step": 8316 }, { "epoch": 0.499010019799604, "grad_norm": 1.212315559387207, "learning_rate": 3.6815230224352327e-06, "loss": 0.3685, "step": 8317 }, { "epoch": 0.499070018599628, "grad_norm": 1.234580159187317, "learning_rate": 3.6808437677356295e-06, "loss": 0.3954, "step": 8318 }, { "epoch": 0.499130017399652, "grad_norm": 1.2300859689712524, "learning_rate": 3.6801645062063806e-06, "loss": 0.4131, "step": 8319 }, { "epoch": 0.499190016199676, "grad_norm": 1.255486249923706, "learning_rate": 3.67948523787314e-06, "loss": 0.3972, "step": 8320 }, { "epoch": 0.4992500149997, "grad_norm": 1.34064519405365, "learning_rate": 3.6788059627615593e-06, "loss": 0.4473, "step": 8321 }, { "epoch": 0.499310013799724, "grad_norm": 1.2642425298690796, "learning_rate": 3.6781266808972914e-06, "loss": 0.4065, "step": 8322 }, { "epoch": 0.499370012599748, "grad_norm": 1.229200839996338, "learning_rate": 3.6774473923059916e-06, "loss": 0.3585, "step": 8323 }, { "epoch": 0.499430011399772, "grad_norm": 1.422356367111206, "learning_rate": 3.676768097013312e-06, "loss": 0.4121, "step": 8324 }, { "epoch": 0.499490010199796, "grad_norm": 1.3684502840042114, "learning_rate": 3.6760887950449054e-06, "loss": 0.3863, "step": 8325 }, { "epoch": 0.49955000899982, "grad_norm": 1.3553190231323242, "learning_rate": 3.675409486426429e-06, "loss": 0.4676, "step": 8326 }, { "epoch": 0.499610007799844, "grad_norm": 1.4334602355957031, "learning_rate": 3.674730171183534e-06, "loss": 0.4403, "step": 8327 }, { "epoch": 0.499670006599868, "grad_norm": 1.2982451915740967, "learning_rate": 3.6740508493418773e-06, "loss": 0.4278, "step": 8328 }, { "epoch": 0.499730005399892, "grad_norm": 1.510180115699768, "learning_rate": 3.673371520927113e-06, "loss": 0.4202, "step": 8329 }, { "epoch": 0.499790004199916, "grad_norm": 1.2238578796386719, "learning_rate": 3.672692185964896e-06, "loss": 0.4266, "step": 8330 }, { "epoch": 0.49985000299994, "grad_norm": 1.3676011562347412, "learning_rate": 3.672012844480881e-06, "loss": 0.3654, "step": 8331 }, { "epoch": 0.499910001799964, "grad_norm": 1.3180972337722778, "learning_rate": 3.6713334965007268e-06, "loss": 0.4091, "step": 8332 }, { "epoch": 0.499970000599988, "grad_norm": 1.2608141899108887, "learning_rate": 3.670654142050085e-06, "loss": 0.4309, "step": 8333 }, { "epoch": 0.500029999400012, "grad_norm": 1.2787424325942993, "learning_rate": 3.669974781154615e-06, "loss": 0.4255, "step": 8334 }, { "epoch": 0.500089998200036, "grad_norm": 1.2545404434204102, "learning_rate": 3.669295413839971e-06, "loss": 0.3579, "step": 8335 }, { "epoch": 0.50014999700006, "grad_norm": 1.5446126461029053, "learning_rate": 3.668616040131811e-06, "loss": 0.4971, "step": 8336 }, { "epoch": 0.500209995800084, "grad_norm": 1.2081427574157715, "learning_rate": 3.6679366600557903e-06, "loss": 0.3959, "step": 8337 }, { "epoch": 0.500269994600108, "grad_norm": 1.2027710676193237, "learning_rate": 3.6672572736375693e-06, "loss": 0.4127, "step": 8338 }, { "epoch": 0.500329993400132, "grad_norm": 1.4412165880203247, "learning_rate": 3.6665778809028016e-06, "loss": 0.4055, "step": 8339 }, { "epoch": 0.500389992200156, "grad_norm": 1.3135489225387573, "learning_rate": 3.6658984818771473e-06, "loss": 0.3782, "step": 8340 }, { "epoch": 0.50044999100018, "grad_norm": 1.27223801612854, "learning_rate": 3.6652190765862614e-06, "loss": 0.3903, "step": 8341 }, { "epoch": 0.500509989800204, "grad_norm": 1.1653571128845215, "learning_rate": 3.6645396650558047e-06, "loss": 0.3536, "step": 8342 }, { "epoch": 0.5005699886002279, "grad_norm": 1.2750505208969116, "learning_rate": 3.663860247311435e-06, "loss": 0.3815, "step": 8343 }, { "epoch": 0.500629987400252, "grad_norm": 1.2650145292282104, "learning_rate": 3.66318082337881e-06, "loss": 0.3548, "step": 8344 }, { "epoch": 0.500689986200276, "grad_norm": 1.2395999431610107, "learning_rate": 3.662501393283588e-06, "loss": 0.4015, "step": 8345 }, { "epoch": 0.5007499850003, "grad_norm": 1.2353836297988892, "learning_rate": 3.6618219570514295e-06, "loss": 0.4213, "step": 8346 }, { "epoch": 0.500809983800324, "grad_norm": 1.1300299167633057, "learning_rate": 3.6611425147079923e-06, "loss": 0.389, "step": 8347 }, { "epoch": 0.500869982600348, "grad_norm": 1.3132002353668213, "learning_rate": 3.6604630662789373e-06, "loss": 0.4706, "step": 8348 }, { "epoch": 0.500929981400372, "grad_norm": 1.2600774765014648, "learning_rate": 3.659783611789923e-06, "loss": 0.415, "step": 8349 }, { "epoch": 0.500989980200396, "grad_norm": 1.2181251049041748, "learning_rate": 3.6591041512666097e-06, "loss": 0.3981, "step": 8350 }, { "epoch": 0.50104997900042, "grad_norm": 1.2915027141571045, "learning_rate": 3.6584246847346577e-06, "loss": 0.3991, "step": 8351 }, { "epoch": 0.501109977800444, "grad_norm": 1.3684505224227905, "learning_rate": 3.6577452122197265e-06, "loss": 0.4123, "step": 8352 }, { "epoch": 0.501169976600468, "grad_norm": 1.1591875553131104, "learning_rate": 3.657065733747477e-06, "loss": 0.3911, "step": 8353 }, { "epoch": 0.501229975400492, "grad_norm": 1.3111010789871216, "learning_rate": 3.6563862493435716e-06, "loss": 0.4014, "step": 8354 }, { "epoch": 0.501289974200516, "grad_norm": 1.3049525022506714, "learning_rate": 3.6557067590336696e-06, "loss": 0.4334, "step": 8355 }, { "epoch": 0.50134997300054, "grad_norm": 1.1478939056396484, "learning_rate": 3.655027262843432e-06, "loss": 0.3465, "step": 8356 }, { "epoch": 0.501409971800564, "grad_norm": 1.2534537315368652, "learning_rate": 3.654347760798522e-06, "loss": 0.3734, "step": 8357 }, { "epoch": 0.501469970600588, "grad_norm": 1.2495293617248535, "learning_rate": 3.6536682529245994e-06, "loss": 0.4067, "step": 8358 }, { "epoch": 0.501529969400612, "grad_norm": 1.2123655080795288, "learning_rate": 3.6529887392473276e-06, "loss": 0.3929, "step": 8359 }, { "epoch": 0.501589968200636, "grad_norm": 1.385454535484314, "learning_rate": 3.6523092197923674e-06, "loss": 0.4293, "step": 8360 }, { "epoch": 0.50164996700066, "grad_norm": 1.3788976669311523, "learning_rate": 3.6516296945853835e-06, "loss": 0.4056, "step": 8361 }, { "epoch": 0.5017099658006839, "grad_norm": 1.2375445365905762, "learning_rate": 3.650950163652035e-06, "loss": 0.4081, "step": 8362 }, { "epoch": 0.501769964600708, "grad_norm": 1.259631872177124, "learning_rate": 3.650270627017988e-06, "loss": 0.3802, "step": 8363 }, { "epoch": 0.5018299634007319, "grad_norm": 1.2811391353607178, "learning_rate": 3.6495910847089035e-06, "loss": 0.4156, "step": 8364 }, { "epoch": 0.501889962200756, "grad_norm": 1.2612015008926392, "learning_rate": 3.6489115367504448e-06, "loss": 0.4059, "step": 8365 }, { "epoch": 0.5019499610007799, "grad_norm": 1.2346080541610718, "learning_rate": 3.6482319831682764e-06, "loss": 0.3699, "step": 8366 }, { "epoch": 0.502009959800804, "grad_norm": 1.4426438808441162, "learning_rate": 3.6475524239880613e-06, "loss": 0.436, "step": 8367 }, { "epoch": 0.502069958600828, "grad_norm": 1.276497483253479, "learning_rate": 3.6468728592354625e-06, "loss": 0.3622, "step": 8368 }, { "epoch": 0.502129957400852, "grad_norm": 1.4715229272842407, "learning_rate": 3.6461932889361465e-06, "loss": 0.4275, "step": 8369 }, { "epoch": 0.502189956200876, "grad_norm": 1.158403992652893, "learning_rate": 3.6455137131157743e-06, "loss": 0.4077, "step": 8370 }, { "epoch": 0.5022499550009, "grad_norm": 1.240616798400879, "learning_rate": 3.644834131800013e-06, "loss": 0.3929, "step": 8371 }, { "epoch": 0.502309953800924, "grad_norm": 1.3410029411315918, "learning_rate": 3.644154545014527e-06, "loss": 0.4331, "step": 8372 }, { "epoch": 0.502369952600948, "grad_norm": 1.2685130834579468, "learning_rate": 3.6434749527849803e-06, "loss": 0.3293, "step": 8373 }, { "epoch": 0.502429951400972, "grad_norm": 1.3824794292449951, "learning_rate": 3.642795355137038e-06, "loss": 0.4286, "step": 8374 }, { "epoch": 0.502489950200996, "grad_norm": 1.3242121934890747, "learning_rate": 3.642115752096365e-06, "loss": 0.4293, "step": 8375 }, { "epoch": 0.50254994900102, "grad_norm": 1.2913590669631958, "learning_rate": 3.6414361436886296e-06, "loss": 0.4119, "step": 8376 }, { "epoch": 0.502609947801044, "grad_norm": 1.2986645698547363, "learning_rate": 3.6407565299394922e-06, "loss": 0.4346, "step": 8377 }, { "epoch": 0.502669946601068, "grad_norm": 1.2808138132095337, "learning_rate": 3.640076910874625e-06, "loss": 0.3858, "step": 8378 }, { "epoch": 0.502729945401092, "grad_norm": 1.4249483346939087, "learning_rate": 3.6393972865196894e-06, "loss": 0.4191, "step": 8379 }, { "epoch": 0.502789944201116, "grad_norm": 1.2658900022506714, "learning_rate": 3.6387176569003546e-06, "loss": 0.3669, "step": 8380 }, { "epoch": 0.5028499430011399, "grad_norm": 1.4190081357955933, "learning_rate": 3.6380380220422845e-06, "loss": 0.4771, "step": 8381 }, { "epoch": 0.502909941801164, "grad_norm": 1.0281122922897339, "learning_rate": 3.6373583819711485e-06, "loss": 0.3649, "step": 8382 }, { "epoch": 0.5029699406011879, "grad_norm": 1.1249415874481201, "learning_rate": 3.6366787367126107e-06, "loss": 0.3353, "step": 8383 }, { "epoch": 0.503029939401212, "grad_norm": 1.2971216440200806, "learning_rate": 3.6359990862923406e-06, "loss": 0.4314, "step": 8384 }, { "epoch": 0.5030899382012359, "grad_norm": 1.2880700826644897, "learning_rate": 3.635319430736005e-06, "loss": 0.4393, "step": 8385 }, { "epoch": 0.50314993700126, "grad_norm": 1.232387661933899, "learning_rate": 3.634639770069271e-06, "loss": 0.3661, "step": 8386 }, { "epoch": 0.5032099358012839, "grad_norm": 1.3962376117706299, "learning_rate": 3.6339601043178055e-06, "loss": 0.4082, "step": 8387 }, { "epoch": 0.503269934601308, "grad_norm": 1.320792317390442, "learning_rate": 3.6332804335072775e-06, "loss": 0.418, "step": 8388 }, { "epoch": 0.5033299334013319, "grad_norm": 1.26884925365448, "learning_rate": 3.6326007576633554e-06, "loss": 0.4537, "step": 8389 }, { "epoch": 0.503389932201356, "grad_norm": 1.3816035985946655, "learning_rate": 3.631921076811706e-06, "loss": 0.4459, "step": 8390 }, { "epoch": 0.50344993100138, "grad_norm": 1.1899455785751343, "learning_rate": 3.6312413909779987e-06, "loss": 0.3529, "step": 8391 }, { "epoch": 0.503509929801404, "grad_norm": 1.41835355758667, "learning_rate": 3.630561700187903e-06, "loss": 0.4296, "step": 8392 }, { "epoch": 0.503569928601428, "grad_norm": 1.2826354503631592, "learning_rate": 3.6298820044670854e-06, "loss": 0.3955, "step": 8393 }, { "epoch": 0.503629927401452, "grad_norm": 1.3442072868347168, "learning_rate": 3.6292023038412163e-06, "loss": 0.411, "step": 8394 }, { "epoch": 0.503689926201476, "grad_norm": 1.1956275701522827, "learning_rate": 3.6285225983359656e-06, "loss": 0.3913, "step": 8395 }, { "epoch": 0.5037499250015, "grad_norm": 1.2684202194213867, "learning_rate": 3.6278428879770017e-06, "loss": 0.415, "step": 8396 }, { "epoch": 0.503809923801524, "grad_norm": 1.3823965787887573, "learning_rate": 3.627163172789994e-06, "loss": 0.4217, "step": 8397 }, { "epoch": 0.503869922601548, "grad_norm": 1.32467782497406, "learning_rate": 3.6264834528006126e-06, "loss": 0.43, "step": 8398 }, { "epoch": 0.503929921401572, "grad_norm": 1.2253105640411377, "learning_rate": 3.625803728034528e-06, "loss": 0.4046, "step": 8399 }, { "epoch": 0.5039899202015959, "grad_norm": 1.1700283288955688, "learning_rate": 3.6251239985174086e-06, "loss": 0.3901, "step": 8400 }, { "epoch": 0.50404991900162, "grad_norm": 1.4676940441131592, "learning_rate": 3.6244442642749274e-06, "loss": 0.4485, "step": 8401 }, { "epoch": 0.5041099178016439, "grad_norm": 1.3823429346084595, "learning_rate": 3.623764525332752e-06, "loss": 0.3985, "step": 8402 }, { "epoch": 0.504169916601668, "grad_norm": 1.251667857170105, "learning_rate": 3.623084781716556e-06, "loss": 0.4516, "step": 8403 }, { "epoch": 0.5042299154016919, "grad_norm": 1.1883180141448975, "learning_rate": 3.6224050334520075e-06, "loss": 0.3908, "step": 8404 }, { "epoch": 0.504289914201716, "grad_norm": 1.2584360837936401, "learning_rate": 3.621725280564778e-06, "loss": 0.3821, "step": 8405 }, { "epoch": 0.5043499130017399, "grad_norm": 1.3314409255981445, "learning_rate": 3.6210455230805395e-06, "loss": 0.3889, "step": 8406 }, { "epoch": 0.504409911801764, "grad_norm": 1.1738953590393066, "learning_rate": 3.6203657610249632e-06, "loss": 0.4256, "step": 8407 }, { "epoch": 0.5044699106017879, "grad_norm": 1.2373154163360596, "learning_rate": 3.619685994423721e-06, "loss": 0.4089, "step": 8408 }, { "epoch": 0.504529909401812, "grad_norm": 1.1108102798461914, "learning_rate": 3.6190062233024846e-06, "loss": 0.3721, "step": 8409 }, { "epoch": 0.5045899082018359, "grad_norm": 1.2878762483596802, "learning_rate": 3.6183264476869234e-06, "loss": 0.4274, "step": 8410 }, { "epoch": 0.50464990700186, "grad_norm": 1.1237034797668457, "learning_rate": 3.617646667602713e-06, "loss": 0.3997, "step": 8411 }, { "epoch": 0.5047099058018839, "grad_norm": 1.2842538356781006, "learning_rate": 3.6169668830755224e-06, "loss": 0.4034, "step": 8412 }, { "epoch": 0.504769904601908, "grad_norm": 1.2380690574645996, "learning_rate": 3.616287094131027e-06, "loss": 0.4303, "step": 8413 }, { "epoch": 0.504829903401932, "grad_norm": 1.274794101715088, "learning_rate": 3.6156073007948968e-06, "loss": 0.3865, "step": 8414 }, { "epoch": 0.504889902201956, "grad_norm": 1.149412989616394, "learning_rate": 3.614927503092807e-06, "loss": 0.3903, "step": 8415 }, { "epoch": 0.50494990100198, "grad_norm": 1.30026376247406, "learning_rate": 3.6142477010504283e-06, "loss": 0.4438, "step": 8416 }, { "epoch": 0.505009899802004, "grad_norm": 1.333203673362732, "learning_rate": 3.6135678946934343e-06, "loss": 0.4444, "step": 8417 }, { "epoch": 0.505069898602028, "grad_norm": 1.2931402921676636, "learning_rate": 3.612888084047498e-06, "loss": 0.3825, "step": 8418 }, { "epoch": 0.5051298974020519, "grad_norm": 1.2765145301818848, "learning_rate": 3.6122082691382935e-06, "loss": 0.4071, "step": 8419 }, { "epoch": 0.505189896202076, "grad_norm": 1.0994549989700317, "learning_rate": 3.611528449991494e-06, "loss": 0.377, "step": 8420 }, { "epoch": 0.5052498950020999, "grad_norm": 1.4326578378677368, "learning_rate": 3.6108486266327724e-06, "loss": 0.4359, "step": 8421 }, { "epoch": 0.505309893802124, "grad_norm": 1.265340805053711, "learning_rate": 3.610168799087805e-06, "loss": 0.3776, "step": 8422 }, { "epoch": 0.5053698926021479, "grad_norm": 1.2630404233932495, "learning_rate": 3.6094889673822615e-06, "loss": 0.3839, "step": 8423 }, { "epoch": 0.505429891402172, "grad_norm": 1.2551038265228271, "learning_rate": 3.60880913154182e-06, "loss": 0.3985, "step": 8424 }, { "epoch": 0.5054898902021959, "grad_norm": 1.333556890487671, "learning_rate": 3.608129291592153e-06, "loss": 0.3879, "step": 8425 }, { "epoch": 0.50554988900222, "grad_norm": 1.3458971977233887, "learning_rate": 3.6074494475589353e-06, "loss": 0.439, "step": 8426 }, { "epoch": 0.5056098878022439, "grad_norm": 1.6226389408111572, "learning_rate": 3.606769599467841e-06, "loss": 0.4743, "step": 8427 }, { "epoch": 0.505669886602268, "grad_norm": 1.280381441116333, "learning_rate": 3.6060897473445466e-06, "loss": 0.3991, "step": 8428 }, { "epoch": 0.5057298854022919, "grad_norm": 1.2144538164138794, "learning_rate": 3.605409891214724e-06, "loss": 0.3805, "step": 8429 }, { "epoch": 0.505789884202316, "grad_norm": 1.283284306526184, "learning_rate": 3.6047300311040514e-06, "loss": 0.4798, "step": 8430 }, { "epoch": 0.5058498830023399, "grad_norm": 1.1128473281860352, "learning_rate": 3.6040501670382026e-06, "loss": 0.3596, "step": 8431 }, { "epoch": 0.505909881802364, "grad_norm": 1.3005682229995728, "learning_rate": 3.6033702990428527e-06, "loss": 0.4021, "step": 8432 }, { "epoch": 0.5059698806023879, "grad_norm": 1.4038574695587158, "learning_rate": 3.6026904271436775e-06, "loss": 0.3985, "step": 8433 }, { "epoch": 0.506029879402412, "grad_norm": 1.2109405994415283, "learning_rate": 3.6020105513663543e-06, "loss": 0.3918, "step": 8434 }, { "epoch": 0.506089878202436, "grad_norm": 1.2569469213485718, "learning_rate": 3.6013306717365546e-06, "loss": 0.3988, "step": 8435 }, { "epoch": 0.50614987700246, "grad_norm": 1.220825433731079, "learning_rate": 3.6006507882799587e-06, "loss": 0.374, "step": 8436 }, { "epoch": 0.506209875802484, "grad_norm": 1.1474629640579224, "learning_rate": 3.59997090102224e-06, "loss": 0.3779, "step": 8437 }, { "epoch": 0.5062698746025079, "grad_norm": 1.2555705308914185, "learning_rate": 3.599291009989078e-06, "loss": 0.3974, "step": 8438 }, { "epoch": 0.506329873402532, "grad_norm": 1.2918168306350708, "learning_rate": 3.5986111152061453e-06, "loss": 0.4091, "step": 8439 }, { "epoch": 0.5063898722025559, "grad_norm": 1.262091875076294, "learning_rate": 3.5979312166991207e-06, "loss": 0.4235, "step": 8440 }, { "epoch": 0.50644987100258, "grad_norm": 1.2919684648513794, "learning_rate": 3.5972513144936797e-06, "loss": 0.4232, "step": 8441 }, { "epoch": 0.5065098698026039, "grad_norm": 1.2588616609573364, "learning_rate": 3.5965714086154995e-06, "loss": 0.3765, "step": 8442 }, { "epoch": 0.506569868602628, "grad_norm": 1.3377265930175781, "learning_rate": 3.595891499090258e-06, "loss": 0.4121, "step": 8443 }, { "epoch": 0.5066298674026519, "grad_norm": 1.2848504781723022, "learning_rate": 3.5952115859436306e-06, "loss": 0.3898, "step": 8444 }, { "epoch": 0.506689866202676, "grad_norm": 1.2880975008010864, "learning_rate": 3.594531669201296e-06, "loss": 0.4288, "step": 8445 }, { "epoch": 0.5067498650026999, "grad_norm": 1.2577708959579468, "learning_rate": 3.5938517488889305e-06, "loss": 0.3879, "step": 8446 }, { "epoch": 0.506809863802724, "grad_norm": 1.157317876815796, "learning_rate": 3.5931718250322117e-06, "loss": 0.3769, "step": 8447 }, { "epoch": 0.5068698626027479, "grad_norm": 1.4481799602508545, "learning_rate": 3.592491897656818e-06, "loss": 0.4218, "step": 8448 }, { "epoch": 0.506929861402772, "grad_norm": 1.2033765316009521, "learning_rate": 3.591811966788427e-06, "loss": 0.3388, "step": 8449 }, { "epoch": 0.5069898602027959, "grad_norm": 1.2525984048843384, "learning_rate": 3.5911320324527158e-06, "loss": 0.4336, "step": 8450 }, { "epoch": 0.50704985900282, "grad_norm": 1.1958993673324585, "learning_rate": 3.5904520946753638e-06, "loss": 0.3862, "step": 8451 }, { "epoch": 0.5071098578028439, "grad_norm": 1.234130859375, "learning_rate": 3.5897721534820473e-06, "loss": 0.4053, "step": 8452 }, { "epoch": 0.507169856602868, "grad_norm": 1.322397232055664, "learning_rate": 3.5890922088984455e-06, "loss": 0.4102, "step": 8453 }, { "epoch": 0.5072298554028919, "grad_norm": 1.3045660257339478, "learning_rate": 3.588412260950237e-06, "loss": 0.3991, "step": 8454 }, { "epoch": 0.507289854202916, "grad_norm": 1.295982837677002, "learning_rate": 3.587732309663101e-06, "loss": 0.4287, "step": 8455 }, { "epoch": 0.5073498530029399, "grad_norm": 1.1810061931610107, "learning_rate": 3.587052355062715e-06, "loss": 0.4466, "step": 8456 }, { "epoch": 0.5074098518029639, "grad_norm": 1.414380669593811, "learning_rate": 3.586372397174759e-06, "loss": 0.4012, "step": 8457 }, { "epoch": 0.507469850602988, "grad_norm": 1.3327440023422241, "learning_rate": 3.585692436024909e-06, "loss": 0.3742, "step": 8458 }, { "epoch": 0.5075298494030119, "grad_norm": 1.3053258657455444, "learning_rate": 3.585012471638847e-06, "loss": 0.4474, "step": 8459 }, { "epoch": 0.507589848203036, "grad_norm": 1.433484673500061, "learning_rate": 3.5843325040422516e-06, "loss": 0.4216, "step": 8460 }, { "epoch": 0.5076498470030599, "grad_norm": 1.329864501953125, "learning_rate": 3.5836525332608025e-06, "loss": 0.3916, "step": 8461 }, { "epoch": 0.507709845803084, "grad_norm": 1.3343901634216309, "learning_rate": 3.5829725593201763e-06, "loss": 0.4322, "step": 8462 }, { "epoch": 0.5077698446031079, "grad_norm": 1.2894538640975952, "learning_rate": 3.5822925822460567e-06, "loss": 0.3721, "step": 8463 }, { "epoch": 0.507829843403132, "grad_norm": 1.190176248550415, "learning_rate": 3.5816126020641195e-06, "loss": 0.4276, "step": 8464 }, { "epoch": 0.5078898422031559, "grad_norm": 1.212647795677185, "learning_rate": 3.5809326188000474e-06, "loss": 0.4306, "step": 8465 }, { "epoch": 0.50794984100318, "grad_norm": 1.381600260734558, "learning_rate": 3.5802526324795184e-06, "loss": 0.4343, "step": 8466 }, { "epoch": 0.5080098398032039, "grad_norm": 1.3457986116409302, "learning_rate": 3.5795726431282132e-06, "loss": 0.4303, "step": 8467 }, { "epoch": 0.508069838603228, "grad_norm": 1.3116763830184937, "learning_rate": 3.578892650771812e-06, "loss": 0.4308, "step": 8468 }, { "epoch": 0.5081298374032519, "grad_norm": 1.3357517719268799, "learning_rate": 3.5782126554359943e-06, "loss": 0.4647, "step": 8469 }, { "epoch": 0.508189836203276, "grad_norm": 1.179307460784912, "learning_rate": 3.5775326571464417e-06, "loss": 0.3774, "step": 8470 }, { "epoch": 0.5082498350032999, "grad_norm": 1.304340124130249, "learning_rate": 3.5768526559288325e-06, "loss": 0.3663, "step": 8471 }, { "epoch": 0.508309833803324, "grad_norm": 1.4174535274505615, "learning_rate": 3.57617265180885e-06, "loss": 0.4516, "step": 8472 }, { "epoch": 0.5083698326033479, "grad_norm": 1.251733422279358, "learning_rate": 3.575492644812173e-06, "loss": 0.4226, "step": 8473 }, { "epoch": 0.508429831403372, "grad_norm": 1.1315776109695435, "learning_rate": 3.5748126349644833e-06, "loss": 0.3616, "step": 8474 }, { "epoch": 0.5084898302033959, "grad_norm": 1.3350701332092285, "learning_rate": 3.5741326222914604e-06, "loss": 0.4293, "step": 8475 }, { "epoch": 0.5085498290034199, "grad_norm": 1.266265869140625, "learning_rate": 3.573452606818786e-06, "loss": 0.4051, "step": 8476 }, { "epoch": 0.5086098278034439, "grad_norm": 1.3180147409439087, "learning_rate": 3.5727725885721415e-06, "loss": 0.4091, "step": 8477 }, { "epoch": 0.5086698266034679, "grad_norm": 1.1623338460922241, "learning_rate": 3.5720925675772084e-06, "loss": 0.3579, "step": 8478 }, { "epoch": 0.5087298254034919, "grad_norm": 1.3410929441452026, "learning_rate": 3.571412543859667e-06, "loss": 0.4034, "step": 8479 }, { "epoch": 0.5087898242035159, "grad_norm": 1.2121837139129639, "learning_rate": 3.5707325174451993e-06, "loss": 0.3878, "step": 8480 }, { "epoch": 0.50884982300354, "grad_norm": 1.1163016557693481, "learning_rate": 3.570052488359487e-06, "loss": 0.3864, "step": 8481 }, { "epoch": 0.5089098218035639, "grad_norm": 1.2615947723388672, "learning_rate": 3.56937245662821e-06, "loss": 0.393, "step": 8482 }, { "epoch": 0.508969820603588, "grad_norm": 1.238045334815979, "learning_rate": 3.568692422277053e-06, "loss": 0.4669, "step": 8483 }, { "epoch": 0.5090298194036119, "grad_norm": 1.3883724212646484, "learning_rate": 3.568012385331696e-06, "loss": 0.4242, "step": 8484 }, { "epoch": 0.509089818203636, "grad_norm": 1.3753564357757568, "learning_rate": 3.56733234581782e-06, "loss": 0.399, "step": 8485 }, { "epoch": 0.5091498170036599, "grad_norm": 1.2899729013442993, "learning_rate": 3.5666523037611095e-06, "loss": 0.3864, "step": 8486 }, { "epoch": 0.509209815803684, "grad_norm": 1.2386209964752197, "learning_rate": 3.565972259187244e-06, "loss": 0.3771, "step": 8487 }, { "epoch": 0.5092698146037079, "grad_norm": 1.2830077409744263, "learning_rate": 3.5652922121219076e-06, "loss": 0.3845, "step": 8488 }, { "epoch": 0.509329813403732, "grad_norm": 1.3919779062271118, "learning_rate": 3.5646121625907813e-06, "loss": 0.4314, "step": 8489 }, { "epoch": 0.5093898122037559, "grad_norm": 1.24211585521698, "learning_rate": 3.5639321106195484e-06, "loss": 0.3812, "step": 8490 }, { "epoch": 0.50944981100378, "grad_norm": 1.261452555656433, "learning_rate": 3.5632520562338908e-06, "loss": 0.3996, "step": 8491 }, { "epoch": 0.5095098098038039, "grad_norm": 1.336716651916504, "learning_rate": 3.562571999459491e-06, "loss": 0.4327, "step": 8492 }, { "epoch": 0.509569808603828, "grad_norm": 1.363124966621399, "learning_rate": 3.561891940322032e-06, "loss": 0.4008, "step": 8493 }, { "epoch": 0.5096298074038519, "grad_norm": 1.1710081100463867, "learning_rate": 3.5612118788471956e-06, "loss": 0.412, "step": 8494 }, { "epoch": 0.5096898062038759, "grad_norm": 1.3840506076812744, "learning_rate": 3.5605318150606666e-06, "loss": 0.4141, "step": 8495 }, { "epoch": 0.5097498050038999, "grad_norm": 1.2771365642547607, "learning_rate": 3.559851748988126e-06, "loss": 0.4463, "step": 8496 }, { "epoch": 0.5098098038039239, "grad_norm": 1.3865240812301636, "learning_rate": 3.5591716806552583e-06, "loss": 0.4335, "step": 8497 }, { "epoch": 0.5098698026039479, "grad_norm": 1.412605881690979, "learning_rate": 3.5584916100877456e-06, "loss": 0.4362, "step": 8498 }, { "epoch": 0.5099298014039719, "grad_norm": 1.200283169746399, "learning_rate": 3.5578115373112714e-06, "loss": 0.3953, "step": 8499 }, { "epoch": 0.5099898002039959, "grad_norm": 1.1785277128219604, "learning_rate": 3.557131462351517e-06, "loss": 0.3565, "step": 8500 }, { "epoch": 0.5100497990040199, "grad_norm": 1.2246017456054688, "learning_rate": 3.5564513852341698e-06, "loss": 0.4149, "step": 8501 }, { "epoch": 0.510109797804044, "grad_norm": 1.3528498411178589, "learning_rate": 3.5557713059849095e-06, "loss": 0.4198, "step": 8502 }, { "epoch": 0.5101697966040679, "grad_norm": 1.2181493043899536, "learning_rate": 3.555091224629422e-06, "loss": 0.4079, "step": 8503 }, { "epoch": 0.510229795404092, "grad_norm": 1.451287031173706, "learning_rate": 3.55441114119339e-06, "loss": 0.4488, "step": 8504 }, { "epoch": 0.5102897942041159, "grad_norm": 1.3730043172836304, "learning_rate": 3.5537310557024966e-06, "loss": 0.4043, "step": 8505 }, { "epoch": 0.51034979300414, "grad_norm": 1.3647270202636719, "learning_rate": 3.553050968182426e-06, "loss": 0.4041, "step": 8506 }, { "epoch": 0.5104097918041639, "grad_norm": 1.2731683254241943, "learning_rate": 3.5523708786588626e-06, "loss": 0.4522, "step": 8507 }, { "epoch": 0.510469790604188, "grad_norm": 1.2143878936767578, "learning_rate": 3.5516907871574888e-06, "loss": 0.441, "step": 8508 }, { "epoch": 0.5105297894042119, "grad_norm": 1.265360713005066, "learning_rate": 3.5510106937039904e-06, "loss": 0.3902, "step": 8509 }, { "epoch": 0.510589788204236, "grad_norm": 1.3523029088974, "learning_rate": 3.55033059832405e-06, "loss": 0.4274, "step": 8510 }, { "epoch": 0.5106497870042599, "grad_norm": 1.2934436798095703, "learning_rate": 3.549650501043352e-06, "loss": 0.3917, "step": 8511 }, { "epoch": 0.5107097858042839, "grad_norm": 1.4371057748794556, "learning_rate": 3.5489704018875816e-06, "loss": 0.4547, "step": 8512 }, { "epoch": 0.5107697846043079, "grad_norm": 1.2338007688522339, "learning_rate": 3.5482903008824215e-06, "loss": 0.4122, "step": 8513 }, { "epoch": 0.5108297834043319, "grad_norm": 1.3455455303192139, "learning_rate": 3.5476101980535575e-06, "loss": 0.4219, "step": 8514 }, { "epoch": 0.5108897822043559, "grad_norm": 1.3536911010742188, "learning_rate": 3.546930093426673e-06, "loss": 0.3781, "step": 8515 }, { "epoch": 0.5109497810043799, "grad_norm": 1.2344589233398438, "learning_rate": 3.5462499870274525e-06, "loss": 0.3912, "step": 8516 }, { "epoch": 0.5110097798044039, "grad_norm": 1.3438072204589844, "learning_rate": 3.5455698788815794e-06, "loss": 0.452, "step": 8517 }, { "epoch": 0.5110697786044279, "grad_norm": 1.3107413053512573, "learning_rate": 3.544889769014741e-06, "loss": 0.4545, "step": 8518 }, { "epoch": 0.5111297774044519, "grad_norm": 1.188655138015747, "learning_rate": 3.54420965745262e-06, "loss": 0.3792, "step": 8519 }, { "epoch": 0.5111897762044759, "grad_norm": 1.294297218322754, "learning_rate": 3.543529544220903e-06, "loss": 0.3867, "step": 8520 }, { "epoch": 0.5112497750044999, "grad_norm": 1.3646647930145264, "learning_rate": 3.5428494293452716e-06, "loss": 0.4399, "step": 8521 }, { "epoch": 0.5113097738045239, "grad_norm": 1.3624930381774902, "learning_rate": 3.5421693128514144e-06, "loss": 0.3885, "step": 8522 }, { "epoch": 0.5113697726045479, "grad_norm": 1.2641404867172241, "learning_rate": 3.5414891947650116e-06, "loss": 0.3684, "step": 8523 }, { "epoch": 0.5114297714045719, "grad_norm": 1.3099488019943237, "learning_rate": 3.5408090751117538e-06, "loss": 0.3906, "step": 8524 }, { "epoch": 0.511489770204596, "grad_norm": 1.3420970439910889, "learning_rate": 3.540128953917321e-06, "loss": 0.4214, "step": 8525 }, { "epoch": 0.5115497690046199, "grad_norm": 1.3374264240264893, "learning_rate": 3.5394488312074025e-06, "loss": 0.4696, "step": 8526 }, { "epoch": 0.511609767804644, "grad_norm": 1.3852989673614502, "learning_rate": 3.53876870700768e-06, "loss": 0.3976, "step": 8527 }, { "epoch": 0.5116697666046679, "grad_norm": 1.3709081411361694, "learning_rate": 3.5380885813438408e-06, "loss": 0.451, "step": 8528 }, { "epoch": 0.511729765404692, "grad_norm": 1.2621320486068726, "learning_rate": 3.5374084542415676e-06, "loss": 0.4075, "step": 8529 }, { "epoch": 0.5117897642047159, "grad_norm": 1.1951227188110352, "learning_rate": 3.53672832572655e-06, "loss": 0.3877, "step": 8530 }, { "epoch": 0.5118497630047399, "grad_norm": 1.2599329948425293, "learning_rate": 3.5360481958244694e-06, "loss": 0.3945, "step": 8531 }, { "epoch": 0.5119097618047639, "grad_norm": 1.2756267786026, "learning_rate": 3.535368064561013e-06, "loss": 0.4283, "step": 8532 }, { "epoch": 0.5119697606047879, "grad_norm": 1.3259618282318115, "learning_rate": 3.534687931961866e-06, "loss": 0.387, "step": 8533 }, { "epoch": 0.5120297594048119, "grad_norm": 1.2527769804000854, "learning_rate": 3.534007798052714e-06, "loss": 0.395, "step": 8534 }, { "epoch": 0.5120897582048359, "grad_norm": 1.307013988494873, "learning_rate": 3.533327662859242e-06, "loss": 0.4053, "step": 8535 }, { "epoch": 0.5121497570048599, "grad_norm": 1.3509414196014404, "learning_rate": 3.5326475264071362e-06, "loss": 0.4856, "step": 8536 }, { "epoch": 0.5122097558048839, "grad_norm": 1.3863836526870728, "learning_rate": 3.531967388722082e-06, "loss": 0.4109, "step": 8537 }, { "epoch": 0.5122697546049079, "grad_norm": 1.2849297523498535, "learning_rate": 3.5312872498297655e-06, "loss": 0.4765, "step": 8538 }, { "epoch": 0.5123297534049319, "grad_norm": 1.3160558938980103, "learning_rate": 3.530607109755872e-06, "loss": 0.4087, "step": 8539 }, { "epoch": 0.5123897522049559, "grad_norm": 1.2034823894500732, "learning_rate": 3.5299269685260874e-06, "loss": 0.4158, "step": 8540 }, { "epoch": 0.5124497510049799, "grad_norm": 1.290663719177246, "learning_rate": 3.529246826166097e-06, "loss": 0.3776, "step": 8541 }, { "epoch": 0.5125097498050039, "grad_norm": 1.3624011278152466, "learning_rate": 3.5285666827015877e-06, "loss": 0.4207, "step": 8542 }, { "epoch": 0.5125697486050279, "grad_norm": 1.3063031435012817, "learning_rate": 3.5278865381582455e-06, "loss": 0.4002, "step": 8543 }, { "epoch": 0.5126297474050518, "grad_norm": 1.320967197418213, "learning_rate": 3.5272063925617546e-06, "loss": 0.4483, "step": 8544 }, { "epoch": 0.5126897462050759, "grad_norm": 1.2919262647628784, "learning_rate": 3.526526245937803e-06, "loss": 0.3726, "step": 8545 }, { "epoch": 0.5127497450050998, "grad_norm": 1.1815789937973022, "learning_rate": 3.5258460983120752e-06, "loss": 0.4066, "step": 8546 }, { "epoch": 0.5128097438051239, "grad_norm": 1.1291879415512085, "learning_rate": 3.525165949710259e-06, "loss": 0.3409, "step": 8547 }, { "epoch": 0.512869742605148, "grad_norm": 1.272335410118103, "learning_rate": 3.5244858001580386e-06, "loss": 0.3557, "step": 8548 }, { "epoch": 0.5129297414051719, "grad_norm": 1.290770411491394, "learning_rate": 3.5238056496811027e-06, "loss": 0.4196, "step": 8549 }, { "epoch": 0.5129897402051959, "grad_norm": 1.4483014345169067, "learning_rate": 3.523125498305134e-06, "loss": 0.4305, "step": 8550 }, { "epoch": 0.5130497390052199, "grad_norm": 1.278775930404663, "learning_rate": 3.522445346055822e-06, "loss": 0.3911, "step": 8551 }, { "epoch": 0.5131097378052439, "grad_norm": 1.2292115688323975, "learning_rate": 3.5217651929588504e-06, "loss": 0.4057, "step": 8552 }, { "epoch": 0.5131697366052679, "grad_norm": 1.2390570640563965, "learning_rate": 3.521085039039907e-06, "loss": 0.3918, "step": 8553 }, { "epoch": 0.5132297354052919, "grad_norm": 1.321847915649414, "learning_rate": 3.520404884324678e-06, "loss": 0.4244, "step": 8554 }, { "epoch": 0.5132897342053159, "grad_norm": 1.344853162765503, "learning_rate": 3.5197247288388496e-06, "loss": 0.4148, "step": 8555 }, { "epoch": 0.5133497330053399, "grad_norm": 1.2533525228500366, "learning_rate": 3.519044572608108e-06, "loss": 0.4, "step": 8556 }, { "epoch": 0.5134097318053639, "grad_norm": 1.2472875118255615, "learning_rate": 3.5183644156581398e-06, "loss": 0.3815, "step": 8557 }, { "epoch": 0.5134697306053879, "grad_norm": 1.2918312549591064, "learning_rate": 3.51768425801463e-06, "loss": 0.3778, "step": 8558 }, { "epoch": 0.5135297294054119, "grad_norm": 1.2057384252548218, "learning_rate": 3.5170040997032674e-06, "loss": 0.4164, "step": 8559 }, { "epoch": 0.5135897282054359, "grad_norm": 1.2166825532913208, "learning_rate": 3.5163239407497382e-06, "loss": 0.4247, "step": 8560 }, { "epoch": 0.5136497270054599, "grad_norm": 1.3884004354476929, "learning_rate": 3.5156437811797266e-06, "loss": 0.3978, "step": 8561 }, { "epoch": 0.5137097258054839, "grad_norm": 1.1865942478179932, "learning_rate": 3.514963621018921e-06, "loss": 0.3952, "step": 8562 }, { "epoch": 0.5137697246055078, "grad_norm": 1.1609177589416504, "learning_rate": 3.5142834602930087e-06, "loss": 0.4022, "step": 8563 }, { "epoch": 0.5138297234055319, "grad_norm": 1.1479041576385498, "learning_rate": 3.513603299027674e-06, "loss": 0.3652, "step": 8564 }, { "epoch": 0.5138897222055558, "grad_norm": 1.257095217704773, "learning_rate": 3.512923137248605e-06, "loss": 0.3931, "step": 8565 }, { "epoch": 0.5139497210055799, "grad_norm": 1.1732878684997559, "learning_rate": 3.5122429749814887e-06, "loss": 0.3976, "step": 8566 }, { "epoch": 0.5140097198056038, "grad_norm": 1.2470842599868774, "learning_rate": 3.51156281225201e-06, "loss": 0.395, "step": 8567 }, { "epoch": 0.5140697186056279, "grad_norm": 1.1983675956726074, "learning_rate": 3.5108826490858576e-06, "loss": 0.3591, "step": 8568 }, { "epoch": 0.5141297174056518, "grad_norm": 1.428524374961853, "learning_rate": 3.5102024855087156e-06, "loss": 0.4, "step": 8569 }, { "epoch": 0.5141897162056759, "grad_norm": 1.1913702487945557, "learning_rate": 3.509522321546273e-06, "loss": 0.3781, "step": 8570 }, { "epoch": 0.5142497150056999, "grad_norm": 1.3376924991607666, "learning_rate": 3.5088421572242152e-06, "loss": 0.4135, "step": 8571 }, { "epoch": 0.5143097138057239, "grad_norm": 1.3006541728973389, "learning_rate": 3.5081619925682307e-06, "loss": 0.4174, "step": 8572 }, { "epoch": 0.5143697126057479, "grad_norm": 1.1902129650115967, "learning_rate": 3.5074818276040037e-06, "loss": 0.3715, "step": 8573 }, { "epoch": 0.5144297114057719, "grad_norm": 1.155029296875, "learning_rate": 3.5068016623572234e-06, "loss": 0.3801, "step": 8574 }, { "epoch": 0.5144897102057959, "grad_norm": 1.3984684944152832, "learning_rate": 3.5061214968535744e-06, "loss": 0.3713, "step": 8575 }, { "epoch": 0.5145497090058199, "grad_norm": 1.3615001440048218, "learning_rate": 3.5054413311187443e-06, "loss": 0.41, "step": 8576 }, { "epoch": 0.5146097078058439, "grad_norm": 1.116532564163208, "learning_rate": 3.50476116517842e-06, "loss": 0.3827, "step": 8577 }, { "epoch": 0.5146697066058679, "grad_norm": 1.2253204584121704, "learning_rate": 3.504080999058289e-06, "loss": 0.3698, "step": 8578 }, { "epoch": 0.5147297054058919, "grad_norm": 1.382804036140442, "learning_rate": 3.503400832784036e-06, "loss": 0.4013, "step": 8579 }, { "epoch": 0.5147897042059159, "grad_norm": 1.3432425260543823, "learning_rate": 3.5027206663813496e-06, "loss": 0.415, "step": 8580 }, { "epoch": 0.5148497030059399, "grad_norm": 1.233344316482544, "learning_rate": 3.502040499875916e-06, "loss": 0.3914, "step": 8581 }, { "epoch": 0.5149097018059638, "grad_norm": 1.4527697563171387, "learning_rate": 3.501360333293422e-06, "loss": 0.4121, "step": 8582 }, { "epoch": 0.5149697006059879, "grad_norm": 1.299648404121399, "learning_rate": 3.500680166659555e-06, "loss": 0.4025, "step": 8583 }, { "epoch": 0.5150296994060118, "grad_norm": 1.296264886856079, "learning_rate": 3.5e-06, "loss": 0.4365, "step": 8584 }, { "epoch": 0.5150896982060359, "grad_norm": 1.4058808088302612, "learning_rate": 3.4993198333404454e-06, "loss": 0.4705, "step": 8585 }, { "epoch": 0.5151496970060598, "grad_norm": 1.2420798540115356, "learning_rate": 3.4986396667065783e-06, "loss": 0.3904, "step": 8586 }, { "epoch": 0.5152096958060839, "grad_norm": 1.3169362545013428, "learning_rate": 3.4979595001240842e-06, "loss": 0.4019, "step": 8587 }, { "epoch": 0.5152696946061078, "grad_norm": 1.2099717855453491, "learning_rate": 3.497279333618651e-06, "loss": 0.3896, "step": 8588 }, { "epoch": 0.5153296934061319, "grad_norm": 1.281181812286377, "learning_rate": 3.4965991672159648e-06, "loss": 0.4354, "step": 8589 }, { "epoch": 0.5153896922061558, "grad_norm": 1.2665705680847168, "learning_rate": 3.495919000941711e-06, "loss": 0.4161, "step": 8590 }, { "epoch": 0.5154496910061799, "grad_norm": 1.2018368244171143, "learning_rate": 3.495238834821581e-06, "loss": 0.4257, "step": 8591 }, { "epoch": 0.5155096898062039, "grad_norm": 1.3185793161392212, "learning_rate": 3.4945586688812556e-06, "loss": 0.4157, "step": 8592 }, { "epoch": 0.5155696886062279, "grad_norm": 1.2792460918426514, "learning_rate": 3.4938785031464268e-06, "loss": 0.3923, "step": 8593 }, { "epoch": 0.5156296874062519, "grad_norm": 1.2783900499343872, "learning_rate": 3.493198337642777e-06, "loss": 0.4016, "step": 8594 }, { "epoch": 0.5156896862062759, "grad_norm": 1.2928314208984375, "learning_rate": 3.492518172395997e-06, "loss": 0.4349, "step": 8595 }, { "epoch": 0.5157496850062999, "grad_norm": 1.2362269163131714, "learning_rate": 3.4918380074317696e-06, "loss": 0.4005, "step": 8596 }, { "epoch": 0.5158096838063239, "grad_norm": 1.2550417184829712, "learning_rate": 3.4911578427757842e-06, "loss": 0.3922, "step": 8597 }, { "epoch": 0.5158696826063479, "grad_norm": 1.1822625398635864, "learning_rate": 3.490477678453727e-06, "loss": 0.3516, "step": 8598 }, { "epoch": 0.5159296814063719, "grad_norm": 1.2449841499328613, "learning_rate": 3.4897975144912847e-06, "loss": 0.3892, "step": 8599 }, { "epoch": 0.5159896802063959, "grad_norm": 1.3200173377990723, "learning_rate": 3.489117350914144e-06, "loss": 0.377, "step": 8600 }, { "epoch": 0.5160496790064198, "grad_norm": 1.3259197473526, "learning_rate": 3.4884371877479905e-06, "loss": 0.3991, "step": 8601 }, { "epoch": 0.5161096778064439, "grad_norm": 1.3009284734725952, "learning_rate": 3.487757025018512e-06, "loss": 0.389, "step": 8602 }, { "epoch": 0.5161696766064678, "grad_norm": 1.3662116527557373, "learning_rate": 3.4870768627513955e-06, "loss": 0.4326, "step": 8603 }, { "epoch": 0.5162296754064919, "grad_norm": 1.231278896331787, "learning_rate": 3.486396700972326e-06, "loss": 0.4031, "step": 8604 }, { "epoch": 0.5162896742065158, "grad_norm": 1.321421504020691, "learning_rate": 3.4857165397069925e-06, "loss": 0.3882, "step": 8605 }, { "epoch": 0.5163496730065399, "grad_norm": 1.1972466707229614, "learning_rate": 3.485036378981079e-06, "loss": 0.3972, "step": 8606 }, { "epoch": 0.5164096718065638, "grad_norm": 1.2777537107467651, "learning_rate": 3.4843562188202746e-06, "loss": 0.3954, "step": 8607 }, { "epoch": 0.5164696706065879, "grad_norm": 1.5660110712051392, "learning_rate": 3.483676059250262e-06, "loss": 0.4494, "step": 8608 }, { "epoch": 0.5165296694066118, "grad_norm": 1.2511910200119019, "learning_rate": 3.482995900296732e-06, "loss": 0.3884, "step": 8609 }, { "epoch": 0.5165896682066359, "grad_norm": 1.1649361848831177, "learning_rate": 3.48231574198537e-06, "loss": 0.38, "step": 8610 }, { "epoch": 0.5166496670066598, "grad_norm": 1.3182613849639893, "learning_rate": 3.481635584341861e-06, "loss": 0.4476, "step": 8611 }, { "epoch": 0.5167096658066839, "grad_norm": 1.2089449167251587, "learning_rate": 3.480955427391893e-06, "loss": 0.4355, "step": 8612 }, { "epoch": 0.5167696646067078, "grad_norm": 1.1762778759002686, "learning_rate": 3.4802752711611507e-06, "loss": 0.3837, "step": 8613 }, { "epoch": 0.5168296634067319, "grad_norm": 1.2809077501296997, "learning_rate": 3.479595115675322e-06, "loss": 0.4329, "step": 8614 }, { "epoch": 0.5168896622067559, "grad_norm": 1.3943240642547607, "learning_rate": 3.478914960960093e-06, "loss": 0.4111, "step": 8615 }, { "epoch": 0.5169496610067799, "grad_norm": 1.3323222398757935, "learning_rate": 3.4782348070411494e-06, "loss": 0.4223, "step": 8616 }, { "epoch": 0.5170096598068039, "grad_norm": 1.1840057373046875, "learning_rate": 3.4775546539441793e-06, "loss": 0.3954, "step": 8617 }, { "epoch": 0.5170696586068279, "grad_norm": 1.1855944395065308, "learning_rate": 3.476874501694866e-06, "loss": 0.3649, "step": 8618 }, { "epoch": 0.5171296574068519, "grad_norm": 1.2567789554595947, "learning_rate": 3.476194350318899e-06, "loss": 0.4407, "step": 8619 }, { "epoch": 0.5171896562068758, "grad_norm": 1.464516520500183, "learning_rate": 3.4755141998419613e-06, "loss": 0.4186, "step": 8620 }, { "epoch": 0.5172496550068999, "grad_norm": 1.2535860538482666, "learning_rate": 3.4748340502897406e-06, "loss": 0.4154, "step": 8621 }, { "epoch": 0.5173096538069238, "grad_norm": 1.261815071105957, "learning_rate": 3.474153901687925e-06, "loss": 0.4412, "step": 8622 }, { "epoch": 0.5173696526069479, "grad_norm": 1.2997432947158813, "learning_rate": 3.473473754062197e-06, "loss": 0.4425, "step": 8623 }, { "epoch": 0.5174296514069718, "grad_norm": 1.3408995866775513, "learning_rate": 3.472793607438246e-06, "loss": 0.4353, "step": 8624 }, { "epoch": 0.5174896502069959, "grad_norm": 1.261951208114624, "learning_rate": 3.4721134618417553e-06, "loss": 0.3857, "step": 8625 }, { "epoch": 0.5175496490070198, "grad_norm": 1.230853796005249, "learning_rate": 3.471433317298412e-06, "loss": 0.3333, "step": 8626 }, { "epoch": 0.5176096478070439, "grad_norm": 1.145317792892456, "learning_rate": 3.470753173833903e-06, "loss": 0.3435, "step": 8627 }, { "epoch": 0.5176696466070678, "grad_norm": 1.1322060823440552, "learning_rate": 3.4700730314739125e-06, "loss": 0.3597, "step": 8628 }, { "epoch": 0.5177296454070919, "grad_norm": 1.3267894983291626, "learning_rate": 3.4693928902441285e-06, "loss": 0.4235, "step": 8629 }, { "epoch": 0.5177896442071158, "grad_norm": 1.16392183303833, "learning_rate": 3.468712750170235e-06, "loss": 0.3866, "step": 8630 }, { "epoch": 0.5178496430071399, "grad_norm": 1.2636463642120361, "learning_rate": 3.4680326112779178e-06, "loss": 0.4164, "step": 8631 }, { "epoch": 0.5179096418071638, "grad_norm": 1.1457021236419678, "learning_rate": 3.4673524735928645e-06, "loss": 0.4003, "step": 8632 }, { "epoch": 0.5179696406071879, "grad_norm": 1.3674182891845703, "learning_rate": 3.466672337140758e-06, "loss": 0.4072, "step": 8633 }, { "epoch": 0.5180296394072118, "grad_norm": 1.2789093255996704, "learning_rate": 3.465992201947287e-06, "loss": 0.3645, "step": 8634 }, { "epoch": 0.5180896382072359, "grad_norm": 1.2642388343811035, "learning_rate": 3.465312068038134e-06, "loss": 0.4353, "step": 8635 }, { "epoch": 0.5181496370072598, "grad_norm": 1.2743440866470337, "learning_rate": 3.4646319354389877e-06, "loss": 0.3923, "step": 8636 }, { "epoch": 0.5182096358072839, "grad_norm": 1.3506466150283813, "learning_rate": 3.4639518041755314e-06, "loss": 0.4255, "step": 8637 }, { "epoch": 0.5182696346073079, "grad_norm": 1.3703161478042603, "learning_rate": 3.46327167427345e-06, "loss": 0.4517, "step": 8638 }, { "epoch": 0.5183296334073318, "grad_norm": 1.1494288444519043, "learning_rate": 3.4625915457584323e-06, "loss": 0.3616, "step": 8639 }, { "epoch": 0.5183896322073559, "grad_norm": 1.1571491956710815, "learning_rate": 3.4619114186561595e-06, "loss": 0.3526, "step": 8640 }, { "epoch": 0.5184496310073798, "grad_norm": 1.324021339416504, "learning_rate": 3.4612312929923216e-06, "loss": 0.4126, "step": 8641 }, { "epoch": 0.5185096298074039, "grad_norm": 1.1143971681594849, "learning_rate": 3.460551168792598e-06, "loss": 0.3766, "step": 8642 }, { "epoch": 0.5185696286074278, "grad_norm": 1.4066580533981323, "learning_rate": 3.4598710460826784e-06, "loss": 0.4241, "step": 8643 }, { "epoch": 0.5186296274074519, "grad_norm": 1.3403000831604004, "learning_rate": 3.459190924888247e-06, "loss": 0.4158, "step": 8644 }, { "epoch": 0.5186896262074758, "grad_norm": 1.3097474575042725, "learning_rate": 3.458510805234988e-06, "loss": 0.4503, "step": 8645 }, { "epoch": 0.5187496250074999, "grad_norm": 1.4806277751922607, "learning_rate": 3.457830687148587e-06, "loss": 0.429, "step": 8646 }, { "epoch": 0.5188096238075238, "grad_norm": 1.2008017301559448, "learning_rate": 3.4571505706547283e-06, "loss": 0.3902, "step": 8647 }, { "epoch": 0.5188696226075479, "grad_norm": 1.2722554206848145, "learning_rate": 3.4564704557790983e-06, "loss": 0.4097, "step": 8648 }, { "epoch": 0.5189296214075718, "grad_norm": 1.3326356410980225, "learning_rate": 3.4557903425473797e-06, "loss": 0.4315, "step": 8649 }, { "epoch": 0.5189896202075959, "grad_norm": 1.222293496131897, "learning_rate": 3.455110230985259e-06, "loss": 0.3766, "step": 8650 }, { "epoch": 0.5190496190076198, "grad_norm": 1.2936235666275024, "learning_rate": 3.4544301211184205e-06, "loss": 0.4023, "step": 8651 }, { "epoch": 0.5191096178076439, "grad_norm": 1.2917225360870361, "learning_rate": 3.4537500129725478e-06, "loss": 0.3927, "step": 8652 }, { "epoch": 0.5191696166076678, "grad_norm": 1.2232084274291992, "learning_rate": 3.4530699065733286e-06, "loss": 0.4615, "step": 8653 }, { "epoch": 0.5192296154076919, "grad_norm": 1.3688762187957764, "learning_rate": 3.4523898019464428e-06, "loss": 0.4031, "step": 8654 }, { "epoch": 0.5192896142077158, "grad_norm": 1.2596884965896606, "learning_rate": 3.451709699117578e-06, "loss": 0.441, "step": 8655 }, { "epoch": 0.5193496130077399, "grad_norm": 1.378105640411377, "learning_rate": 3.4510295981124187e-06, "loss": 0.4422, "step": 8656 }, { "epoch": 0.5194096118077638, "grad_norm": 1.3004930019378662, "learning_rate": 3.4503494989566475e-06, "loss": 0.3653, "step": 8657 }, { "epoch": 0.5194696106077878, "grad_norm": 1.1915576457977295, "learning_rate": 3.44966940167595e-06, "loss": 0.3863, "step": 8658 }, { "epoch": 0.5195296094078119, "grad_norm": 1.2661056518554688, "learning_rate": 3.4489893062960095e-06, "loss": 0.3937, "step": 8659 }, { "epoch": 0.5195896082078358, "grad_norm": 1.1167997121810913, "learning_rate": 3.448309212842511e-06, "loss": 0.3531, "step": 8660 }, { "epoch": 0.5196496070078599, "grad_norm": 1.265793800354004, "learning_rate": 3.4476291213411378e-06, "loss": 0.4023, "step": 8661 }, { "epoch": 0.5197096058078838, "grad_norm": 1.2141168117523193, "learning_rate": 3.446949031817574e-06, "loss": 0.4087, "step": 8662 }, { "epoch": 0.5197696046079079, "grad_norm": 1.2729644775390625, "learning_rate": 3.446268944297504e-06, "loss": 0.4648, "step": 8663 }, { "epoch": 0.5198296034079318, "grad_norm": 1.2426973581314087, "learning_rate": 3.4455888588066105e-06, "loss": 0.3361, "step": 8664 }, { "epoch": 0.5198896022079559, "grad_norm": 1.2288758754730225, "learning_rate": 3.4449087753705786e-06, "loss": 0.3674, "step": 8665 }, { "epoch": 0.5199496010079798, "grad_norm": 1.2073506116867065, "learning_rate": 3.4442286940150904e-06, "loss": 0.4172, "step": 8666 }, { "epoch": 0.5200095998080039, "grad_norm": 1.249847650527954, "learning_rate": 3.44354861476583e-06, "loss": 0.4025, "step": 8667 }, { "epoch": 0.5200695986080278, "grad_norm": 1.2506352663040161, "learning_rate": 3.442868537648483e-06, "loss": 0.3545, "step": 8668 }, { "epoch": 0.5201295974080519, "grad_norm": 1.410447359085083, "learning_rate": 3.4421884626887294e-06, "loss": 0.3614, "step": 8669 }, { "epoch": 0.5201895962080758, "grad_norm": 1.3139493465423584, "learning_rate": 3.441508389912256e-06, "loss": 0.3917, "step": 8670 }, { "epoch": 0.5202495950080999, "grad_norm": 1.3873876333236694, "learning_rate": 3.4408283193447416e-06, "loss": 0.3747, "step": 8671 }, { "epoch": 0.5203095938081238, "grad_norm": 1.3495984077453613, "learning_rate": 3.4401482510118734e-06, "loss": 0.4573, "step": 8672 }, { "epoch": 0.5203695926081479, "grad_norm": 1.3905445337295532, "learning_rate": 3.4394681849393338e-06, "loss": 0.3838, "step": 8673 }, { "epoch": 0.5204295914081718, "grad_norm": 1.3839783668518066, "learning_rate": 3.4387881211528035e-06, "loss": 0.4241, "step": 8674 }, { "epoch": 0.5204895902081959, "grad_norm": 1.2514530420303345, "learning_rate": 3.4381080596779684e-06, "loss": 0.422, "step": 8675 }, { "epoch": 0.5205495890082198, "grad_norm": 1.1011985540390015, "learning_rate": 3.4374280005405093e-06, "loss": 0.3662, "step": 8676 }, { "epoch": 0.5206095878082438, "grad_norm": 1.2206764221191406, "learning_rate": 3.4367479437661104e-06, "loss": 0.4408, "step": 8677 }, { "epoch": 0.5206695866082678, "grad_norm": 1.2986884117126465, "learning_rate": 3.4360678893804524e-06, "loss": 0.4212, "step": 8678 }, { "epoch": 0.5207295854082918, "grad_norm": 1.53617525100708, "learning_rate": 3.435387837409219e-06, "loss": 0.4409, "step": 8679 }, { "epoch": 0.5207895842083158, "grad_norm": 1.3613814115524292, "learning_rate": 3.434707787878093e-06, "loss": 0.4596, "step": 8680 }, { "epoch": 0.5208495830083398, "grad_norm": 1.1897447109222412, "learning_rate": 3.434027740812756e-06, "loss": 0.3631, "step": 8681 }, { "epoch": 0.5209095818083639, "grad_norm": 1.2585524320602417, "learning_rate": 3.4333476962388917e-06, "loss": 0.4172, "step": 8682 }, { "epoch": 0.5209695806083878, "grad_norm": 1.2479749917984009, "learning_rate": 3.43266765418218e-06, "loss": 0.4136, "step": 8683 }, { "epoch": 0.5210295794084119, "grad_norm": 1.4106441736221313, "learning_rate": 3.4319876146683036e-06, "loss": 0.3959, "step": 8684 }, { "epoch": 0.5210895782084358, "grad_norm": 1.2598038911819458, "learning_rate": 3.4313075777229476e-06, "loss": 0.366, "step": 8685 }, { "epoch": 0.5211495770084599, "grad_norm": 1.2338991165161133, "learning_rate": 3.430627543371789e-06, "loss": 0.4013, "step": 8686 }, { "epoch": 0.5212095758084838, "grad_norm": 1.290441632270813, "learning_rate": 3.4299475116405145e-06, "loss": 0.4458, "step": 8687 }, { "epoch": 0.5212695746085079, "grad_norm": 1.2340373992919922, "learning_rate": 3.4292674825548006e-06, "loss": 0.3951, "step": 8688 }, { "epoch": 0.5213295734085318, "grad_norm": 1.3749033212661743, "learning_rate": 3.4285874561403334e-06, "loss": 0.4001, "step": 8689 }, { "epoch": 0.5213895722085559, "grad_norm": 1.3201185464859009, "learning_rate": 3.4279074324227915e-06, "loss": 0.3958, "step": 8690 }, { "epoch": 0.5214495710085798, "grad_norm": 1.1976710557937622, "learning_rate": 3.4272274114278584e-06, "loss": 0.4238, "step": 8691 }, { "epoch": 0.5215095698086039, "grad_norm": 1.1796963214874268, "learning_rate": 3.4265473931812142e-06, "loss": 0.4138, "step": 8692 }, { "epoch": 0.5215695686086278, "grad_norm": 1.3496750593185425, "learning_rate": 3.42586737770854e-06, "loss": 0.3596, "step": 8693 }, { "epoch": 0.5216295674086519, "grad_norm": 1.33619225025177, "learning_rate": 3.4251873650355183e-06, "loss": 0.3974, "step": 8694 }, { "epoch": 0.5216895662086758, "grad_norm": 1.3838610649108887, "learning_rate": 3.4245073551878275e-06, "loss": 0.3708, "step": 8695 }, { "epoch": 0.5217495650086998, "grad_norm": 1.2540411949157715, "learning_rate": 3.42382734819115e-06, "loss": 0.3931, "step": 8696 }, { "epoch": 0.5218095638087238, "grad_norm": 1.1705143451690674, "learning_rate": 3.423147344071168e-06, "loss": 0.338, "step": 8697 }, { "epoch": 0.5218695626087478, "grad_norm": 1.2613950967788696, "learning_rate": 3.4224673428535582e-06, "loss": 0.4024, "step": 8698 }, { "epoch": 0.5219295614087718, "grad_norm": 1.2205743789672852, "learning_rate": 3.421787344564007e-06, "loss": 0.4007, "step": 8699 }, { "epoch": 0.5219895602087958, "grad_norm": 1.3103832006454468, "learning_rate": 3.421107349228188e-06, "loss": 0.3793, "step": 8700 }, { "epoch": 0.5220495590088198, "grad_norm": 1.2606598138809204, "learning_rate": 3.420427356871788e-06, "loss": 0.4025, "step": 8701 }, { "epoch": 0.5221095578088438, "grad_norm": 1.2159074544906616, "learning_rate": 3.419747367520482e-06, "loss": 0.3987, "step": 8702 }, { "epoch": 0.5221695566088678, "grad_norm": 1.2115305662155151, "learning_rate": 3.4190673811999525e-06, "loss": 0.3361, "step": 8703 }, { "epoch": 0.5222295554088918, "grad_norm": 1.3269189596176147, "learning_rate": 3.4183873979358804e-06, "loss": 0.4138, "step": 8704 }, { "epoch": 0.5222895542089159, "grad_norm": 1.3805689811706543, "learning_rate": 3.4177074177539436e-06, "loss": 0.4461, "step": 8705 }, { "epoch": 0.5223495530089398, "grad_norm": 1.2435224056243896, "learning_rate": 3.4170274406798236e-06, "loss": 0.4073, "step": 8706 }, { "epoch": 0.5224095518089639, "grad_norm": 1.3009443283081055, "learning_rate": 3.4163474667391983e-06, "loss": 0.4146, "step": 8707 }, { "epoch": 0.5224695506089878, "grad_norm": 1.2788686752319336, "learning_rate": 3.415667495957748e-06, "loss": 0.3981, "step": 8708 }, { "epoch": 0.5225295494090119, "grad_norm": 1.2891712188720703, "learning_rate": 3.4149875283611532e-06, "loss": 0.3824, "step": 8709 }, { "epoch": 0.5225895482090358, "grad_norm": 1.0965704917907715, "learning_rate": 3.414307563975091e-06, "loss": 0.3503, "step": 8710 }, { "epoch": 0.5226495470090599, "grad_norm": 1.405059576034546, "learning_rate": 3.413627602825243e-06, "loss": 0.4287, "step": 8711 }, { "epoch": 0.5227095458090838, "grad_norm": 1.2919795513153076, "learning_rate": 3.412947644937285e-06, "loss": 0.4267, "step": 8712 }, { "epoch": 0.5227695446091079, "grad_norm": 1.3257020711898804, "learning_rate": 3.4122676903368983e-06, "loss": 0.4616, "step": 8713 }, { "epoch": 0.5228295434091318, "grad_norm": 1.4225444793701172, "learning_rate": 3.4115877390497634e-06, "loss": 0.3877, "step": 8714 }, { "epoch": 0.5228895422091558, "grad_norm": 1.4452191591262817, "learning_rate": 3.4109077911015536e-06, "loss": 0.466, "step": 8715 }, { "epoch": 0.5229495410091798, "grad_norm": 1.221623420715332, "learning_rate": 3.410227846517954e-06, "loss": 0.4388, "step": 8716 }, { "epoch": 0.5230095398092038, "grad_norm": 1.2970917224884033, "learning_rate": 3.4095479053246366e-06, "loss": 0.4092, "step": 8717 }, { "epoch": 0.5230695386092278, "grad_norm": 1.2598317861557007, "learning_rate": 3.408867967547285e-06, "loss": 0.3822, "step": 8718 }, { "epoch": 0.5231295374092518, "grad_norm": 1.204168677330017, "learning_rate": 3.4081880332115735e-06, "loss": 0.3496, "step": 8719 }, { "epoch": 0.5231895362092758, "grad_norm": 1.2759642601013184, "learning_rate": 3.4075081023431817e-06, "loss": 0.4007, "step": 8720 }, { "epoch": 0.5232495350092998, "grad_norm": 1.3389335870742798, "learning_rate": 3.4068281749677886e-06, "loss": 0.3742, "step": 8721 }, { "epoch": 0.5233095338093238, "grad_norm": 1.3503713607788086, "learning_rate": 3.40614825111107e-06, "loss": 0.3772, "step": 8722 }, { "epoch": 0.5233695326093478, "grad_norm": 1.2261052131652832, "learning_rate": 3.405468330798705e-06, "loss": 0.4151, "step": 8723 }, { "epoch": 0.5234295314093718, "grad_norm": 1.2187796831130981, "learning_rate": 3.40478841405637e-06, "loss": 0.4165, "step": 8724 }, { "epoch": 0.5234895302093958, "grad_norm": 1.288271188735962, "learning_rate": 3.4041085009097427e-06, "loss": 0.4009, "step": 8725 }, { "epoch": 0.5235495290094198, "grad_norm": 1.2902966737747192, "learning_rate": 3.4034285913845012e-06, "loss": 0.4549, "step": 8726 }, { "epoch": 0.5236095278094438, "grad_norm": 1.2775412797927856, "learning_rate": 3.4027486855063207e-06, "loss": 0.4689, "step": 8727 }, { "epoch": 0.5236695266094679, "grad_norm": 1.2957154512405396, "learning_rate": 3.4020687833008805e-06, "loss": 0.4155, "step": 8728 }, { "epoch": 0.5237295254094918, "grad_norm": 1.224440574645996, "learning_rate": 3.4013888847938554e-06, "loss": 0.3896, "step": 8729 }, { "epoch": 0.5237895242095159, "grad_norm": 1.2213659286499023, "learning_rate": 3.4007089900109234e-06, "loss": 0.3644, "step": 8730 }, { "epoch": 0.5238495230095398, "grad_norm": 1.380361557006836, "learning_rate": 3.4000290989777597e-06, "loss": 0.4455, "step": 8731 }, { "epoch": 0.5239095218095638, "grad_norm": 1.3203941583633423, "learning_rate": 3.399349211720041e-06, "loss": 0.3833, "step": 8732 }, { "epoch": 0.5239695206095878, "grad_norm": 1.400265097618103, "learning_rate": 3.3986693282634458e-06, "loss": 0.4182, "step": 8733 }, { "epoch": 0.5240295194096118, "grad_norm": 1.336020827293396, "learning_rate": 3.3979894486336465e-06, "loss": 0.3703, "step": 8734 }, { "epoch": 0.5240895182096358, "grad_norm": 1.477632761001587, "learning_rate": 3.3973095728563224e-06, "loss": 0.4543, "step": 8735 }, { "epoch": 0.5241495170096598, "grad_norm": 1.5510977506637573, "learning_rate": 3.396629700957147e-06, "loss": 0.3909, "step": 8736 }, { "epoch": 0.5242095158096838, "grad_norm": 1.2078773975372314, "learning_rate": 3.3959498329617973e-06, "loss": 0.3737, "step": 8737 }, { "epoch": 0.5242695146097078, "grad_norm": 1.2891161441802979, "learning_rate": 3.3952699688959485e-06, "loss": 0.3699, "step": 8738 }, { "epoch": 0.5243295134097318, "grad_norm": 1.2470660209655762, "learning_rate": 3.3945901087852755e-06, "loss": 0.4127, "step": 8739 }, { "epoch": 0.5243895122097558, "grad_norm": 1.2665760517120361, "learning_rate": 3.3939102526554546e-06, "loss": 0.3829, "step": 8740 }, { "epoch": 0.5244495110097798, "grad_norm": 1.1320189237594604, "learning_rate": 3.393230400532159e-06, "loss": 0.3274, "step": 8741 }, { "epoch": 0.5245095098098038, "grad_norm": 1.2626150846481323, "learning_rate": 3.392550552441066e-06, "loss": 0.3694, "step": 8742 }, { "epoch": 0.5245695086098278, "grad_norm": 1.2068730592727661, "learning_rate": 3.3918707084078478e-06, "loss": 0.3762, "step": 8743 }, { "epoch": 0.5246295074098518, "grad_norm": 1.1721519231796265, "learning_rate": 3.3911908684581798e-06, "loss": 0.3926, "step": 8744 }, { "epoch": 0.5246895062098758, "grad_norm": 1.3734138011932373, "learning_rate": 3.390511032617739e-06, "loss": 0.4198, "step": 8745 }, { "epoch": 0.5247495050098998, "grad_norm": 1.3416906595230103, "learning_rate": 3.389831200912196e-06, "loss": 0.4242, "step": 8746 }, { "epoch": 0.5248095038099237, "grad_norm": 1.2766103744506836, "learning_rate": 3.3891513733672284e-06, "loss": 0.3656, "step": 8747 }, { "epoch": 0.5248695026099478, "grad_norm": 1.2519630193710327, "learning_rate": 3.3884715500085065e-06, "loss": 0.3701, "step": 8748 }, { "epoch": 0.5249295014099719, "grad_norm": 1.4441217184066772, "learning_rate": 3.3877917308617064e-06, "loss": 0.3852, "step": 8749 }, { "epoch": 0.5249895002099958, "grad_norm": 1.2507057189941406, "learning_rate": 3.387111915952502e-06, "loss": 0.4085, "step": 8750 }, { "epoch": 0.5250494990100198, "grad_norm": 1.2478396892547607, "learning_rate": 3.386432105306566e-06, "loss": 0.3608, "step": 8751 }, { "epoch": 0.5251094978100438, "grad_norm": 1.3734807968139648, "learning_rate": 3.3857522989495724e-06, "loss": 0.3992, "step": 8752 }, { "epoch": 0.5251694966100678, "grad_norm": 1.305975317955017, "learning_rate": 3.3850724969071932e-06, "loss": 0.4266, "step": 8753 }, { "epoch": 0.5252294954100918, "grad_norm": 1.2447022199630737, "learning_rate": 3.3843926992051023e-06, "loss": 0.4316, "step": 8754 }, { "epoch": 0.5252894942101158, "grad_norm": 1.1945915222167969, "learning_rate": 3.3837129058689732e-06, "loss": 0.4179, "step": 8755 }, { "epoch": 0.5253494930101398, "grad_norm": 1.4207110404968262, "learning_rate": 3.383033116924477e-06, "loss": 0.4348, "step": 8756 }, { "epoch": 0.5254094918101638, "grad_norm": 1.2278172969818115, "learning_rate": 3.382353332397288e-06, "loss": 0.3702, "step": 8757 }, { "epoch": 0.5254694906101878, "grad_norm": 1.3937149047851562, "learning_rate": 3.3816735523130764e-06, "loss": 0.4384, "step": 8758 }, { "epoch": 0.5255294894102118, "grad_norm": 1.2351739406585693, "learning_rate": 3.380993776697517e-06, "loss": 0.394, "step": 8759 }, { "epoch": 0.5255894882102358, "grad_norm": 1.209750771522522, "learning_rate": 3.3803140055762796e-06, "loss": 0.4016, "step": 8760 }, { "epoch": 0.5256494870102598, "grad_norm": 1.1884607076644897, "learning_rate": 3.379634238975036e-06, "loss": 0.3801, "step": 8761 }, { "epoch": 0.5257094858102838, "grad_norm": 1.2128889560699463, "learning_rate": 3.378954476919461e-06, "loss": 0.4213, "step": 8762 }, { "epoch": 0.5257694846103078, "grad_norm": 1.2227784395217896, "learning_rate": 3.378274719435222e-06, "loss": 0.3977, "step": 8763 }, { "epoch": 0.5258294834103318, "grad_norm": 1.3071364164352417, "learning_rate": 3.377594966547994e-06, "loss": 0.4157, "step": 8764 }, { "epoch": 0.5258894822103558, "grad_norm": 1.2427706718444824, "learning_rate": 3.3769152182834445e-06, "loss": 0.4185, "step": 8765 }, { "epoch": 0.5259494810103797, "grad_norm": 1.4058643579483032, "learning_rate": 3.3762354746672473e-06, "loss": 0.4442, "step": 8766 }, { "epoch": 0.5260094798104038, "grad_norm": 1.1292608976364136, "learning_rate": 3.375555735725073e-06, "loss": 0.406, "step": 8767 }, { "epoch": 0.5260694786104277, "grad_norm": 1.2310703992843628, "learning_rate": 3.374876001482591e-06, "loss": 0.3836, "step": 8768 }, { "epoch": 0.5261294774104518, "grad_norm": 1.2663071155548096, "learning_rate": 3.3741962719654727e-06, "loss": 0.4309, "step": 8769 }, { "epoch": 0.5261894762104757, "grad_norm": 1.3282395601272583, "learning_rate": 3.3735165471993877e-06, "loss": 0.3829, "step": 8770 }, { "epoch": 0.5262494750104998, "grad_norm": 1.2442508935928345, "learning_rate": 3.372836827210007e-06, "loss": 0.3808, "step": 8771 }, { "epoch": 0.5263094738105238, "grad_norm": 1.209079384803772, "learning_rate": 3.372157112022999e-06, "loss": 0.3726, "step": 8772 }, { "epoch": 0.5263694726105478, "grad_norm": 1.239824652671814, "learning_rate": 3.3714774016640343e-06, "loss": 0.4211, "step": 8773 }, { "epoch": 0.5264294714105718, "grad_norm": 1.2548717260360718, "learning_rate": 3.370797696158784e-06, "loss": 0.4306, "step": 8774 }, { "epoch": 0.5264894702105958, "grad_norm": 1.3160395622253418, "learning_rate": 3.3701179955329153e-06, "loss": 0.382, "step": 8775 }, { "epoch": 0.5265494690106198, "grad_norm": 1.473497748374939, "learning_rate": 3.3694382998120988e-06, "loss": 0.436, "step": 8776 }, { "epoch": 0.5266094678106438, "grad_norm": 1.35752272605896, "learning_rate": 3.3687586090220016e-06, "loss": 0.4382, "step": 8777 }, { "epoch": 0.5266694666106678, "grad_norm": 1.243170976638794, "learning_rate": 3.3680789231882936e-06, "loss": 0.4027, "step": 8778 }, { "epoch": 0.5267294654106918, "grad_norm": 1.35272216796875, "learning_rate": 3.367399242336645e-06, "loss": 0.4466, "step": 8779 }, { "epoch": 0.5267894642107158, "grad_norm": 1.3470228910446167, "learning_rate": 3.3667195664927224e-06, "loss": 0.4322, "step": 8780 }, { "epoch": 0.5268494630107398, "grad_norm": 1.3159573078155518, "learning_rate": 3.3660398956821944e-06, "loss": 0.36, "step": 8781 }, { "epoch": 0.5269094618107638, "grad_norm": 1.2066181898117065, "learning_rate": 3.365360229930729e-06, "loss": 0.3894, "step": 8782 }, { "epoch": 0.5269694606107878, "grad_norm": 1.236118197441101, "learning_rate": 3.3646805692639955e-06, "loss": 0.4228, "step": 8783 }, { "epoch": 0.5270294594108118, "grad_norm": 1.3259010314941406, "learning_rate": 3.3640009137076593e-06, "loss": 0.4062, "step": 8784 }, { "epoch": 0.5270894582108357, "grad_norm": 1.333543062210083, "learning_rate": 3.3633212632873887e-06, "loss": 0.4281, "step": 8785 }, { "epoch": 0.5271494570108598, "grad_norm": 1.237311840057373, "learning_rate": 3.3626416180288523e-06, "loss": 0.4153, "step": 8786 }, { "epoch": 0.5272094558108837, "grad_norm": 1.37112295627594, "learning_rate": 3.361961977957716e-06, "loss": 0.4591, "step": 8787 }, { "epoch": 0.5272694546109078, "grad_norm": 1.2591395378112793, "learning_rate": 3.3612823430996466e-06, "loss": 0.393, "step": 8788 }, { "epoch": 0.5273294534109317, "grad_norm": 1.4239463806152344, "learning_rate": 3.3606027134803105e-06, "loss": 0.4232, "step": 8789 }, { "epoch": 0.5273894522109558, "grad_norm": 1.1730635166168213, "learning_rate": 3.359923089125375e-06, "loss": 0.4592, "step": 8790 }, { "epoch": 0.5274494510109797, "grad_norm": 1.3276208639144897, "learning_rate": 3.3592434700605077e-06, "loss": 0.4175, "step": 8791 }, { "epoch": 0.5275094498110038, "grad_norm": 1.189231276512146, "learning_rate": 3.358563856311371e-06, "loss": 0.404, "step": 8792 }, { "epoch": 0.5275694486110277, "grad_norm": 1.2001301050186157, "learning_rate": 3.3578842479036357e-06, "loss": 0.3551, "step": 8793 }, { "epoch": 0.5276294474110518, "grad_norm": 1.2472865581512451, "learning_rate": 3.3572046448629624e-06, "loss": 0.4208, "step": 8794 }, { "epoch": 0.5276894462110758, "grad_norm": 1.3449171781539917, "learning_rate": 3.3565250472150196e-06, "loss": 0.4419, "step": 8795 }, { "epoch": 0.5277494450110998, "grad_norm": 1.205113410949707, "learning_rate": 3.3558454549854733e-06, "loss": 0.4099, "step": 8796 }, { "epoch": 0.5278094438111238, "grad_norm": 1.489047646522522, "learning_rate": 3.355165868199986e-06, "loss": 0.4291, "step": 8797 }, { "epoch": 0.5278694426111478, "grad_norm": 1.170390248298645, "learning_rate": 3.3544862868842256e-06, "loss": 0.4071, "step": 8798 }, { "epoch": 0.5279294414111718, "grad_norm": 1.3028678894042969, "learning_rate": 3.353806711063854e-06, "loss": 0.4314, "step": 8799 }, { "epoch": 0.5279894402111958, "grad_norm": 1.358290195465088, "learning_rate": 3.3531271407645373e-06, "loss": 0.4364, "step": 8800 }, { "epoch": 0.5280494390112198, "grad_norm": 1.264074444770813, "learning_rate": 3.3524475760119394e-06, "loss": 0.3755, "step": 8801 }, { "epoch": 0.5281094378112438, "grad_norm": 1.3072586059570312, "learning_rate": 3.351768016831724e-06, "loss": 0.3355, "step": 8802 }, { "epoch": 0.5281694366112678, "grad_norm": 1.1880407333374023, "learning_rate": 3.3510884632495556e-06, "loss": 0.37, "step": 8803 }, { "epoch": 0.5282294354112917, "grad_norm": 1.2970155477523804, "learning_rate": 3.350408915291097e-06, "loss": 0.4107, "step": 8804 }, { "epoch": 0.5282894342113158, "grad_norm": 1.1558815240859985, "learning_rate": 3.349729372982013e-06, "loss": 0.382, "step": 8805 }, { "epoch": 0.5283494330113397, "grad_norm": 1.312406301498413, "learning_rate": 3.3490498363479653e-06, "loss": 0.4306, "step": 8806 }, { "epoch": 0.5284094318113638, "grad_norm": 1.428021788597107, "learning_rate": 3.3483703054146164e-06, "loss": 0.42, "step": 8807 }, { "epoch": 0.5284694306113877, "grad_norm": 1.1486361026763916, "learning_rate": 3.3476907802076325e-06, "loss": 0.377, "step": 8808 }, { "epoch": 0.5285294294114118, "grad_norm": 1.2233877182006836, "learning_rate": 3.347011260752672e-06, "loss": 0.4113, "step": 8809 }, { "epoch": 0.5285894282114357, "grad_norm": 1.2995045185089111, "learning_rate": 3.3463317470754013e-06, "loss": 0.4478, "step": 8810 }, { "epoch": 0.5286494270114598, "grad_norm": 1.2770850658416748, "learning_rate": 3.3456522392014783e-06, "loss": 0.3859, "step": 8811 }, { "epoch": 0.5287094258114837, "grad_norm": 1.2111577987670898, "learning_rate": 3.344972737156569e-06, "loss": 0.3966, "step": 8812 }, { "epoch": 0.5287694246115078, "grad_norm": 1.2678524255752563, "learning_rate": 3.3442932409663307e-06, "loss": 0.4192, "step": 8813 }, { "epoch": 0.5288294234115317, "grad_norm": 1.2528462409973145, "learning_rate": 3.3436137506564283e-06, "loss": 0.387, "step": 8814 }, { "epoch": 0.5288894222115558, "grad_norm": 1.0715304613113403, "learning_rate": 3.342934266252523e-06, "loss": 0.3285, "step": 8815 }, { "epoch": 0.5289494210115798, "grad_norm": 1.2390437126159668, "learning_rate": 3.342254787780274e-06, "loss": 0.4039, "step": 8816 }, { "epoch": 0.5290094198116038, "grad_norm": 1.2814522981643677, "learning_rate": 3.3415753152653435e-06, "loss": 0.4384, "step": 8817 }, { "epoch": 0.5290694186116278, "grad_norm": 1.3407461643218994, "learning_rate": 3.3408958487333906e-06, "loss": 0.3918, "step": 8818 }, { "epoch": 0.5291294174116518, "grad_norm": 1.3103233575820923, "learning_rate": 3.3402163882100773e-06, "loss": 0.4704, "step": 8819 }, { "epoch": 0.5291894162116758, "grad_norm": 1.1751223802566528, "learning_rate": 3.3395369337210635e-06, "loss": 0.4013, "step": 8820 }, { "epoch": 0.5292494150116998, "grad_norm": 1.3789008855819702, "learning_rate": 3.338857485292008e-06, "loss": 0.4034, "step": 8821 }, { "epoch": 0.5293094138117238, "grad_norm": 1.145166277885437, "learning_rate": 3.3381780429485713e-06, "loss": 0.3363, "step": 8822 }, { "epoch": 0.5293694126117477, "grad_norm": 1.2108246088027954, "learning_rate": 3.3374986067164125e-06, "loss": 0.3306, "step": 8823 }, { "epoch": 0.5294294114117718, "grad_norm": 1.124896764755249, "learning_rate": 3.33681917662119e-06, "loss": 0.3881, "step": 8824 }, { "epoch": 0.5294894102117957, "grad_norm": 1.4370691776275635, "learning_rate": 3.3361397526885653e-06, "loss": 0.4241, "step": 8825 }, { "epoch": 0.5295494090118198, "grad_norm": 1.2496196031570435, "learning_rate": 3.3354603349441952e-06, "loss": 0.404, "step": 8826 }, { "epoch": 0.5296094078118437, "grad_norm": 1.3471561670303345, "learning_rate": 3.334780923413739e-06, "loss": 0.4171, "step": 8827 }, { "epoch": 0.5296694066118678, "grad_norm": 1.2416446208953857, "learning_rate": 3.3341015181228534e-06, "loss": 0.3895, "step": 8828 }, { "epoch": 0.5297294054118917, "grad_norm": 1.3523143529891968, "learning_rate": 3.333422119097199e-06, "loss": 0.3803, "step": 8829 }, { "epoch": 0.5297894042119158, "grad_norm": 1.1875829696655273, "learning_rate": 3.332742726362431e-06, "loss": 0.3601, "step": 8830 }, { "epoch": 0.5298494030119397, "grad_norm": 1.2636756896972656, "learning_rate": 3.3320633399442087e-06, "loss": 0.3919, "step": 8831 }, { "epoch": 0.5299094018119638, "grad_norm": 1.2473673820495605, "learning_rate": 3.3313839598681896e-06, "loss": 0.3662, "step": 8832 }, { "epoch": 0.5299694006119877, "grad_norm": 1.490386962890625, "learning_rate": 3.330704586160029e-06, "loss": 0.4275, "step": 8833 }, { "epoch": 0.5300293994120118, "grad_norm": 1.2912189960479736, "learning_rate": 3.3300252188453864e-06, "loss": 0.3903, "step": 8834 }, { "epoch": 0.5300893982120357, "grad_norm": 1.3732398748397827, "learning_rate": 3.329345857949915e-06, "loss": 0.476, "step": 8835 }, { "epoch": 0.5301493970120598, "grad_norm": 1.3274017572402954, "learning_rate": 3.328666503499273e-06, "loss": 0.4067, "step": 8836 }, { "epoch": 0.5302093958120837, "grad_norm": 1.261718511581421, "learning_rate": 3.3279871555191192e-06, "loss": 0.3878, "step": 8837 }, { "epoch": 0.5302693946121078, "grad_norm": 1.2187453508377075, "learning_rate": 3.327307814035104e-06, "loss": 0.3366, "step": 8838 }, { "epoch": 0.5303293934121318, "grad_norm": 1.2845547199249268, "learning_rate": 3.326628479072888e-06, "loss": 0.3977, "step": 8839 }, { "epoch": 0.5303893922121558, "grad_norm": 1.2473139762878418, "learning_rate": 3.3259491506581226e-06, "loss": 0.4194, "step": 8840 }, { "epoch": 0.5304493910121798, "grad_norm": 1.2281556129455566, "learning_rate": 3.325269828816467e-06, "loss": 0.4134, "step": 8841 }, { "epoch": 0.5305093898122037, "grad_norm": 1.3622493743896484, "learning_rate": 3.324590513573572e-06, "loss": 0.433, "step": 8842 }, { "epoch": 0.5305693886122278, "grad_norm": 1.2134206295013428, "learning_rate": 3.3239112049550945e-06, "loss": 0.397, "step": 8843 }, { "epoch": 0.5306293874122517, "grad_norm": 1.2665783166885376, "learning_rate": 3.323231902986689e-06, "loss": 0.415, "step": 8844 }, { "epoch": 0.5306893862122758, "grad_norm": 1.2907602787017822, "learning_rate": 3.3225526076940087e-06, "loss": 0.4004, "step": 8845 }, { "epoch": 0.5307493850122997, "grad_norm": 1.3904606103897095, "learning_rate": 3.3218733191027085e-06, "loss": 0.4323, "step": 8846 }, { "epoch": 0.5308093838123238, "grad_norm": 1.2748863697052002, "learning_rate": 3.321194037238441e-06, "loss": 0.4227, "step": 8847 }, { "epoch": 0.5308693826123477, "grad_norm": 1.273924469947815, "learning_rate": 3.32051476212686e-06, "loss": 0.3931, "step": 8848 }, { "epoch": 0.5309293814123718, "grad_norm": 1.3217576742172241, "learning_rate": 3.3198354937936193e-06, "loss": 0.3926, "step": 8849 }, { "epoch": 0.5309893802123957, "grad_norm": 1.345937728881836, "learning_rate": 3.319156232264371e-06, "loss": 0.3875, "step": 8850 }, { "epoch": 0.5310493790124198, "grad_norm": 1.1722556352615356, "learning_rate": 3.318476977564768e-06, "loss": 0.3611, "step": 8851 }, { "epoch": 0.5311093778124437, "grad_norm": 1.2455209493637085, "learning_rate": 3.3177977297204616e-06, "loss": 0.3935, "step": 8852 }, { "epoch": 0.5311693766124678, "grad_norm": 1.3453714847564697, "learning_rate": 3.3171184887571065e-06, "loss": 0.4239, "step": 8853 }, { "epoch": 0.5312293754124917, "grad_norm": 1.4415090084075928, "learning_rate": 3.3164392547003514e-06, "loss": 0.4567, "step": 8854 }, { "epoch": 0.5312893742125158, "grad_norm": 1.3559023141860962, "learning_rate": 3.3157600275758486e-06, "loss": 0.4114, "step": 8855 }, { "epoch": 0.5313493730125397, "grad_norm": 1.3518626689910889, "learning_rate": 3.3150808074092526e-06, "loss": 0.4478, "step": 8856 }, { "epoch": 0.5314093718125638, "grad_norm": 1.458327054977417, "learning_rate": 3.3144015942262104e-06, "loss": 0.4238, "step": 8857 }, { "epoch": 0.5314693706125877, "grad_norm": 1.325760841369629, "learning_rate": 3.3137223880523755e-06, "loss": 0.438, "step": 8858 }, { "epoch": 0.5315293694126118, "grad_norm": 1.2427414655685425, "learning_rate": 3.3130431889133957e-06, "loss": 0.3819, "step": 8859 }, { "epoch": 0.5315893682126357, "grad_norm": 1.225414752960205, "learning_rate": 3.3123639968349234e-06, "loss": 0.36, "step": 8860 }, { "epoch": 0.5316493670126597, "grad_norm": 1.166246771812439, "learning_rate": 3.311684811842609e-06, "loss": 0.4277, "step": 8861 }, { "epoch": 0.5317093658126838, "grad_norm": 1.3073261976242065, "learning_rate": 3.311005633962101e-06, "loss": 0.4231, "step": 8862 }, { "epoch": 0.5317693646127077, "grad_norm": 1.3181560039520264, "learning_rate": 3.3103264632190493e-06, "loss": 0.4114, "step": 8863 }, { "epoch": 0.5318293634127318, "grad_norm": 1.2730004787445068, "learning_rate": 3.3096472996391022e-06, "loss": 0.369, "step": 8864 }, { "epoch": 0.5318893622127557, "grad_norm": 1.334878921508789, "learning_rate": 3.3089681432479093e-06, "loss": 0.4139, "step": 8865 }, { "epoch": 0.5319493610127798, "grad_norm": 1.3962392807006836, "learning_rate": 3.30828899407112e-06, "loss": 0.392, "step": 8866 }, { "epoch": 0.5320093598128037, "grad_norm": 1.2188704013824463, "learning_rate": 3.307609852134382e-06, "loss": 0.3966, "step": 8867 }, { "epoch": 0.5320693586128278, "grad_norm": 1.2309932708740234, "learning_rate": 3.306930717463343e-06, "loss": 0.3737, "step": 8868 }, { "epoch": 0.5321293574128517, "grad_norm": 1.384381651878357, "learning_rate": 3.306251590083651e-06, "loss": 0.4185, "step": 8869 }, { "epoch": 0.5321893562128758, "grad_norm": 1.3487496376037598, "learning_rate": 3.3055724700209548e-06, "loss": 0.4359, "step": 8870 }, { "epoch": 0.5322493550128997, "grad_norm": 1.4226478338241577, "learning_rate": 3.3048933573008986e-06, "loss": 0.4169, "step": 8871 }, { "epoch": 0.5323093538129238, "grad_norm": 1.3172626495361328, "learning_rate": 3.304214251949132e-06, "loss": 0.3899, "step": 8872 }, { "epoch": 0.5323693526129477, "grad_norm": 1.3334015607833862, "learning_rate": 3.3035351539913023e-06, "loss": 0.495, "step": 8873 }, { "epoch": 0.5324293514129718, "grad_norm": 1.2698205709457397, "learning_rate": 3.302856063453054e-06, "loss": 0.4273, "step": 8874 }, { "epoch": 0.5324893502129957, "grad_norm": 1.3042669296264648, "learning_rate": 3.302176980360034e-06, "loss": 0.434, "step": 8875 }, { "epoch": 0.5325493490130198, "grad_norm": 1.1664230823516846, "learning_rate": 3.301497904737888e-06, "loss": 0.382, "step": 8876 }, { "epoch": 0.5326093478130437, "grad_norm": 1.147657871246338, "learning_rate": 3.300818836612262e-06, "loss": 0.3805, "step": 8877 }, { "epoch": 0.5326693466130678, "grad_norm": 1.255428671836853, "learning_rate": 3.3001397760088014e-06, "loss": 0.4418, "step": 8878 }, { "epoch": 0.5327293454130917, "grad_norm": 1.3147132396697998, "learning_rate": 3.2994607229531505e-06, "loss": 0.391, "step": 8879 }, { "epoch": 0.5327893442131157, "grad_norm": 1.4066121578216553, "learning_rate": 3.298781677470955e-06, "loss": 0.5304, "step": 8880 }, { "epoch": 0.5328493430131397, "grad_norm": 1.1241711378097534, "learning_rate": 3.298102639587858e-06, "loss": 0.3385, "step": 8881 }, { "epoch": 0.5329093418131637, "grad_norm": 1.2384426593780518, "learning_rate": 3.297423609329505e-06, "loss": 0.4136, "step": 8882 }, { "epoch": 0.5329693406131877, "grad_norm": 1.4774020910263062, "learning_rate": 3.296744586721539e-06, "loss": 0.416, "step": 8883 }, { "epoch": 0.5330293394132117, "grad_norm": 1.2277772426605225, "learning_rate": 3.296065571789603e-06, "loss": 0.3629, "step": 8884 }, { "epoch": 0.5330893382132358, "grad_norm": 1.2642369270324707, "learning_rate": 3.2953865645593435e-06, "loss": 0.399, "step": 8885 }, { "epoch": 0.5331493370132597, "grad_norm": 1.4027748107910156, "learning_rate": 3.294707565056399e-06, "loss": 0.3964, "step": 8886 }, { "epoch": 0.5332093358132838, "grad_norm": 1.2015846967697144, "learning_rate": 3.2940285733064166e-06, "loss": 0.4011, "step": 8887 }, { "epoch": 0.5332693346133077, "grad_norm": 1.2826473712921143, "learning_rate": 3.2933495893350346e-06, "loss": 0.4381, "step": 8888 }, { "epoch": 0.5333293334133318, "grad_norm": 1.25347101688385, "learning_rate": 3.292670613167898e-06, "loss": 0.3875, "step": 8889 }, { "epoch": 0.5333893322133557, "grad_norm": 1.2220255136489868, "learning_rate": 3.291991644830648e-06, "loss": 0.3886, "step": 8890 }, { "epoch": 0.5334493310133798, "grad_norm": 1.32355535030365, "learning_rate": 3.2913126843489255e-06, "loss": 0.3627, "step": 8891 }, { "epoch": 0.5335093298134037, "grad_norm": 1.3172489404678345, "learning_rate": 3.290633731748372e-06, "loss": 0.4061, "step": 8892 }, { "epoch": 0.5335693286134278, "grad_norm": 1.371421456336975, "learning_rate": 3.2899547870546292e-06, "loss": 0.3762, "step": 8893 }, { "epoch": 0.5336293274134517, "grad_norm": 1.2859408855438232, "learning_rate": 3.289275850293337e-06, "loss": 0.3713, "step": 8894 }, { "epoch": 0.5336893262134758, "grad_norm": 1.4318315982818604, "learning_rate": 3.2885969214901355e-06, "loss": 0.4092, "step": 8895 }, { "epoch": 0.5337493250134997, "grad_norm": 1.1295857429504395, "learning_rate": 3.287918000670665e-06, "loss": 0.3665, "step": 8896 }, { "epoch": 0.5338093238135238, "grad_norm": 1.2920715808868408, "learning_rate": 3.287239087860566e-06, "loss": 0.3723, "step": 8897 }, { "epoch": 0.5338693226135477, "grad_norm": 1.1702892780303955, "learning_rate": 3.2865601830854767e-06, "loss": 0.3678, "step": 8898 }, { "epoch": 0.5339293214135717, "grad_norm": 1.3332736492156982, "learning_rate": 3.2858812863710375e-06, "loss": 0.4049, "step": 8899 }, { "epoch": 0.5339893202135957, "grad_norm": 1.1528635025024414, "learning_rate": 3.285202397742886e-06, "loss": 0.3984, "step": 8900 }, { "epoch": 0.5340493190136197, "grad_norm": 1.1670124530792236, "learning_rate": 3.2845235172266602e-06, "loss": 0.3519, "step": 8901 }, { "epoch": 0.5341093178136437, "grad_norm": 1.1958703994750977, "learning_rate": 3.2838446448480023e-06, "loss": 0.3566, "step": 8902 }, { "epoch": 0.5341693166136677, "grad_norm": 1.2944408655166626, "learning_rate": 3.283165780632545e-06, "loss": 0.3883, "step": 8903 }, { "epoch": 0.5342293154136917, "grad_norm": 1.3664230108261108, "learning_rate": 3.28248692460593e-06, "loss": 0.393, "step": 8904 }, { "epoch": 0.5342893142137157, "grad_norm": 1.2260823249816895, "learning_rate": 3.281808076793791e-06, "loss": 0.4047, "step": 8905 }, { "epoch": 0.5343493130137398, "grad_norm": 1.1950920820236206, "learning_rate": 3.2811292372217675e-06, "loss": 0.4262, "step": 8906 }, { "epoch": 0.5344093118137637, "grad_norm": 1.3960694074630737, "learning_rate": 3.2804504059154962e-06, "loss": 0.4094, "step": 8907 }, { "epoch": 0.5344693106137878, "grad_norm": 1.259067416191101, "learning_rate": 3.2797715829006124e-06, "loss": 0.4012, "step": 8908 }, { "epoch": 0.5345293094138117, "grad_norm": 1.3155369758605957, "learning_rate": 3.279092768202753e-06, "loss": 0.4199, "step": 8909 }, { "epoch": 0.5345893082138358, "grad_norm": 1.4112114906311035, "learning_rate": 3.278413961847553e-06, "loss": 0.4588, "step": 8910 }, { "epoch": 0.5346493070138597, "grad_norm": 1.2911821603775024, "learning_rate": 3.2777351638606483e-06, "loss": 0.4417, "step": 8911 }, { "epoch": 0.5347093058138838, "grad_norm": 1.3227232694625854, "learning_rate": 3.2770563742676733e-06, "loss": 0.3549, "step": 8912 }, { "epoch": 0.5347693046139077, "grad_norm": 1.3225544691085815, "learning_rate": 3.2763775930942633e-06, "loss": 0.402, "step": 8913 }, { "epoch": 0.5348293034139318, "grad_norm": 1.2904152870178223, "learning_rate": 3.2756988203660535e-06, "loss": 0.3961, "step": 8914 }, { "epoch": 0.5348893022139557, "grad_norm": 1.2876057624816895, "learning_rate": 3.275020056108676e-06, "loss": 0.4139, "step": 8915 }, { "epoch": 0.5349493010139797, "grad_norm": 1.3351075649261475, "learning_rate": 3.274341300347768e-06, "loss": 0.4066, "step": 8916 }, { "epoch": 0.5350092998140037, "grad_norm": 1.3546415567398071, "learning_rate": 3.273662553108959e-06, "loss": 0.4366, "step": 8917 }, { "epoch": 0.5350692986140277, "grad_norm": 1.1992123126983643, "learning_rate": 3.2729838144178848e-06, "loss": 0.3632, "step": 8918 }, { "epoch": 0.5351292974140517, "grad_norm": 1.1621005535125732, "learning_rate": 3.272305084300178e-06, "loss": 0.395, "step": 8919 }, { "epoch": 0.5351892962140757, "grad_norm": 1.2893091440200806, "learning_rate": 3.2716263627814703e-06, "loss": 0.3983, "step": 8920 }, { "epoch": 0.5352492950140997, "grad_norm": 1.1778281927108765, "learning_rate": 3.2709476498873956e-06, "loss": 0.3664, "step": 8921 }, { "epoch": 0.5353092938141237, "grad_norm": 1.2670025825500488, "learning_rate": 3.2702689456435834e-06, "loss": 0.4029, "step": 8922 }, { "epoch": 0.5353692926141477, "grad_norm": 1.3218865394592285, "learning_rate": 3.269590250075667e-06, "loss": 0.4138, "step": 8923 }, { "epoch": 0.5354292914141717, "grad_norm": 1.501306414604187, "learning_rate": 3.268911563209277e-06, "loss": 0.4572, "step": 8924 }, { "epoch": 0.5354892902141957, "grad_norm": 1.1748545169830322, "learning_rate": 3.2682328850700438e-06, "loss": 0.4069, "step": 8925 }, { "epoch": 0.5355492890142197, "grad_norm": 1.307222604751587, "learning_rate": 3.2675542156836e-06, "loss": 0.3721, "step": 8926 }, { "epoch": 0.5356092878142437, "grad_norm": 1.257749319076538, "learning_rate": 3.266875555075573e-06, "loss": 0.3828, "step": 8927 }, { "epoch": 0.5356692866142677, "grad_norm": 1.2056833505630493, "learning_rate": 3.2661969032715957e-06, "loss": 0.3454, "step": 8928 }, { "epoch": 0.5357292854142918, "grad_norm": 1.531380295753479, "learning_rate": 3.265518260297295e-06, "loss": 0.4189, "step": 8929 }, { "epoch": 0.5357892842143157, "grad_norm": 1.3334449529647827, "learning_rate": 3.2648396261783005e-06, "loss": 0.3979, "step": 8930 }, { "epoch": 0.5358492830143398, "grad_norm": 1.3323583602905273, "learning_rate": 3.2641610009402443e-06, "loss": 0.4436, "step": 8931 }, { "epoch": 0.5359092818143637, "grad_norm": 1.3058171272277832, "learning_rate": 3.2634823846087505e-06, "loss": 0.4084, "step": 8932 }, { "epoch": 0.5359692806143878, "grad_norm": 1.145967721939087, "learning_rate": 3.2628037772094513e-06, "loss": 0.3557, "step": 8933 }, { "epoch": 0.5360292794144117, "grad_norm": 1.2914061546325684, "learning_rate": 3.2621251787679706e-06, "loss": 0.4125, "step": 8934 }, { "epoch": 0.5360892782144357, "grad_norm": 1.285460352897644, "learning_rate": 3.261446589309941e-06, "loss": 0.3844, "step": 8935 }, { "epoch": 0.5361492770144597, "grad_norm": 1.2324641942977905, "learning_rate": 3.2607680088609838e-06, "loss": 0.3534, "step": 8936 }, { "epoch": 0.5362092758144837, "grad_norm": 1.3068829774856567, "learning_rate": 3.2600894374467303e-06, "loss": 0.3913, "step": 8937 }, { "epoch": 0.5362692746145077, "grad_norm": 1.2105575799942017, "learning_rate": 3.259410875092806e-06, "loss": 0.4051, "step": 8938 }, { "epoch": 0.5363292734145317, "grad_norm": 1.303229570388794, "learning_rate": 3.258732321824836e-06, "loss": 0.3996, "step": 8939 }, { "epoch": 0.5363892722145557, "grad_norm": 1.3819596767425537, "learning_rate": 3.2580537776684485e-06, "loss": 0.4447, "step": 8940 }, { "epoch": 0.5364492710145797, "grad_norm": 1.2653307914733887, "learning_rate": 3.2573752426492665e-06, "loss": 0.3353, "step": 8941 }, { "epoch": 0.5365092698146037, "grad_norm": 1.2638499736785889, "learning_rate": 3.2566967167929156e-06, "loss": 0.3905, "step": 8942 }, { "epoch": 0.5365692686146277, "grad_norm": 1.3324698209762573, "learning_rate": 3.2560182001250227e-06, "loss": 0.4527, "step": 8943 }, { "epoch": 0.5366292674146517, "grad_norm": 1.2500494718551636, "learning_rate": 3.2553396926712096e-06, "loss": 0.4194, "step": 8944 }, { "epoch": 0.5366892662146757, "grad_norm": 1.4370901584625244, "learning_rate": 3.2546611944571026e-06, "loss": 0.3937, "step": 8945 }, { "epoch": 0.5367492650146997, "grad_norm": 1.254123330116272, "learning_rate": 3.253982705508324e-06, "loss": 0.4056, "step": 8946 }, { "epoch": 0.5368092638147237, "grad_norm": 1.2050940990447998, "learning_rate": 3.2533042258504967e-06, "loss": 0.3511, "step": 8947 }, { "epoch": 0.5368692626147477, "grad_norm": 1.3659427165985107, "learning_rate": 3.252625755509247e-06, "loss": 0.3926, "step": 8948 }, { "epoch": 0.5369292614147717, "grad_norm": 1.3532532453536987, "learning_rate": 3.2519472945101933e-06, "loss": 0.3976, "step": 8949 }, { "epoch": 0.5369892602147956, "grad_norm": 1.2916873693466187, "learning_rate": 3.2512688428789627e-06, "loss": 0.3671, "step": 8950 }, { "epoch": 0.5370492590148197, "grad_norm": 1.3502851724624634, "learning_rate": 3.250590400641172e-06, "loss": 0.4293, "step": 8951 }, { "epoch": 0.5371092578148438, "grad_norm": 1.173170566558838, "learning_rate": 3.2499119678224473e-06, "loss": 0.3592, "step": 8952 }, { "epoch": 0.5371692566148677, "grad_norm": 1.3263300657272339, "learning_rate": 3.2492335444484073e-06, "loss": 0.4192, "step": 8953 }, { "epoch": 0.5372292554148917, "grad_norm": 1.3329696655273438, "learning_rate": 3.2485551305446737e-06, "loss": 0.3694, "step": 8954 }, { "epoch": 0.5372892542149157, "grad_norm": 1.2978217601776123, "learning_rate": 3.2478767261368686e-06, "loss": 0.3981, "step": 8955 }, { "epoch": 0.5373492530149397, "grad_norm": 1.2439513206481934, "learning_rate": 3.2471983312506095e-06, "loss": 0.4349, "step": 8956 }, { "epoch": 0.5374092518149637, "grad_norm": 1.305732250213623, "learning_rate": 3.246519945911519e-06, "loss": 0.3757, "step": 8957 }, { "epoch": 0.5374692506149877, "grad_norm": 1.2432314157485962, "learning_rate": 3.2458415701452138e-06, "loss": 0.4079, "step": 8958 }, { "epoch": 0.5375292494150117, "grad_norm": 1.2868417501449585, "learning_rate": 3.245163203977315e-06, "loss": 0.3994, "step": 8959 }, { "epoch": 0.5375892482150357, "grad_norm": 1.3901926279067993, "learning_rate": 3.244484847433442e-06, "loss": 0.4273, "step": 8960 }, { "epoch": 0.5376492470150597, "grad_norm": 1.483819842338562, "learning_rate": 3.2438065005392098e-06, "loss": 0.459, "step": 8961 }, { "epoch": 0.5377092458150837, "grad_norm": 1.2484642267227173, "learning_rate": 3.243128163320242e-06, "loss": 0.3712, "step": 8962 }, { "epoch": 0.5377692446151077, "grad_norm": 1.249455213546753, "learning_rate": 3.2424498358021505e-06, "loss": 0.4429, "step": 8963 }, { "epoch": 0.5378292434151317, "grad_norm": 1.3070356845855713, "learning_rate": 3.2417715180105577e-06, "loss": 0.4432, "step": 8964 }, { "epoch": 0.5378892422151557, "grad_norm": 1.1669354438781738, "learning_rate": 3.241093209971076e-06, "loss": 0.3729, "step": 8965 }, { "epoch": 0.5379492410151797, "grad_norm": 1.400638461112976, "learning_rate": 3.2404149117093244e-06, "loss": 0.4107, "step": 8966 }, { "epoch": 0.5380092398152037, "grad_norm": 1.4169563055038452, "learning_rate": 3.23973662325092e-06, "loss": 0.4141, "step": 8967 }, { "epoch": 0.5380692386152277, "grad_norm": 1.4129762649536133, "learning_rate": 3.239058344621477e-06, "loss": 0.3953, "step": 8968 }, { "epoch": 0.5381292374152516, "grad_norm": 1.160805106163025, "learning_rate": 3.238380075846612e-06, "loss": 0.3706, "step": 8969 }, { "epoch": 0.5381892362152757, "grad_norm": 1.343692660331726, "learning_rate": 3.2377018169519394e-06, "loss": 0.346, "step": 8970 }, { "epoch": 0.5382492350152996, "grad_norm": 1.2486231327056885, "learning_rate": 3.237023567963073e-06, "loss": 0.3774, "step": 8971 }, { "epoch": 0.5383092338153237, "grad_norm": 1.3666940927505493, "learning_rate": 3.23634532890563e-06, "loss": 0.3628, "step": 8972 }, { "epoch": 0.5383692326153477, "grad_norm": 1.2263659238815308, "learning_rate": 3.2356670998052217e-06, "loss": 0.4041, "step": 8973 }, { "epoch": 0.5384292314153717, "grad_norm": 1.477095365524292, "learning_rate": 3.2349888806874636e-06, "loss": 0.3861, "step": 8974 }, { "epoch": 0.5384892302153957, "grad_norm": 1.2326133251190186, "learning_rate": 3.2343106715779674e-06, "loss": 0.4342, "step": 8975 }, { "epoch": 0.5385492290154197, "grad_norm": 1.4218926429748535, "learning_rate": 3.2336324725023476e-06, "loss": 0.4612, "step": 8976 }, { "epoch": 0.5386092278154437, "grad_norm": 1.3754864931106567, "learning_rate": 3.2329542834862144e-06, "loss": 0.378, "step": 8977 }, { "epoch": 0.5386692266154677, "grad_norm": 1.3168525695800781, "learning_rate": 3.2322761045551806e-06, "loss": 0.4092, "step": 8978 }, { "epoch": 0.5387292254154917, "grad_norm": 1.4277138710021973, "learning_rate": 3.231597935734861e-06, "loss": 0.3862, "step": 8979 }, { "epoch": 0.5387892242155157, "grad_norm": 1.2817355394363403, "learning_rate": 3.2309197770508625e-06, "loss": 0.3944, "step": 8980 }, { "epoch": 0.5388492230155397, "grad_norm": 1.3395012617111206, "learning_rate": 3.2302416285288004e-06, "loss": 0.4136, "step": 8981 }, { "epoch": 0.5389092218155637, "grad_norm": 1.2727835178375244, "learning_rate": 3.2295634901942804e-06, "loss": 0.3979, "step": 8982 }, { "epoch": 0.5389692206155877, "grad_norm": 1.340511441230774, "learning_rate": 3.228885362072916e-06, "loss": 0.3976, "step": 8983 }, { "epoch": 0.5390292194156117, "grad_norm": 1.256426215171814, "learning_rate": 3.228207244190317e-06, "loss": 0.4112, "step": 8984 }, { "epoch": 0.5390892182156357, "grad_norm": 1.2034701108932495, "learning_rate": 3.227529136572092e-06, "loss": 0.3781, "step": 8985 }, { "epoch": 0.5391492170156597, "grad_norm": 1.2328851222991943, "learning_rate": 3.22685103924385e-06, "loss": 0.393, "step": 8986 }, { "epoch": 0.5392092158156837, "grad_norm": 1.3052700757980347, "learning_rate": 3.226172952231199e-06, "loss": 0.3712, "step": 8987 }, { "epoch": 0.5392692146157076, "grad_norm": 1.3650028705596924, "learning_rate": 3.225494875559749e-06, "loss": 0.4226, "step": 8988 }, { "epoch": 0.5393292134157317, "grad_norm": 1.2460451126098633, "learning_rate": 3.2248168092551067e-06, "loss": 0.3653, "step": 8989 }, { "epoch": 0.5393892122157556, "grad_norm": 1.1754130125045776, "learning_rate": 3.2241387533428794e-06, "loss": 0.3894, "step": 8990 }, { "epoch": 0.5394492110157797, "grad_norm": 1.429214358329773, "learning_rate": 3.2234607078486754e-06, "loss": 0.4137, "step": 8991 }, { "epoch": 0.5395092098158036, "grad_norm": 1.3920190334320068, "learning_rate": 3.2227826727980996e-06, "loss": 0.3929, "step": 8992 }, { "epoch": 0.5395692086158277, "grad_norm": 1.2228643894195557, "learning_rate": 3.2221046482167603e-06, "loss": 0.4028, "step": 8993 }, { "epoch": 0.5396292074158516, "grad_norm": 1.227574110031128, "learning_rate": 3.2214266341302614e-06, "loss": 0.394, "step": 8994 }, { "epoch": 0.5396892062158757, "grad_norm": 1.2857420444488525, "learning_rate": 3.220748630564209e-06, "loss": 0.3912, "step": 8995 }, { "epoch": 0.5397492050158997, "grad_norm": 1.2966891527175903, "learning_rate": 3.22007063754421e-06, "loss": 0.3706, "step": 8996 }, { "epoch": 0.5398092038159237, "grad_norm": 1.274638295173645, "learning_rate": 3.2193926550958657e-06, "loss": 0.3892, "step": 8997 }, { "epoch": 0.5398692026159477, "grad_norm": 1.246913194656372, "learning_rate": 3.2187146832447843e-06, "loss": 0.393, "step": 8998 }, { "epoch": 0.5399292014159717, "grad_norm": 1.417988657951355, "learning_rate": 3.218036722016567e-06, "loss": 0.4148, "step": 8999 }, { "epoch": 0.5399892002159957, "grad_norm": 1.3088346719741821, "learning_rate": 3.217358771436818e-06, "loss": 0.3878, "step": 9000 }, { "epoch": 0.5400491990160197, "grad_norm": 1.263157844543457, "learning_rate": 3.2166808315311406e-06, "loss": 0.3708, "step": 9001 }, { "epoch": 0.5401091978160437, "grad_norm": 1.5068399906158447, "learning_rate": 3.2160029023251373e-06, "loss": 0.4004, "step": 9002 }, { "epoch": 0.5401691966160677, "grad_norm": 1.3012326955795288, "learning_rate": 3.2153249838444114e-06, "loss": 0.4351, "step": 9003 }, { "epoch": 0.5402291954160917, "grad_norm": 1.3500988483428955, "learning_rate": 3.2146470761145627e-06, "loss": 0.3966, "step": 9004 }, { "epoch": 0.5402891942161157, "grad_norm": 1.34079909324646, "learning_rate": 3.2139691791611948e-06, "loss": 0.4298, "step": 9005 }, { "epoch": 0.5403491930161397, "grad_norm": 1.2834503650665283, "learning_rate": 3.2132912930099068e-06, "loss": 0.3688, "step": 9006 }, { "epoch": 0.5404091918161636, "grad_norm": 1.1948883533477783, "learning_rate": 3.2126134176863002e-06, "loss": 0.4229, "step": 9007 }, { "epoch": 0.5404691906161877, "grad_norm": 1.2032239437103271, "learning_rate": 3.2119355532159775e-06, "loss": 0.3845, "step": 9008 }, { "epoch": 0.5405291894162116, "grad_norm": 1.362709879875183, "learning_rate": 3.211257699624534e-06, "loss": 0.3781, "step": 9009 }, { "epoch": 0.5405891882162357, "grad_norm": 1.2402679920196533, "learning_rate": 3.210579856937574e-06, "loss": 0.3757, "step": 9010 }, { "epoch": 0.5406491870162596, "grad_norm": 1.3804453611373901, "learning_rate": 3.2099020251806925e-06, "loss": 0.4357, "step": 9011 }, { "epoch": 0.5407091858162837, "grad_norm": 1.146626353263855, "learning_rate": 3.20922420437949e-06, "loss": 0.416, "step": 9012 }, { "epoch": 0.5407691846163076, "grad_norm": 1.2776236534118652, "learning_rate": 3.2085463945595654e-06, "loss": 0.3285, "step": 9013 }, { "epoch": 0.5408291834163317, "grad_norm": 1.4314706325531006, "learning_rate": 3.207868595746515e-06, "loss": 0.4347, "step": 9014 }, { "epoch": 0.5408891822163556, "grad_norm": 1.1984972953796387, "learning_rate": 3.2071908079659375e-06, "loss": 0.3368, "step": 9015 }, { "epoch": 0.5409491810163797, "grad_norm": 1.1955212354660034, "learning_rate": 3.2065130312434284e-06, "loss": 0.3682, "step": 9016 }, { "epoch": 0.5410091798164036, "grad_norm": 1.301005244255066, "learning_rate": 3.2058352656045856e-06, "loss": 0.3966, "step": 9017 }, { "epoch": 0.5410691786164277, "grad_norm": 1.2964717149734497, "learning_rate": 3.205157511075004e-06, "loss": 0.4224, "step": 9018 }, { "epoch": 0.5411291774164517, "grad_norm": 1.3838053941726685, "learning_rate": 3.2044797676802796e-06, "loss": 0.4017, "step": 9019 }, { "epoch": 0.5411891762164757, "grad_norm": 1.4642605781555176, "learning_rate": 3.2038020354460094e-06, "loss": 0.4135, "step": 9020 }, { "epoch": 0.5412491750164997, "grad_norm": 1.4202253818511963, "learning_rate": 3.2031243143977855e-06, "loss": 0.3679, "step": 9021 }, { "epoch": 0.5413091738165237, "grad_norm": 1.207349181175232, "learning_rate": 3.2024466045612048e-06, "loss": 0.4297, "step": 9022 }, { "epoch": 0.5413691726165477, "grad_norm": 1.3074268102645874, "learning_rate": 3.2017689059618594e-06, "loss": 0.4035, "step": 9023 }, { "epoch": 0.5414291714165717, "grad_norm": 1.3282276391983032, "learning_rate": 3.2010912186253427e-06, "loss": 0.3745, "step": 9024 }, { "epoch": 0.5414891702165957, "grad_norm": 1.192086935043335, "learning_rate": 3.2004135425772514e-06, "loss": 0.3849, "step": 9025 }, { "epoch": 0.5415491690166196, "grad_norm": 1.2731146812438965, "learning_rate": 3.199735877843173e-06, "loss": 0.3981, "step": 9026 }, { "epoch": 0.5416091678166437, "grad_norm": 1.2941901683807373, "learning_rate": 3.199058224448705e-06, "loss": 0.3795, "step": 9027 }, { "epoch": 0.5416691666166676, "grad_norm": 1.4222652912139893, "learning_rate": 3.198380582419435e-06, "loss": 0.4391, "step": 9028 }, { "epoch": 0.5417291654166917, "grad_norm": 1.490922451019287, "learning_rate": 3.1977029517809564e-06, "loss": 0.4136, "step": 9029 }, { "epoch": 0.5417891642167156, "grad_norm": 1.3746951818466187, "learning_rate": 3.197025332558861e-06, "loss": 0.4475, "step": 9030 }, { "epoch": 0.5418491630167397, "grad_norm": 1.1659060716629028, "learning_rate": 3.1963477247787372e-06, "loss": 0.3206, "step": 9031 }, { "epoch": 0.5419091618167636, "grad_norm": 1.2861522436141968, "learning_rate": 3.195670128466178e-06, "loss": 0.4484, "step": 9032 }, { "epoch": 0.5419691606167877, "grad_norm": 1.2485178709030151, "learning_rate": 3.19499254364677e-06, "loss": 0.4219, "step": 9033 }, { "epoch": 0.5420291594168116, "grad_norm": 1.257599949836731, "learning_rate": 3.1943149703461052e-06, "loss": 0.4201, "step": 9034 }, { "epoch": 0.5420891582168357, "grad_norm": 1.178578495979309, "learning_rate": 3.1936374085897704e-06, "loss": 0.3468, "step": 9035 }, { "epoch": 0.5421491570168596, "grad_norm": 1.3860527276992798, "learning_rate": 3.192959858403355e-06, "loss": 0.3867, "step": 9036 }, { "epoch": 0.5422091558168837, "grad_norm": 1.225109338760376, "learning_rate": 3.192282319812447e-06, "loss": 0.3814, "step": 9037 }, { "epoch": 0.5422691546169076, "grad_norm": 1.2639110088348389, "learning_rate": 3.191604792842634e-06, "loss": 0.3926, "step": 9038 }, { "epoch": 0.5423291534169317, "grad_norm": 1.1999090909957886, "learning_rate": 3.190927277519503e-06, "loss": 0.357, "step": 9039 }, { "epoch": 0.5423891522169556, "grad_norm": 1.2653993368148804, "learning_rate": 3.1902497738686403e-06, "loss": 0.4021, "step": 9040 }, { "epoch": 0.5424491510169797, "grad_norm": 1.4250102043151855, "learning_rate": 3.1895722819156312e-06, "loss": 0.3858, "step": 9041 }, { "epoch": 0.5425091498170037, "grad_norm": 1.4102035760879517, "learning_rate": 3.1888948016860647e-06, "loss": 0.4329, "step": 9042 }, { "epoch": 0.5425691486170277, "grad_norm": 1.2825820446014404, "learning_rate": 3.1882173332055236e-06, "loss": 0.3976, "step": 9043 }, { "epoch": 0.5426291474170517, "grad_norm": 1.2464706897735596, "learning_rate": 3.1875398764995934e-06, "loss": 0.3937, "step": 9044 }, { "epoch": 0.5426891462170756, "grad_norm": 1.298356056213379, "learning_rate": 3.186862431593858e-06, "loss": 0.4533, "step": 9045 }, { "epoch": 0.5427491450170997, "grad_norm": 1.1855411529541016, "learning_rate": 3.1861849985139026e-06, "loss": 0.3668, "step": 9046 }, { "epoch": 0.5428091438171236, "grad_norm": 1.2688932418823242, "learning_rate": 3.1855075772853096e-06, "loss": 0.3539, "step": 9047 }, { "epoch": 0.5428691426171477, "grad_norm": 1.2785964012145996, "learning_rate": 3.1848301679336622e-06, "loss": 0.4157, "step": 9048 }, { "epoch": 0.5429291414171716, "grad_norm": 1.1414847373962402, "learning_rate": 3.1841527704845447e-06, "loss": 0.3971, "step": 9049 }, { "epoch": 0.5429891402171957, "grad_norm": 1.1897227764129639, "learning_rate": 3.183475384963537e-06, "loss": 0.3653, "step": 9050 }, { "epoch": 0.5430491390172196, "grad_norm": 1.279689073562622, "learning_rate": 3.182798011396223e-06, "loss": 0.376, "step": 9051 }, { "epoch": 0.5431091378172437, "grad_norm": 1.222745418548584, "learning_rate": 3.182120649808182e-06, "loss": 0.3968, "step": 9052 }, { "epoch": 0.5431691366172676, "grad_norm": 1.2676314115524292, "learning_rate": 3.1814433002249945e-06, "loss": 0.4281, "step": 9053 }, { "epoch": 0.5432291354172917, "grad_norm": 1.2763203382492065, "learning_rate": 3.180765962672245e-06, "loss": 0.4366, "step": 9054 }, { "epoch": 0.5432891342173156, "grad_norm": 1.2437219619750977, "learning_rate": 3.1800886371755082e-06, "loss": 0.411, "step": 9055 }, { "epoch": 0.5433491330173397, "grad_norm": 1.298336386680603, "learning_rate": 3.1794113237603684e-06, "loss": 0.3967, "step": 9056 }, { "epoch": 0.5434091318173636, "grad_norm": 1.418975591659546, "learning_rate": 3.1787340224524e-06, "loss": 0.4516, "step": 9057 }, { "epoch": 0.5434691306173877, "grad_norm": 1.2320290803909302, "learning_rate": 3.178056733277186e-06, "loss": 0.3625, "step": 9058 }, { "epoch": 0.5435291294174116, "grad_norm": 1.183439016342163, "learning_rate": 3.1773794562603007e-06, "loss": 0.3356, "step": 9059 }, { "epoch": 0.5435891282174357, "grad_norm": 1.2686281204223633, "learning_rate": 3.176702191427324e-06, "loss": 0.3826, "step": 9060 }, { "epoch": 0.5436491270174596, "grad_norm": 1.1793055534362793, "learning_rate": 3.1760249388038328e-06, "loss": 0.391, "step": 9061 }, { "epoch": 0.5437091258174837, "grad_norm": 1.3466851711273193, "learning_rate": 3.1753476984154035e-06, "loss": 0.4082, "step": 9062 }, { "epoch": 0.5437691246175077, "grad_norm": 1.3084986209869385, "learning_rate": 3.174670470287613e-06, "loss": 0.391, "step": 9063 }, { "epoch": 0.5438291234175316, "grad_norm": 1.1780599355697632, "learning_rate": 3.173993254446036e-06, "loss": 0.3835, "step": 9064 }, { "epoch": 0.5438891222175557, "grad_norm": 1.2529354095458984, "learning_rate": 3.173316050916249e-06, "loss": 0.4207, "step": 9065 }, { "epoch": 0.5439491210175796, "grad_norm": 1.2068092823028564, "learning_rate": 3.1726388597238264e-06, "loss": 0.3652, "step": 9066 }, { "epoch": 0.5440091198176037, "grad_norm": 1.3622292280197144, "learning_rate": 3.171961680894342e-06, "loss": 0.3843, "step": 9067 }, { "epoch": 0.5440691186176276, "grad_norm": 1.2894939184188843, "learning_rate": 3.1712845144533714e-06, "loss": 0.4043, "step": 9068 }, { "epoch": 0.5441291174176517, "grad_norm": 1.3225864171981812, "learning_rate": 3.1706073604264864e-06, "loss": 0.4383, "step": 9069 }, { "epoch": 0.5441891162176756, "grad_norm": 1.1892681121826172, "learning_rate": 3.169930218839259e-06, "loss": 0.3913, "step": 9070 }, { "epoch": 0.5442491150176997, "grad_norm": 1.356371283531189, "learning_rate": 3.1692530897172665e-06, "loss": 0.4134, "step": 9071 }, { "epoch": 0.5443091138177236, "grad_norm": 1.2858941555023193, "learning_rate": 3.1685759730860755e-06, "loss": 0.3896, "step": 9072 }, { "epoch": 0.5443691126177477, "grad_norm": 1.3709784746170044, "learning_rate": 3.1678988689712625e-06, "loss": 0.4041, "step": 9073 }, { "epoch": 0.5444291114177716, "grad_norm": 1.4234317541122437, "learning_rate": 3.1672217773983935e-06, "loss": 0.3734, "step": 9074 }, { "epoch": 0.5444891102177957, "grad_norm": 1.2843523025512695, "learning_rate": 3.1665446983930443e-06, "loss": 0.3657, "step": 9075 }, { "epoch": 0.5445491090178196, "grad_norm": 1.2762293815612793, "learning_rate": 3.1658676319807808e-06, "loss": 0.4095, "step": 9076 }, { "epoch": 0.5446091078178437, "grad_norm": 1.2924597263336182, "learning_rate": 3.1651905781871745e-06, "loss": 0.3733, "step": 9077 }, { "epoch": 0.5446691066178676, "grad_norm": 1.2609660625457764, "learning_rate": 3.1645135370377957e-06, "loss": 0.3957, "step": 9078 }, { "epoch": 0.5447291054178917, "grad_norm": 1.2639617919921875, "learning_rate": 3.1638365085582116e-06, "loss": 0.3812, "step": 9079 }, { "epoch": 0.5447891042179156, "grad_norm": 1.2747735977172852, "learning_rate": 3.1631594927739917e-06, "loss": 0.4047, "step": 9080 }, { "epoch": 0.5448491030179397, "grad_norm": 1.2266193628311157, "learning_rate": 3.1624824897107023e-06, "loss": 0.3853, "step": 9081 }, { "epoch": 0.5449091018179636, "grad_norm": 1.3308836221694946, "learning_rate": 3.161805499393912e-06, "loss": 0.3991, "step": 9082 }, { "epoch": 0.5449691006179876, "grad_norm": 1.3159754276275635, "learning_rate": 3.1611285218491875e-06, "loss": 0.4232, "step": 9083 }, { "epoch": 0.5450290994180116, "grad_norm": 1.3645130395889282, "learning_rate": 3.1604515571020937e-06, "loss": 0.461, "step": 9084 }, { "epoch": 0.5450890982180356, "grad_norm": 1.2294987440109253, "learning_rate": 3.1597746051781987e-06, "loss": 0.4147, "step": 9085 }, { "epoch": 0.5451490970180597, "grad_norm": 1.3335751295089722, "learning_rate": 3.159097666103066e-06, "loss": 0.3344, "step": 9086 }, { "epoch": 0.5452090958180836, "grad_norm": 1.1564794778823853, "learning_rate": 3.1584207399022625e-06, "loss": 0.3853, "step": 9087 }, { "epoch": 0.5452690946181077, "grad_norm": 1.199169635772705, "learning_rate": 3.157743826601349e-06, "loss": 0.3711, "step": 9088 }, { "epoch": 0.5453290934181316, "grad_norm": 1.2404755353927612, "learning_rate": 3.1570669262258934e-06, "loss": 0.4329, "step": 9089 }, { "epoch": 0.5453890922181557, "grad_norm": 1.2934324741363525, "learning_rate": 3.1563900388014584e-06, "loss": 0.386, "step": 9090 }, { "epoch": 0.5454490910181796, "grad_norm": 1.3728138208389282, "learning_rate": 3.155713164353605e-06, "loss": 0.4233, "step": 9091 }, { "epoch": 0.5455090898182037, "grad_norm": 1.1072393655776978, "learning_rate": 3.1550363029078974e-06, "loss": 0.3781, "step": 9092 }, { "epoch": 0.5455690886182276, "grad_norm": 1.1904888153076172, "learning_rate": 3.1543594544898967e-06, "loss": 0.357, "step": 9093 }, { "epoch": 0.5456290874182517, "grad_norm": 1.4612568616867065, "learning_rate": 3.153682619125164e-06, "loss": 0.3849, "step": 9094 }, { "epoch": 0.5456890862182756, "grad_norm": 1.263620138168335, "learning_rate": 3.1530057968392627e-06, "loss": 0.3878, "step": 9095 }, { "epoch": 0.5457490850182997, "grad_norm": 1.2358121871948242, "learning_rate": 3.1523289876577504e-06, "loss": 0.3919, "step": 9096 }, { "epoch": 0.5458090838183236, "grad_norm": 1.3526262044906616, "learning_rate": 3.1516521916061886e-06, "loss": 0.4804, "step": 9097 }, { "epoch": 0.5458690826183477, "grad_norm": 1.3183629512786865, "learning_rate": 3.150975408710136e-06, "loss": 0.3433, "step": 9098 }, { "epoch": 0.5459290814183716, "grad_norm": 1.3717148303985596, "learning_rate": 3.150298638995153e-06, "loss": 0.4226, "step": 9099 }, { "epoch": 0.5459890802183957, "grad_norm": 1.338801622390747, "learning_rate": 3.149621882486796e-06, "loss": 0.4376, "step": 9100 }, { "epoch": 0.5460490790184196, "grad_norm": 1.377404808998108, "learning_rate": 3.148945139210624e-06, "loss": 0.4361, "step": 9101 }, { "epoch": 0.5461090778184436, "grad_norm": 1.3835980892181396, "learning_rate": 3.148268409192196e-06, "loss": 0.41, "step": 9102 }, { "epoch": 0.5461690766184676, "grad_norm": 1.2492510080337524, "learning_rate": 3.1475916924570663e-06, "loss": 0.4635, "step": 9103 }, { "epoch": 0.5462290754184916, "grad_norm": 1.2331349849700928, "learning_rate": 3.146914989030795e-06, "loss": 0.3712, "step": 9104 }, { "epoch": 0.5462890742185156, "grad_norm": 1.2179843187332153, "learning_rate": 3.1462382989389333e-06, "loss": 0.372, "step": 9105 }, { "epoch": 0.5463490730185396, "grad_norm": 1.212720513343811, "learning_rate": 3.14556162220704e-06, "loss": 0.3793, "step": 9106 }, { "epoch": 0.5464090718185636, "grad_norm": 1.2565033435821533, "learning_rate": 3.1448849588606706e-06, "loss": 0.4058, "step": 9107 }, { "epoch": 0.5464690706185876, "grad_norm": 1.2546310424804688, "learning_rate": 3.1442083089253775e-06, "loss": 0.4167, "step": 9108 }, { "epoch": 0.5465290694186117, "grad_norm": 1.3831430673599243, "learning_rate": 3.1435316724267163e-06, "loss": 0.4204, "step": 9109 }, { "epoch": 0.5465890682186356, "grad_norm": 1.1991631984710693, "learning_rate": 3.142855049390239e-06, "loss": 0.4253, "step": 9110 }, { "epoch": 0.5466490670186597, "grad_norm": 1.3340367078781128, "learning_rate": 3.1421784398414993e-06, "loss": 0.3953, "step": 9111 }, { "epoch": 0.5467090658186836, "grad_norm": 1.2173655033111572, "learning_rate": 3.1415018438060505e-06, "loss": 0.3663, "step": 9112 }, { "epoch": 0.5467690646187077, "grad_norm": 1.2468029260635376, "learning_rate": 3.1408252613094433e-06, "loss": 0.394, "step": 9113 }, { "epoch": 0.5468290634187316, "grad_norm": 1.4883421659469604, "learning_rate": 3.1401486923772298e-06, "loss": 0.3996, "step": 9114 }, { "epoch": 0.5468890622187557, "grad_norm": 1.2628344297409058, "learning_rate": 3.1394721370349602e-06, "loss": 0.3474, "step": 9115 }, { "epoch": 0.5469490610187796, "grad_norm": 1.2059932947158813, "learning_rate": 3.138795595308186e-06, "loss": 0.3794, "step": 9116 }, { "epoch": 0.5470090598188037, "grad_norm": 1.1640729904174805, "learning_rate": 3.1381190672224557e-06, "loss": 0.4011, "step": 9117 }, { "epoch": 0.5470690586188276, "grad_norm": 1.3033345937728882, "learning_rate": 3.1374425528033193e-06, "loss": 0.3989, "step": 9118 }, { "epoch": 0.5471290574188516, "grad_norm": 1.3322371244430542, "learning_rate": 3.1367660520763277e-06, "loss": 0.395, "step": 9119 }, { "epoch": 0.5471890562188756, "grad_norm": 1.351299524307251, "learning_rate": 3.136089565067025e-06, "loss": 0.418, "step": 9120 }, { "epoch": 0.5472490550188996, "grad_norm": 1.2659556865692139, "learning_rate": 3.135413091800964e-06, "loss": 0.3446, "step": 9121 }, { "epoch": 0.5473090538189236, "grad_norm": 1.3143768310546875, "learning_rate": 3.1347366323036873e-06, "loss": 0.3722, "step": 9122 }, { "epoch": 0.5473690526189476, "grad_norm": 1.3662936687469482, "learning_rate": 3.1340601866007445e-06, "loss": 0.4253, "step": 9123 }, { "epoch": 0.5474290514189716, "grad_norm": 1.146573781967163, "learning_rate": 3.1333837547176818e-06, "loss": 0.3853, "step": 9124 }, { "epoch": 0.5474890502189956, "grad_norm": 1.3047044277191162, "learning_rate": 3.132707336680044e-06, "loss": 0.3814, "step": 9125 }, { "epoch": 0.5475490490190196, "grad_norm": 1.2507551908493042, "learning_rate": 3.1320309325133776e-06, "loss": 0.3835, "step": 9126 }, { "epoch": 0.5476090478190436, "grad_norm": 1.3907201290130615, "learning_rate": 3.1313545422432254e-06, "loss": 0.3978, "step": 9127 }, { "epoch": 0.5476690466190676, "grad_norm": 1.369302749633789, "learning_rate": 3.130678165895133e-06, "loss": 0.4102, "step": 9128 }, { "epoch": 0.5477290454190916, "grad_norm": 1.380201816558838, "learning_rate": 3.1300018034946435e-06, "loss": 0.4339, "step": 9129 }, { "epoch": 0.5477890442191157, "grad_norm": 1.5263758897781372, "learning_rate": 3.1293254550672997e-06, "loss": 0.4007, "step": 9130 }, { "epoch": 0.5478490430191396, "grad_norm": 1.3650124073028564, "learning_rate": 3.1286491206386454e-06, "loss": 0.3854, "step": 9131 }, { "epoch": 0.5479090418191637, "grad_norm": 1.2772752046585083, "learning_rate": 3.1279728002342215e-06, "loss": 0.423, "step": 9132 }, { "epoch": 0.5479690406191876, "grad_norm": 1.3401925563812256, "learning_rate": 3.1272964938795704e-06, "loss": 0.4232, "step": 9133 }, { "epoch": 0.5480290394192117, "grad_norm": 1.1888203620910645, "learning_rate": 3.1266202016002314e-06, "loss": 0.4127, "step": 9134 }, { "epoch": 0.5480890382192356, "grad_norm": 1.2715957164764404, "learning_rate": 3.1259439234217472e-06, "loss": 0.4273, "step": 9135 }, { "epoch": 0.5481490370192597, "grad_norm": 1.1822260618209839, "learning_rate": 3.1252676593696575e-06, "loss": 0.4133, "step": 9136 }, { "epoch": 0.5482090358192836, "grad_norm": 1.1458985805511475, "learning_rate": 3.1245914094695e-06, "loss": 0.3506, "step": 9137 }, { "epoch": 0.5482690346193076, "grad_norm": 1.2388722896575928, "learning_rate": 3.1239151737468156e-06, "loss": 0.3858, "step": 9138 }, { "epoch": 0.5483290334193316, "grad_norm": 1.182102084159851, "learning_rate": 3.123238952227141e-06, "loss": 0.3577, "step": 9139 }, { "epoch": 0.5483890322193556, "grad_norm": 1.2815974950790405, "learning_rate": 3.122562744936015e-06, "loss": 0.3992, "step": 9140 }, { "epoch": 0.5484490310193796, "grad_norm": 1.2496212720870972, "learning_rate": 3.121886551898974e-06, "loss": 0.3952, "step": 9141 }, { "epoch": 0.5485090298194036, "grad_norm": 1.2736737728118896, "learning_rate": 3.1212103731415554e-06, "loss": 0.3887, "step": 9142 }, { "epoch": 0.5485690286194276, "grad_norm": 1.2335093021392822, "learning_rate": 3.1205342086892955e-06, "loss": 0.4085, "step": 9143 }, { "epoch": 0.5486290274194516, "grad_norm": 1.2315884828567505, "learning_rate": 3.1198580585677294e-06, "loss": 0.3415, "step": 9144 }, { "epoch": 0.5486890262194756, "grad_norm": 1.26238214969635, "learning_rate": 3.119181922802393e-06, "loss": 0.3874, "step": 9145 }, { "epoch": 0.5487490250194996, "grad_norm": 1.2632317543029785, "learning_rate": 3.11850580141882e-06, "loss": 0.3676, "step": 9146 }, { "epoch": 0.5488090238195236, "grad_norm": 1.391770839691162, "learning_rate": 3.117829694442544e-06, "loss": 0.3946, "step": 9147 }, { "epoch": 0.5488690226195476, "grad_norm": 1.24155592918396, "learning_rate": 3.1171536018991015e-06, "loss": 0.4252, "step": 9148 }, { "epoch": 0.5489290214195716, "grad_norm": 1.282605528831482, "learning_rate": 3.1164775238140207e-06, "loss": 0.4234, "step": 9149 }, { "epoch": 0.5489890202195956, "grad_norm": 1.2505459785461426, "learning_rate": 3.1158014602128387e-06, "loss": 0.4016, "step": 9150 }, { "epoch": 0.5490490190196196, "grad_norm": 1.3822977542877197, "learning_rate": 3.115125411121083e-06, "loss": 0.436, "step": 9151 }, { "epoch": 0.5491090178196436, "grad_norm": 1.2467613220214844, "learning_rate": 3.114449376564288e-06, "loss": 0.3587, "step": 9152 }, { "epoch": 0.5491690166196677, "grad_norm": 1.350216031074524, "learning_rate": 3.113773356567984e-06, "loss": 0.3956, "step": 9153 }, { "epoch": 0.5492290154196916, "grad_norm": 1.2278386354446411, "learning_rate": 3.1130973511577e-06, "loss": 0.3914, "step": 9154 }, { "epoch": 0.5492890142197157, "grad_norm": 1.220449686050415, "learning_rate": 3.112421360358967e-06, "loss": 0.3444, "step": 9155 }, { "epoch": 0.5493490130197396, "grad_norm": 1.2776715755462646, "learning_rate": 3.111745384197313e-06, "loss": 0.384, "step": 9156 }, { "epoch": 0.5494090118197636, "grad_norm": 1.3204647302627563, "learning_rate": 3.1110694226982673e-06, "loss": 0.4121, "step": 9157 }, { "epoch": 0.5494690106197876, "grad_norm": 1.2468180656433105, "learning_rate": 3.110393475887357e-06, "loss": 0.4641, "step": 9158 }, { "epoch": 0.5495290094198116, "grad_norm": 1.3491032123565674, "learning_rate": 3.10971754379011e-06, "loss": 0.431, "step": 9159 }, { "epoch": 0.5495890082198356, "grad_norm": 1.2410917282104492, "learning_rate": 3.109041626432054e-06, "loss": 0.4085, "step": 9160 }, { "epoch": 0.5496490070198596, "grad_norm": 1.2850781679153442, "learning_rate": 3.108365723838714e-06, "loss": 0.4074, "step": 9161 }, { "epoch": 0.5497090058198836, "grad_norm": 1.1897644996643066, "learning_rate": 3.107689836035617e-06, "loss": 0.3914, "step": 9162 }, { "epoch": 0.5497690046199076, "grad_norm": 1.2389497756958008, "learning_rate": 3.107013963048287e-06, "loss": 0.3851, "step": 9163 }, { "epoch": 0.5498290034199316, "grad_norm": 1.306814193725586, "learning_rate": 3.1063381049022476e-06, "loss": 0.4189, "step": 9164 }, { "epoch": 0.5498890022199556, "grad_norm": 1.2878637313842773, "learning_rate": 3.1056622616230274e-06, "loss": 0.4427, "step": 9165 }, { "epoch": 0.5499490010199796, "grad_norm": 1.3781609535217285, "learning_rate": 3.1049864332361445e-06, "loss": 0.4032, "step": 9166 }, { "epoch": 0.5500089998200036, "grad_norm": 1.395459532737732, "learning_rate": 3.104310619767127e-06, "loss": 0.3672, "step": 9167 }, { "epoch": 0.5500689986200276, "grad_norm": 1.3057976961135864, "learning_rate": 3.103634821241492e-06, "loss": 0.4119, "step": 9168 }, { "epoch": 0.5501289974200516, "grad_norm": 1.2854282855987549, "learning_rate": 3.1029590376847653e-06, "loss": 0.4015, "step": 9169 }, { "epoch": 0.5501889962200756, "grad_norm": 1.3042467832565308, "learning_rate": 3.1022832691224663e-06, "loss": 0.4266, "step": 9170 }, { "epoch": 0.5502489950200996, "grad_norm": 1.2962133884429932, "learning_rate": 3.1016075155801163e-06, "loss": 0.3847, "step": 9171 }, { "epoch": 0.5503089938201235, "grad_norm": 1.2004660367965698, "learning_rate": 3.1009317770832356e-06, "loss": 0.3365, "step": 9172 }, { "epoch": 0.5503689926201476, "grad_norm": 1.4020603895187378, "learning_rate": 3.1002560536573436e-06, "loss": 0.4122, "step": 9173 }, { "epoch": 0.5504289914201715, "grad_norm": 1.2897104024887085, "learning_rate": 3.09958034532796e-06, "loss": 0.4206, "step": 9174 }, { "epoch": 0.5504889902201956, "grad_norm": 1.3342472314834595, "learning_rate": 3.0989046521206006e-06, "loss": 0.4038, "step": 9175 }, { "epoch": 0.5505489890202196, "grad_norm": 1.4389368295669556, "learning_rate": 3.0982289740607857e-06, "loss": 0.438, "step": 9176 }, { "epoch": 0.5506089878202436, "grad_norm": 1.1780160665512085, "learning_rate": 3.097553311174033e-06, "loss": 0.3643, "step": 9177 }, { "epoch": 0.5506689866202676, "grad_norm": 1.2389519214630127, "learning_rate": 3.0968776634858563e-06, "loss": 0.3995, "step": 9178 }, { "epoch": 0.5507289854202916, "grad_norm": 1.2226979732513428, "learning_rate": 3.0962020310217767e-06, "loss": 0.4036, "step": 9179 }, { "epoch": 0.5507889842203156, "grad_norm": 1.2794228792190552, "learning_rate": 3.0955264138073035e-06, "loss": 0.3974, "step": 9180 }, { "epoch": 0.5508489830203396, "grad_norm": 1.3203998804092407, "learning_rate": 3.0948508118679574e-06, "loss": 0.3651, "step": 9181 }, { "epoch": 0.5509089818203636, "grad_norm": 1.2809768915176392, "learning_rate": 3.0941752252292484e-06, "loss": 0.3492, "step": 9182 }, { "epoch": 0.5509689806203876, "grad_norm": 1.2416746616363525, "learning_rate": 3.0934996539166925e-06, "loss": 0.4149, "step": 9183 }, { "epoch": 0.5510289794204116, "grad_norm": 1.1477142572402954, "learning_rate": 3.0928240979558033e-06, "loss": 0.3943, "step": 9184 }, { "epoch": 0.5510889782204356, "grad_norm": 1.3737980127334595, "learning_rate": 3.092148557372093e-06, "loss": 0.4609, "step": 9185 }, { "epoch": 0.5511489770204596, "grad_norm": 1.3744945526123047, "learning_rate": 3.0914730321910732e-06, "loss": 0.448, "step": 9186 }, { "epoch": 0.5512089758204836, "grad_norm": 1.4359385967254639, "learning_rate": 3.0907975224382555e-06, "loss": 0.4077, "step": 9187 }, { "epoch": 0.5512689746205076, "grad_norm": 1.2486999034881592, "learning_rate": 3.090122028139151e-06, "loss": 0.3958, "step": 9188 }, { "epoch": 0.5513289734205316, "grad_norm": 1.2285293340682983, "learning_rate": 3.0894465493192706e-06, "loss": 0.4327, "step": 9189 }, { "epoch": 0.5513889722205556, "grad_norm": 1.402179479598999, "learning_rate": 3.0887710860041233e-06, "loss": 0.4071, "step": 9190 }, { "epoch": 0.5514489710205795, "grad_norm": 1.2610074281692505, "learning_rate": 3.088095638219219e-06, "loss": 0.3813, "step": 9191 }, { "epoch": 0.5515089698206036, "grad_norm": 1.2467683553695679, "learning_rate": 3.087420205990065e-06, "loss": 0.346, "step": 9192 }, { "epoch": 0.5515689686206275, "grad_norm": 1.1456139087677002, "learning_rate": 3.086744789342169e-06, "loss": 0.3544, "step": 9193 }, { "epoch": 0.5516289674206516, "grad_norm": 1.3696320056915283, "learning_rate": 3.0860693883010426e-06, "loss": 0.4136, "step": 9194 }, { "epoch": 0.5516889662206755, "grad_norm": 1.168285846710205, "learning_rate": 3.0853940028921868e-06, "loss": 0.3719, "step": 9195 }, { "epoch": 0.5517489650206996, "grad_norm": 1.3504682779312134, "learning_rate": 3.0847186331411127e-06, "loss": 0.3889, "step": 9196 }, { "epoch": 0.5518089638207235, "grad_norm": 1.3209725618362427, "learning_rate": 3.0840432790733217e-06, "loss": 0.4274, "step": 9197 }, { "epoch": 0.5518689626207476, "grad_norm": 1.2878819704055786, "learning_rate": 3.0833679407143234e-06, "loss": 0.4235, "step": 9198 }, { "epoch": 0.5519289614207716, "grad_norm": 1.238629937171936, "learning_rate": 3.082692618089617e-06, "loss": 0.3525, "step": 9199 }, { "epoch": 0.5519889602207956, "grad_norm": 1.2814569473266602, "learning_rate": 3.0820173112247107e-06, "loss": 0.3709, "step": 9200 }, { "epoch": 0.5520489590208196, "grad_norm": 1.351317048072815, "learning_rate": 3.081342020145107e-06, "loss": 0.423, "step": 9201 }, { "epoch": 0.5521089578208436, "grad_norm": 1.2712681293487549, "learning_rate": 3.0806667448763065e-06, "loss": 0.3817, "step": 9202 }, { "epoch": 0.5521689566208676, "grad_norm": 1.3005415201187134, "learning_rate": 3.079991485443813e-06, "loss": 0.3924, "step": 9203 }, { "epoch": 0.5522289554208916, "grad_norm": 1.245788812637329, "learning_rate": 3.0793162418731277e-06, "loss": 0.4106, "step": 9204 }, { "epoch": 0.5522889542209156, "grad_norm": 1.2438081502914429, "learning_rate": 3.0786410141897506e-06, "loss": 0.3931, "step": 9205 }, { "epoch": 0.5523489530209396, "grad_norm": 1.2231743335723877, "learning_rate": 3.0779658024191837e-06, "loss": 0.4087, "step": 9206 }, { "epoch": 0.5524089518209636, "grad_norm": 1.2373425960540771, "learning_rate": 3.077290606586925e-06, "loss": 0.3937, "step": 9207 }, { "epoch": 0.5524689506209876, "grad_norm": 1.2486380338668823, "learning_rate": 3.0766154267184747e-06, "loss": 0.4337, "step": 9208 }, { "epoch": 0.5525289494210116, "grad_norm": 1.2260037660598755, "learning_rate": 3.0759402628393307e-06, "loss": 0.3643, "step": 9209 }, { "epoch": 0.5525889482210355, "grad_norm": 1.4624978303909302, "learning_rate": 3.075265114974991e-06, "loss": 0.4972, "step": 9210 }, { "epoch": 0.5526489470210596, "grad_norm": 1.2486292123794556, "learning_rate": 3.0745899831509522e-06, "loss": 0.3955, "step": 9211 }, { "epoch": 0.5527089458210835, "grad_norm": 1.2706950902938843, "learning_rate": 3.073914867392711e-06, "loss": 0.3796, "step": 9212 }, { "epoch": 0.5527689446211076, "grad_norm": 1.3501627445220947, "learning_rate": 3.0732397677257666e-06, "loss": 0.3862, "step": 9213 }, { "epoch": 0.5528289434211315, "grad_norm": 1.3110949993133545, "learning_rate": 3.0725646841756092e-06, "loss": 0.3772, "step": 9214 }, { "epoch": 0.5528889422211556, "grad_norm": 1.1408672332763672, "learning_rate": 3.0718896167677377e-06, "loss": 0.379, "step": 9215 }, { "epoch": 0.5529489410211795, "grad_norm": 1.2709194421768188, "learning_rate": 3.0712145655276445e-06, "loss": 0.3915, "step": 9216 }, { "epoch": 0.5530089398212036, "grad_norm": 1.2555910348892212, "learning_rate": 3.0705395304808234e-06, "loss": 0.401, "step": 9217 }, { "epoch": 0.5530689386212275, "grad_norm": 1.3490570783615112, "learning_rate": 3.069864511652768e-06, "loss": 0.4121, "step": 9218 }, { "epoch": 0.5531289374212516, "grad_norm": 1.217057466506958, "learning_rate": 3.06918950906897e-06, "loss": 0.388, "step": 9219 }, { "epoch": 0.5531889362212756, "grad_norm": 1.1562923192977905, "learning_rate": 3.0685145227549222e-06, "loss": 0.3392, "step": 9220 }, { "epoch": 0.5532489350212996, "grad_norm": 1.2752859592437744, "learning_rate": 3.067839552736114e-06, "loss": 0.3943, "step": 9221 }, { "epoch": 0.5533089338213236, "grad_norm": 1.197029948234558, "learning_rate": 3.0671645990380373e-06, "loss": 0.4051, "step": 9222 }, { "epoch": 0.5533689326213476, "grad_norm": 1.1222355365753174, "learning_rate": 3.0664896616861825e-06, "loss": 0.3428, "step": 9223 }, { "epoch": 0.5534289314213716, "grad_norm": 1.243230938911438, "learning_rate": 3.0658147407060367e-06, "loss": 0.4241, "step": 9224 }, { "epoch": 0.5534889302213956, "grad_norm": 1.3663500547409058, "learning_rate": 3.065139836123092e-06, "loss": 0.3674, "step": 9225 }, { "epoch": 0.5535489290214196, "grad_norm": 1.287673830986023, "learning_rate": 3.0644649479628317e-06, "loss": 0.3535, "step": 9226 }, { "epoch": 0.5536089278214436, "grad_norm": 1.2583050727844238, "learning_rate": 3.063790076250749e-06, "loss": 0.3667, "step": 9227 }, { "epoch": 0.5536689266214676, "grad_norm": 1.2634121179580688, "learning_rate": 3.063115221012325e-06, "loss": 0.3899, "step": 9228 }, { "epoch": 0.5537289254214915, "grad_norm": 1.1968237161636353, "learning_rate": 3.0624403822730502e-06, "loss": 0.3667, "step": 9229 }, { "epoch": 0.5537889242215156, "grad_norm": 1.2002196311950684, "learning_rate": 3.061765560058409e-06, "loss": 0.3604, "step": 9230 }, { "epoch": 0.5538489230215395, "grad_norm": 1.3061197996139526, "learning_rate": 3.061090754393885e-06, "loss": 0.3881, "step": 9231 }, { "epoch": 0.5539089218215636, "grad_norm": 1.4794421195983887, "learning_rate": 3.0604159653049642e-06, "loss": 0.4028, "step": 9232 }, { "epoch": 0.5539689206215875, "grad_norm": 1.242098331451416, "learning_rate": 3.0597411928171293e-06, "loss": 0.3992, "step": 9233 }, { "epoch": 0.5540289194216116, "grad_norm": 1.2427715063095093, "learning_rate": 3.0590664369558634e-06, "loss": 0.399, "step": 9234 }, { "epoch": 0.5540889182216355, "grad_norm": 1.277244210243225, "learning_rate": 3.0583916977466507e-06, "loss": 0.3901, "step": 9235 }, { "epoch": 0.5541489170216596, "grad_norm": 1.1433311700820923, "learning_rate": 3.05771697521497e-06, "loss": 0.3402, "step": 9236 }, { "epoch": 0.5542089158216835, "grad_norm": 1.3621493577957153, "learning_rate": 3.0570422693863055e-06, "loss": 0.4491, "step": 9237 }, { "epoch": 0.5542689146217076, "grad_norm": 1.3382468223571777, "learning_rate": 3.0563675802861356e-06, "loss": 0.4221, "step": 9238 }, { "epoch": 0.5543289134217315, "grad_norm": 1.2690311670303345, "learning_rate": 3.0556929079399415e-06, "loss": 0.3656, "step": 9239 }, { "epoch": 0.5543889122217556, "grad_norm": 1.1445738077163696, "learning_rate": 3.0550182523732016e-06, "loss": 0.4126, "step": 9240 }, { "epoch": 0.5544489110217795, "grad_norm": 1.1635161638259888, "learning_rate": 3.0543436136113937e-06, "loss": 0.3635, "step": 9241 }, { "epoch": 0.5545089098218036, "grad_norm": 1.306993842124939, "learning_rate": 3.0536689916799993e-06, "loss": 0.3906, "step": 9242 }, { "epoch": 0.5545689086218276, "grad_norm": 1.3163224458694458, "learning_rate": 3.052994386604492e-06, "loss": 0.4443, "step": 9243 }, { "epoch": 0.5546289074218516, "grad_norm": 1.2614437341690063, "learning_rate": 3.052319798410352e-06, "loss": 0.3635, "step": 9244 }, { "epoch": 0.5546889062218756, "grad_norm": 1.237857460975647, "learning_rate": 3.0516452271230514e-06, "loss": 0.4125, "step": 9245 }, { "epoch": 0.5547489050218996, "grad_norm": 1.3018194437026978, "learning_rate": 3.0509706727680683e-06, "loss": 0.4063, "step": 9246 }, { "epoch": 0.5548089038219236, "grad_norm": 1.2814699411392212, "learning_rate": 3.0502961353708784e-06, "loss": 0.3952, "step": 9247 }, { "epoch": 0.5548689026219475, "grad_norm": 1.9541168212890625, "learning_rate": 3.0496216149569535e-06, "loss": 0.3898, "step": 9248 }, { "epoch": 0.5549289014219716, "grad_norm": 1.3709524869918823, "learning_rate": 3.048947111551769e-06, "loss": 0.3907, "step": 9249 }, { "epoch": 0.5549889002219955, "grad_norm": 1.2759121656417847, "learning_rate": 3.048272625180797e-06, "loss": 0.3963, "step": 9250 }, { "epoch": 0.5550488990220196, "grad_norm": 1.393837332725525, "learning_rate": 3.04759815586951e-06, "loss": 0.4655, "step": 9251 }, { "epoch": 0.5551088978220435, "grad_norm": 1.224839448928833, "learning_rate": 3.046923703643379e-06, "loss": 0.3996, "step": 9252 }, { "epoch": 0.5551688966220676, "grad_norm": 1.1475260257720947, "learning_rate": 3.0462492685278753e-06, "loss": 0.3476, "step": 9253 }, { "epoch": 0.5552288954220915, "grad_norm": 1.3902297019958496, "learning_rate": 3.04557485054847e-06, "loss": 0.3864, "step": 9254 }, { "epoch": 0.5552888942221156, "grad_norm": 1.2768607139587402, "learning_rate": 3.044900449730632e-06, "loss": 0.4297, "step": 9255 }, { "epoch": 0.5553488930221395, "grad_norm": 1.31556236743927, "learning_rate": 3.044226066099831e-06, "loss": 0.3636, "step": 9256 }, { "epoch": 0.5554088918221636, "grad_norm": 1.1711311340332031, "learning_rate": 3.0435516996815348e-06, "loss": 0.3605, "step": 9257 }, { "epoch": 0.5554688906221875, "grad_norm": 1.2978333234786987, "learning_rate": 3.04287735050121e-06, "loss": 0.4421, "step": 9258 }, { "epoch": 0.5555288894222116, "grad_norm": 1.2811806201934814, "learning_rate": 3.0422030185843274e-06, "loss": 0.3977, "step": 9259 }, { "epoch": 0.5555888882222355, "grad_norm": 1.3242247104644775, "learning_rate": 3.041528703956349e-06, "loss": 0.4157, "step": 9260 }, { "epoch": 0.5556488870222596, "grad_norm": 1.34285569190979, "learning_rate": 3.040854406642744e-06, "loss": 0.3851, "step": 9261 }, { "epoch": 0.5557088858222835, "grad_norm": 1.2550822496414185, "learning_rate": 3.0401801266689754e-06, "loss": 0.4259, "step": 9262 }, { "epoch": 0.5557688846223076, "grad_norm": 1.4405431747436523, "learning_rate": 3.0395058640605086e-06, "loss": 0.4559, "step": 9263 }, { "epoch": 0.5558288834223315, "grad_norm": 1.3011696338653564, "learning_rate": 3.038831618842808e-06, "loss": 0.3952, "step": 9264 }, { "epoch": 0.5558888822223556, "grad_norm": 1.2603780031204224, "learning_rate": 3.0381573910413353e-06, "loss": 0.3585, "step": 9265 }, { "epoch": 0.5559488810223796, "grad_norm": 1.3301767110824585, "learning_rate": 3.0374831806815546e-06, "loss": 0.4172, "step": 9266 }, { "epoch": 0.5560088798224035, "grad_norm": 1.1809738874435425, "learning_rate": 3.0368089877889265e-06, "loss": 0.4083, "step": 9267 }, { "epoch": 0.5560688786224276, "grad_norm": 1.332693338394165, "learning_rate": 3.0361348123889127e-06, "loss": 0.3534, "step": 9268 }, { "epoch": 0.5561288774224515, "grad_norm": 1.2206456661224365, "learning_rate": 3.035460654506973e-06, "loss": 0.3846, "step": 9269 }, { "epoch": 0.5561888762224756, "grad_norm": 1.262703537940979, "learning_rate": 3.0347865141685677e-06, "loss": 0.3506, "step": 9270 }, { "epoch": 0.5562488750224995, "grad_norm": 1.2019466161727905, "learning_rate": 3.0341123913991584e-06, "loss": 0.3773, "step": 9271 }, { "epoch": 0.5563088738225236, "grad_norm": 1.209216833114624, "learning_rate": 3.033438286224199e-06, "loss": 0.3414, "step": 9272 }, { "epoch": 0.5563688726225475, "grad_norm": 1.2808468341827393, "learning_rate": 3.0327641986691526e-06, "loss": 0.3796, "step": 9273 }, { "epoch": 0.5564288714225716, "grad_norm": 1.2549526691436768, "learning_rate": 3.0320901287594713e-06, "loss": 0.4224, "step": 9274 }, { "epoch": 0.5564888702225955, "grad_norm": 1.1727089881896973, "learning_rate": 3.031416076520614e-06, "loss": 0.3805, "step": 9275 }, { "epoch": 0.5565488690226196, "grad_norm": 1.3995683193206787, "learning_rate": 3.0307420419780384e-06, "loss": 0.4651, "step": 9276 }, { "epoch": 0.5566088678226435, "grad_norm": 1.3607786893844604, "learning_rate": 3.030068025157197e-06, "loss": 0.3811, "step": 9277 }, { "epoch": 0.5566688666226676, "grad_norm": 1.2093212604522705, "learning_rate": 3.0293940260835457e-06, "loss": 0.3772, "step": 9278 }, { "epoch": 0.5567288654226915, "grad_norm": 1.1906646490097046, "learning_rate": 3.028720044782537e-06, "loss": 0.3405, "step": 9279 }, { "epoch": 0.5567888642227156, "grad_norm": 1.251842975616455, "learning_rate": 3.0280460812796266e-06, "loss": 0.3829, "step": 9280 }, { "epoch": 0.5568488630227395, "grad_norm": 1.2537567615509033, "learning_rate": 3.0273721356002637e-06, "loss": 0.4318, "step": 9281 }, { "epoch": 0.5569088618227636, "grad_norm": 1.3703693151474, "learning_rate": 3.0266982077699027e-06, "loss": 0.4067, "step": 9282 }, { "epoch": 0.5569688606227875, "grad_norm": 1.2822027206420898, "learning_rate": 3.0260242978139947e-06, "loss": 0.3523, "step": 9283 }, { "epoch": 0.5570288594228116, "grad_norm": 1.2743916511535645, "learning_rate": 3.025350405757989e-06, "loss": 0.3804, "step": 9284 }, { "epoch": 0.5570888582228355, "grad_norm": 1.1984471082687378, "learning_rate": 3.024676531627336e-06, "loss": 0.3833, "step": 9285 }, { "epoch": 0.5571488570228595, "grad_norm": 1.0657973289489746, "learning_rate": 3.0240026754474844e-06, "loss": 0.3483, "step": 9286 }, { "epoch": 0.5572088558228836, "grad_norm": 1.1339788436889648, "learning_rate": 3.0233288372438826e-06, "loss": 0.3192, "step": 9287 }, { "epoch": 0.5572688546229075, "grad_norm": 1.2966341972351074, "learning_rate": 3.0226550170419807e-06, "loss": 0.4033, "step": 9288 }, { "epoch": 0.5573288534229316, "grad_norm": 1.373176097869873, "learning_rate": 3.0219812148672223e-06, "loss": 0.3855, "step": 9289 }, { "epoch": 0.5573888522229555, "grad_norm": 1.2624056339263916, "learning_rate": 3.0213074307450576e-06, "loss": 0.4038, "step": 9290 }, { "epoch": 0.5574488510229796, "grad_norm": 1.2193423509597778, "learning_rate": 3.0206336647009283e-06, "loss": 0.3876, "step": 9291 }, { "epoch": 0.5575088498230035, "grad_norm": 1.2983132600784302, "learning_rate": 3.019959916760284e-06, "loss": 0.3874, "step": 9292 }, { "epoch": 0.5575688486230276, "grad_norm": 1.4261306524276733, "learning_rate": 3.0192861869485635e-06, "loss": 0.4394, "step": 9293 }, { "epoch": 0.5576288474230515, "grad_norm": 1.3115501403808594, "learning_rate": 3.0186124752912144e-06, "loss": 0.3952, "step": 9294 }, { "epoch": 0.5576888462230756, "grad_norm": 1.4192800521850586, "learning_rate": 3.0179387818136805e-06, "loss": 0.4124, "step": 9295 }, { "epoch": 0.5577488450230995, "grad_norm": 1.1437641382217407, "learning_rate": 3.017265106541401e-06, "loss": 0.3758, "step": 9296 }, { "epoch": 0.5578088438231236, "grad_norm": 1.2745299339294434, "learning_rate": 3.01659144949982e-06, "loss": 0.4098, "step": 9297 }, { "epoch": 0.5578688426231475, "grad_norm": 1.164262294769287, "learning_rate": 3.015917810714377e-06, "loss": 0.3707, "step": 9298 }, { "epoch": 0.5579288414231716, "grad_norm": 1.145426630973816, "learning_rate": 3.015244190210513e-06, "loss": 0.3671, "step": 9299 }, { "epoch": 0.5579888402231955, "grad_norm": 1.284385323524475, "learning_rate": 3.0145705880136677e-06, "loss": 0.4, "step": 9300 }, { "epoch": 0.5580488390232196, "grad_norm": 1.4008411169052124, "learning_rate": 3.013897004149279e-06, "loss": 0.4233, "step": 9301 }, { "epoch": 0.5581088378232435, "grad_norm": 1.257880449295044, "learning_rate": 3.013223438642787e-06, "loss": 0.3766, "step": 9302 }, { "epoch": 0.5581688366232676, "grad_norm": 1.1426533460617065, "learning_rate": 3.0125498915196265e-06, "loss": 0.3636, "step": 9303 }, { "epoch": 0.5582288354232915, "grad_norm": 1.2572605609893799, "learning_rate": 3.0118763628052356e-06, "loss": 0.4184, "step": 9304 }, { "epoch": 0.5582888342233155, "grad_norm": 1.4411718845367432, "learning_rate": 3.011202852525052e-06, "loss": 0.4016, "step": 9305 }, { "epoch": 0.5583488330233395, "grad_norm": 1.2623790502548218, "learning_rate": 3.0105293607045086e-06, "loss": 0.3729, "step": 9306 }, { "epoch": 0.5584088318233635, "grad_norm": 1.1986329555511475, "learning_rate": 3.009855887369042e-06, "loss": 0.3711, "step": 9307 }, { "epoch": 0.5584688306233875, "grad_norm": 1.1689342260360718, "learning_rate": 3.009182432544085e-06, "loss": 0.3781, "step": 9308 }, { "epoch": 0.5585288294234115, "grad_norm": 1.3841900825500488, "learning_rate": 3.0085089962550722e-06, "loss": 0.438, "step": 9309 }, { "epoch": 0.5585888282234356, "grad_norm": 1.3199526071548462, "learning_rate": 3.007835578527434e-06, "loss": 0.4566, "step": 9310 }, { "epoch": 0.5586488270234595, "grad_norm": 1.218353271484375, "learning_rate": 3.0071621793866044e-06, "loss": 0.4204, "step": 9311 }, { "epoch": 0.5587088258234836, "grad_norm": 1.1052296161651611, "learning_rate": 3.0064887988580145e-06, "loss": 0.3711, "step": 9312 }, { "epoch": 0.5587688246235075, "grad_norm": 1.15199613571167, "learning_rate": 3.0058154369670933e-06, "loss": 0.3428, "step": 9313 }, { "epoch": 0.5588288234235316, "grad_norm": 1.2220178842544556, "learning_rate": 3.0051420937392725e-06, "loss": 0.3848, "step": 9314 }, { "epoch": 0.5588888222235555, "grad_norm": 1.2184088230133057, "learning_rate": 3.0044687691999793e-06, "loss": 0.3424, "step": 9315 }, { "epoch": 0.5589488210235796, "grad_norm": 1.3483814001083374, "learning_rate": 3.0037954633746424e-06, "loss": 0.4148, "step": 9316 }, { "epoch": 0.5590088198236035, "grad_norm": 1.2259000539779663, "learning_rate": 3.0031221762886926e-06, "loss": 0.3925, "step": 9317 }, { "epoch": 0.5590688186236276, "grad_norm": 1.1861379146575928, "learning_rate": 3.002448907967552e-06, "loss": 0.3649, "step": 9318 }, { "epoch": 0.5591288174236515, "grad_norm": 1.431832194328308, "learning_rate": 3.0017756584366516e-06, "loss": 0.3782, "step": 9319 }, { "epoch": 0.5591888162236756, "grad_norm": 1.4019516706466675, "learning_rate": 3.001102427721412e-06, "loss": 0.4276, "step": 9320 }, { "epoch": 0.5592488150236995, "grad_norm": 1.2401913404464722, "learning_rate": 3.0004292158472637e-06, "loss": 0.3671, "step": 9321 }, { "epoch": 0.5593088138237235, "grad_norm": 1.2879273891448975, "learning_rate": 2.999756022839625e-06, "loss": 0.4189, "step": 9322 }, { "epoch": 0.5593688126237475, "grad_norm": 1.33303964138031, "learning_rate": 2.9990828487239237e-06, "loss": 0.3692, "step": 9323 }, { "epoch": 0.5594288114237715, "grad_norm": 1.3362592458724976, "learning_rate": 2.998409693525581e-06, "loss": 0.3874, "step": 9324 }, { "epoch": 0.5594888102237955, "grad_norm": 1.3689215183258057, "learning_rate": 2.9977365572700185e-06, "loss": 0.454, "step": 9325 }, { "epoch": 0.5595488090238195, "grad_norm": 1.3205969333648682, "learning_rate": 2.9970634399826587e-06, "loss": 0.4141, "step": 9326 }, { "epoch": 0.5596088078238435, "grad_norm": 1.3279043436050415, "learning_rate": 2.996390341688921e-06, "loss": 0.4581, "step": 9327 }, { "epoch": 0.5596688066238675, "grad_norm": 1.3205758333206177, "learning_rate": 2.995717262414225e-06, "loss": 0.374, "step": 9328 }, { "epoch": 0.5597288054238915, "grad_norm": 1.3297003507614136, "learning_rate": 2.9950442021839914e-06, "loss": 0.3959, "step": 9329 }, { "epoch": 0.5597888042239155, "grad_norm": 1.30517578125, "learning_rate": 2.994371161023637e-06, "loss": 0.4064, "step": 9330 }, { "epoch": 0.5598488030239395, "grad_norm": 1.2329516410827637, "learning_rate": 2.9936981389585813e-06, "loss": 0.3944, "step": 9331 }, { "epoch": 0.5599088018239635, "grad_norm": 1.349251389503479, "learning_rate": 2.993025136014239e-06, "loss": 0.3565, "step": 9332 }, { "epoch": 0.5599688006239876, "grad_norm": 1.3142192363739014, "learning_rate": 2.9923521522160286e-06, "loss": 0.4313, "step": 9333 }, { "epoch": 0.5600287994240115, "grad_norm": 1.3453822135925293, "learning_rate": 2.991679187589364e-06, "loss": 0.4633, "step": 9334 }, { "epoch": 0.5600887982240356, "grad_norm": 1.4993754625320435, "learning_rate": 2.991006242159659e-06, "loss": 0.4483, "step": 9335 }, { "epoch": 0.5601487970240595, "grad_norm": 1.2815492153167725, "learning_rate": 2.990333315952332e-06, "loss": 0.352, "step": 9336 }, { "epoch": 0.5602087958240836, "grad_norm": 1.196932077407837, "learning_rate": 2.9896604089927915e-06, "loss": 0.4028, "step": 9337 }, { "epoch": 0.5602687946241075, "grad_norm": 1.339740514755249, "learning_rate": 2.9889875213064544e-06, "loss": 0.43, "step": 9338 }, { "epoch": 0.5603287934241316, "grad_norm": 1.359465479850769, "learning_rate": 2.9883146529187284e-06, "loss": 0.3786, "step": 9339 }, { "epoch": 0.5603887922241555, "grad_norm": 1.2659099102020264, "learning_rate": 2.9876418038550274e-06, "loss": 0.3669, "step": 9340 }, { "epoch": 0.5604487910241795, "grad_norm": 1.1684889793395996, "learning_rate": 2.9869689741407615e-06, "loss": 0.4079, "step": 9341 }, { "epoch": 0.5605087898242035, "grad_norm": 1.4263381958007812, "learning_rate": 2.98629616380134e-06, "loss": 0.4193, "step": 9342 }, { "epoch": 0.5605687886242275, "grad_norm": 1.2541730403900146, "learning_rate": 2.9856233728621723e-06, "loss": 0.4303, "step": 9343 }, { "epoch": 0.5606287874242515, "grad_norm": 1.3508025407791138, "learning_rate": 2.9849506013486655e-06, "loss": 0.415, "step": 9344 }, { "epoch": 0.5606887862242755, "grad_norm": 1.4039669036865234, "learning_rate": 2.984277849286228e-06, "loss": 0.3858, "step": 9345 }, { "epoch": 0.5607487850242995, "grad_norm": 1.3368149995803833, "learning_rate": 2.9836051167002676e-06, "loss": 0.4272, "step": 9346 }, { "epoch": 0.5608087838243235, "grad_norm": 1.2976981401443481, "learning_rate": 2.9829324036161883e-06, "loss": 0.4294, "step": 9347 }, { "epoch": 0.5608687826243475, "grad_norm": 1.3880047798156738, "learning_rate": 2.982259710059397e-06, "loss": 0.3825, "step": 9348 }, { "epoch": 0.5609287814243715, "grad_norm": 1.2229738235473633, "learning_rate": 2.981587036055297e-06, "loss": 0.3717, "step": 9349 }, { "epoch": 0.5609887802243955, "grad_norm": 1.2883591651916504, "learning_rate": 2.980914381629293e-06, "loss": 0.3864, "step": 9350 }, { "epoch": 0.5610487790244195, "grad_norm": 1.395417332649231, "learning_rate": 2.980241746806787e-06, "loss": 0.3908, "step": 9351 }, { "epoch": 0.5611087778244435, "grad_norm": 1.406385898590088, "learning_rate": 2.9795691316131826e-06, "loss": 0.376, "step": 9352 }, { "epoch": 0.5611687766244675, "grad_norm": 1.355704665184021, "learning_rate": 2.978896536073882e-06, "loss": 0.4347, "step": 9353 }, { "epoch": 0.5612287754244915, "grad_norm": 1.271677017211914, "learning_rate": 2.9782239602142843e-06, "loss": 0.3911, "step": 9354 }, { "epoch": 0.5612887742245155, "grad_norm": 1.2064915895462036, "learning_rate": 2.9775514040597908e-06, "loss": 0.3676, "step": 9355 }, { "epoch": 0.5613487730245396, "grad_norm": 1.1783250570297241, "learning_rate": 2.9768788676358002e-06, "loss": 0.4052, "step": 9356 }, { "epoch": 0.5614087718245635, "grad_norm": 1.3186835050582886, "learning_rate": 2.9762063509677114e-06, "loss": 0.4805, "step": 9357 }, { "epoch": 0.5614687706245876, "grad_norm": 1.2343473434448242, "learning_rate": 2.9755338540809233e-06, "loss": 0.3822, "step": 9358 }, { "epoch": 0.5615287694246115, "grad_norm": 1.2059839963912964, "learning_rate": 2.9748613770008306e-06, "loss": 0.4034, "step": 9359 }, { "epoch": 0.5615887682246355, "grad_norm": 1.3493576049804688, "learning_rate": 2.9741889197528323e-06, "loss": 0.3624, "step": 9360 }, { "epoch": 0.5616487670246595, "grad_norm": 1.1913883686065674, "learning_rate": 2.9735164823623224e-06, "loss": 0.3703, "step": 9361 }, { "epoch": 0.5617087658246835, "grad_norm": 1.3926661014556885, "learning_rate": 2.9728440648546964e-06, "loss": 0.3633, "step": 9362 }, { "epoch": 0.5617687646247075, "grad_norm": 1.2639803886413574, "learning_rate": 2.972171667255348e-06, "loss": 0.4335, "step": 9363 }, { "epoch": 0.5618287634247315, "grad_norm": 1.2864559888839722, "learning_rate": 2.9714992895896706e-06, "loss": 0.3645, "step": 9364 }, { "epoch": 0.5618887622247555, "grad_norm": 1.3642663955688477, "learning_rate": 2.970826931883059e-06, "loss": 0.4249, "step": 9365 }, { "epoch": 0.5619487610247795, "grad_norm": 1.3853391408920288, "learning_rate": 2.9701545941609013e-06, "loss": 0.4283, "step": 9366 }, { "epoch": 0.5620087598248035, "grad_norm": 1.280727505683899, "learning_rate": 2.9694822764485922e-06, "loss": 0.4335, "step": 9367 }, { "epoch": 0.5620687586248275, "grad_norm": 1.2508760690689087, "learning_rate": 2.968809978771519e-06, "loss": 0.3935, "step": 9368 }, { "epoch": 0.5621287574248515, "grad_norm": 1.3077821731567383, "learning_rate": 2.9681377011550734e-06, "loss": 0.3759, "step": 9369 }, { "epoch": 0.5621887562248755, "grad_norm": 1.2438372373580933, "learning_rate": 2.967465443624644e-06, "loss": 0.3974, "step": 9370 }, { "epoch": 0.5622487550248995, "grad_norm": 1.2370332479476929, "learning_rate": 2.9667932062056183e-06, "loss": 0.374, "step": 9371 }, { "epoch": 0.5623087538249235, "grad_norm": 1.311164379119873, "learning_rate": 2.9661209889233843e-06, "loss": 0.3946, "step": 9372 }, { "epoch": 0.5623687526249475, "grad_norm": 1.4320908784866333, "learning_rate": 2.9654487918033274e-06, "loss": 0.4093, "step": 9373 }, { "epoch": 0.5624287514249715, "grad_norm": 1.473007082939148, "learning_rate": 2.9647766148708355e-06, "loss": 0.3955, "step": 9374 }, { "epoch": 0.5624887502249954, "grad_norm": 1.2505066394805908, "learning_rate": 2.964104458151291e-06, "loss": 0.364, "step": 9375 }, { "epoch": 0.5625487490250195, "grad_norm": 1.2999297380447388, "learning_rate": 2.9634323216700803e-06, "loss": 0.4263, "step": 9376 }, { "epoch": 0.5626087478250436, "grad_norm": 1.1929839849472046, "learning_rate": 2.9627602054525864e-06, "loss": 0.3286, "step": 9377 }, { "epoch": 0.5626687466250675, "grad_norm": 1.2721333503723145, "learning_rate": 2.9620881095241914e-06, "loss": 0.3703, "step": 9378 }, { "epoch": 0.5627287454250915, "grad_norm": 1.2565319538116455, "learning_rate": 2.961416033910278e-06, "loss": 0.4218, "step": 9379 }, { "epoch": 0.5627887442251155, "grad_norm": 1.3040775060653687, "learning_rate": 2.960743978636227e-06, "loss": 0.3631, "step": 9380 }, { "epoch": 0.5628487430251395, "grad_norm": 1.3073348999023438, "learning_rate": 2.9600719437274186e-06, "loss": 0.3803, "step": 9381 }, { "epoch": 0.5629087418251635, "grad_norm": 1.2457023859024048, "learning_rate": 2.9593999292092346e-06, "loss": 0.3676, "step": 9382 }, { "epoch": 0.5629687406251875, "grad_norm": 1.3277363777160645, "learning_rate": 2.9587279351070505e-06, "loss": 0.4371, "step": 9383 }, { "epoch": 0.5630287394252115, "grad_norm": 1.384690523147583, "learning_rate": 2.958055961446248e-06, "loss": 0.3877, "step": 9384 }, { "epoch": 0.5630887382252355, "grad_norm": 1.31086266040802, "learning_rate": 2.957384008252201e-06, "loss": 0.3923, "step": 9385 }, { "epoch": 0.5631487370252595, "grad_norm": 1.2762993574142456, "learning_rate": 2.9567120755502884e-06, "loss": 0.4003, "step": 9386 }, { "epoch": 0.5632087358252835, "grad_norm": 1.4690377712249756, "learning_rate": 2.9560401633658863e-06, "loss": 0.4056, "step": 9387 }, { "epoch": 0.5632687346253075, "grad_norm": 1.264159917831421, "learning_rate": 2.955368271724368e-06, "loss": 0.3946, "step": 9388 }, { "epoch": 0.5633287334253315, "grad_norm": 1.1951555013656616, "learning_rate": 2.9546964006511095e-06, "loss": 0.3654, "step": 9389 }, { "epoch": 0.5633887322253555, "grad_norm": 1.5020431280136108, "learning_rate": 2.9540245501714834e-06, "loss": 0.4366, "step": 9390 }, { "epoch": 0.5634487310253795, "grad_norm": 1.239853858947754, "learning_rate": 2.953352720310863e-06, "loss": 0.3842, "step": 9391 }, { "epoch": 0.5635087298254035, "grad_norm": 1.4258856773376465, "learning_rate": 2.9526809110946187e-06, "loss": 0.4096, "step": 9392 }, { "epoch": 0.5635687286254275, "grad_norm": 1.2500181198120117, "learning_rate": 2.952009122548123e-06, "loss": 0.3997, "step": 9393 }, { "epoch": 0.5636287274254514, "grad_norm": 1.3066695928573608, "learning_rate": 2.951337354696747e-06, "loss": 0.3959, "step": 9394 }, { "epoch": 0.5636887262254755, "grad_norm": 1.1672236919403076, "learning_rate": 2.9506656075658585e-06, "loss": 0.4026, "step": 9395 }, { "epoch": 0.5637487250254994, "grad_norm": 1.348892092704773, "learning_rate": 2.9499938811808287e-06, "loss": 0.3836, "step": 9396 }, { "epoch": 0.5638087238255235, "grad_norm": 1.2507202625274658, "learning_rate": 2.949322175567022e-06, "loss": 0.3596, "step": 9397 }, { "epoch": 0.5638687226255474, "grad_norm": 1.4023667573928833, "learning_rate": 2.9486504907498084e-06, "loss": 0.4483, "step": 9398 }, { "epoch": 0.5639287214255715, "grad_norm": 1.2576560974121094, "learning_rate": 2.9479788267545548e-06, "loss": 0.4263, "step": 9399 }, { "epoch": 0.5639887202255955, "grad_norm": 1.441733479499817, "learning_rate": 2.9473071836066256e-06, "loss": 0.4724, "step": 9400 }, { "epoch": 0.5640487190256195, "grad_norm": 1.25565767288208, "learning_rate": 2.946635561331386e-06, "loss": 0.389, "step": 9401 }, { "epoch": 0.5641087178256435, "grad_norm": 1.3860723972320557, "learning_rate": 2.9459639599542e-06, "loss": 0.4239, "step": 9402 }, { "epoch": 0.5641687166256675, "grad_norm": 1.182437777519226, "learning_rate": 2.9452923795004317e-06, "loss": 0.3954, "step": 9403 }, { "epoch": 0.5642287154256915, "grad_norm": 1.2961634397506714, "learning_rate": 2.9446208199954416e-06, "loss": 0.4229, "step": 9404 }, { "epoch": 0.5642887142257155, "grad_norm": 1.1679587364196777, "learning_rate": 2.9439492814645936e-06, "loss": 0.3201, "step": 9405 }, { "epoch": 0.5643487130257395, "grad_norm": 1.1775388717651367, "learning_rate": 2.9432777639332484e-06, "loss": 0.3453, "step": 9406 }, { "epoch": 0.5644087118257635, "grad_norm": 1.215921401977539, "learning_rate": 2.942606267426765e-06, "loss": 0.3917, "step": 9407 }, { "epoch": 0.5644687106257875, "grad_norm": 1.299965500831604, "learning_rate": 2.941934791970504e-06, "loss": 0.3702, "step": 9408 }, { "epoch": 0.5645287094258115, "grad_norm": 1.3283666372299194, "learning_rate": 2.941263337589822e-06, "loss": 0.3885, "step": 9409 }, { "epoch": 0.5645887082258355, "grad_norm": 1.3486930131912231, "learning_rate": 2.940591904310078e-06, "loss": 0.4331, "step": 9410 }, { "epoch": 0.5646487070258595, "grad_norm": 1.1521812677383423, "learning_rate": 2.939920492156632e-06, "loss": 0.3963, "step": 9411 }, { "epoch": 0.5647087058258835, "grad_norm": 1.3273903131484985, "learning_rate": 2.939249101154834e-06, "loss": 0.3857, "step": 9412 }, { "epoch": 0.5647687046259074, "grad_norm": 1.1720387935638428, "learning_rate": 2.9385777313300455e-06, "loss": 0.3578, "step": 9413 }, { "epoch": 0.5648287034259315, "grad_norm": 1.3283685445785522, "learning_rate": 2.9379063827076155e-06, "loss": 0.4271, "step": 9414 }, { "epoch": 0.5648887022259554, "grad_norm": 1.1895103454589844, "learning_rate": 2.937235055312903e-06, "loss": 0.3989, "step": 9415 }, { "epoch": 0.5649487010259795, "grad_norm": 1.270734429359436, "learning_rate": 2.9365637491712566e-06, "loss": 0.3688, "step": 9416 }, { "epoch": 0.5650086998260034, "grad_norm": 1.3833677768707275, "learning_rate": 2.9358924643080306e-06, "loss": 0.3926, "step": 9417 }, { "epoch": 0.5650686986260275, "grad_norm": 1.2405941486358643, "learning_rate": 2.9352212007485778e-06, "loss": 0.4233, "step": 9418 }, { "epoch": 0.5651286974260514, "grad_norm": 1.390032172203064, "learning_rate": 2.9345499585182454e-06, "loss": 0.3909, "step": 9419 }, { "epoch": 0.5651886962260755, "grad_norm": 1.2282112836837769, "learning_rate": 2.933878737642386e-06, "loss": 0.3791, "step": 9420 }, { "epoch": 0.5652486950260994, "grad_norm": 1.4018148183822632, "learning_rate": 2.9332075381463467e-06, "loss": 0.3962, "step": 9421 }, { "epoch": 0.5653086938261235, "grad_norm": 1.1578489542007446, "learning_rate": 2.9325363600554765e-06, "loss": 0.3691, "step": 9422 }, { "epoch": 0.5653686926261475, "grad_norm": 1.2250195741653442, "learning_rate": 2.9318652033951234e-06, "loss": 0.3872, "step": 9423 }, { "epoch": 0.5654286914261715, "grad_norm": 1.3872075080871582, "learning_rate": 2.931194068190632e-06, "loss": 0.4202, "step": 9424 }, { "epoch": 0.5654886902261955, "grad_norm": 1.1879234313964844, "learning_rate": 2.93052295446735e-06, "loss": 0.3963, "step": 9425 }, { "epoch": 0.5655486890262195, "grad_norm": 1.1885992288589478, "learning_rate": 2.929851862250621e-06, "loss": 0.373, "step": 9426 }, { "epoch": 0.5656086878262435, "grad_norm": 1.3343573808670044, "learning_rate": 2.929180791565789e-06, "loss": 0.415, "step": 9427 }, { "epoch": 0.5656686866262675, "grad_norm": 1.299492359161377, "learning_rate": 2.9285097424381998e-06, "loss": 0.447, "step": 9428 }, { "epoch": 0.5657286854262915, "grad_norm": 1.2061070203781128, "learning_rate": 2.9278387148931918e-06, "loss": 0.393, "step": 9429 }, { "epoch": 0.5657886842263155, "grad_norm": 1.2548288106918335, "learning_rate": 2.927167708956111e-06, "loss": 0.4002, "step": 9430 }, { "epoch": 0.5658486830263395, "grad_norm": 1.3292226791381836, "learning_rate": 2.9264967246522935e-06, "loss": 0.3519, "step": 9431 }, { "epoch": 0.5659086818263634, "grad_norm": 1.3087021112442017, "learning_rate": 2.925825762007084e-06, "loss": 0.3961, "step": 9432 }, { "epoch": 0.5659686806263875, "grad_norm": 1.343227505683899, "learning_rate": 2.925154821045818e-06, "loss": 0.392, "step": 9433 }, { "epoch": 0.5660286794264114, "grad_norm": 1.2870898246765137, "learning_rate": 2.9244839017938355e-06, "loss": 0.4417, "step": 9434 }, { "epoch": 0.5660886782264355, "grad_norm": 1.2502022981643677, "learning_rate": 2.9238130042764754e-06, "loss": 0.3937, "step": 9435 }, { "epoch": 0.5661486770264594, "grad_norm": 1.296743392944336, "learning_rate": 2.923142128519072e-06, "loss": 0.341, "step": 9436 }, { "epoch": 0.5662086758264835, "grad_norm": 1.2482486963272095, "learning_rate": 2.922471274546963e-06, "loss": 0.4017, "step": 9437 }, { "epoch": 0.5662686746265074, "grad_norm": 1.1599986553192139, "learning_rate": 2.921800442385481e-06, "loss": 0.3637, "step": 9438 }, { "epoch": 0.5663286734265315, "grad_norm": 1.2944400310516357, "learning_rate": 2.9211296320599634e-06, "loss": 0.4065, "step": 9439 }, { "epoch": 0.5663886722265554, "grad_norm": 1.2794398069381714, "learning_rate": 2.920458843595742e-06, "loss": 0.4044, "step": 9440 }, { "epoch": 0.5664486710265795, "grad_norm": 1.332302212715149, "learning_rate": 2.919788077018149e-06, "loss": 0.4084, "step": 9441 }, { "epoch": 0.5665086698266034, "grad_norm": 1.3673681020736694, "learning_rate": 2.919117332352519e-06, "loss": 0.4078, "step": 9442 }, { "epoch": 0.5665686686266275, "grad_norm": 1.2570322751998901, "learning_rate": 2.9184466096241784e-06, "loss": 0.389, "step": 9443 }, { "epoch": 0.5666286674266515, "grad_norm": 1.3491848707199097, "learning_rate": 2.917775908858462e-06, "loss": 0.407, "step": 9444 }, { "epoch": 0.5666886662266755, "grad_norm": 1.2505754232406616, "learning_rate": 2.917105230080695e-06, "loss": 0.3803, "step": 9445 }, { "epoch": 0.5667486650266995, "grad_norm": 1.2986069917678833, "learning_rate": 2.916434573316208e-06, "loss": 0.3586, "step": 9446 }, { "epoch": 0.5668086638267235, "grad_norm": 1.1671451330184937, "learning_rate": 2.91576393859033e-06, "loss": 0.3945, "step": 9447 }, { "epoch": 0.5668686626267475, "grad_norm": 1.4930607080459595, "learning_rate": 2.915093325928385e-06, "loss": 0.4958, "step": 9448 }, { "epoch": 0.5669286614267715, "grad_norm": 1.2315839529037476, "learning_rate": 2.9144227353557005e-06, "loss": 0.3971, "step": 9449 }, { "epoch": 0.5669886602267955, "grad_norm": 1.2769622802734375, "learning_rate": 2.9137521668976015e-06, "loss": 0.4434, "step": 9450 }, { "epoch": 0.5670486590268194, "grad_norm": 1.3148325681686401, "learning_rate": 2.9130816205794115e-06, "loss": 0.4107, "step": 9451 }, { "epoch": 0.5671086578268435, "grad_norm": 1.3375695943832397, "learning_rate": 2.912411096426456e-06, "loss": 0.4287, "step": 9452 }, { "epoch": 0.5671686566268674, "grad_norm": 1.284196376800537, "learning_rate": 2.911740594464056e-06, "loss": 0.4283, "step": 9453 }, { "epoch": 0.5672286554268915, "grad_norm": 1.4486597776412964, "learning_rate": 2.9110701147175337e-06, "loss": 0.434, "step": 9454 }, { "epoch": 0.5672886542269154, "grad_norm": 1.2832320928573608, "learning_rate": 2.910399657212209e-06, "loss": 0.4317, "step": 9455 }, { "epoch": 0.5673486530269395, "grad_norm": 1.3254766464233398, "learning_rate": 2.909729221973404e-06, "loss": 0.3814, "step": 9456 }, { "epoch": 0.5674086518269634, "grad_norm": 1.290088176727295, "learning_rate": 2.909058809026437e-06, "loss": 0.3666, "step": 9457 }, { "epoch": 0.5674686506269875, "grad_norm": 1.2172735929489136, "learning_rate": 2.908388418396625e-06, "loss": 0.3836, "step": 9458 }, { "epoch": 0.5675286494270114, "grad_norm": 1.27768075466156, "learning_rate": 2.9077180501092894e-06, "loss": 0.3927, "step": 9459 }, { "epoch": 0.5675886482270355, "grad_norm": 1.3511217832565308, "learning_rate": 2.9070477041897423e-06, "loss": 0.4191, "step": 9460 }, { "epoch": 0.5676486470270594, "grad_norm": 1.192368984222412, "learning_rate": 2.9063773806633047e-06, "loss": 0.4201, "step": 9461 }, { "epoch": 0.5677086458270835, "grad_norm": 1.2595528364181519, "learning_rate": 2.905707079555286e-06, "loss": 0.3419, "step": 9462 }, { "epoch": 0.5677686446271074, "grad_norm": 1.321318507194519, "learning_rate": 2.9050368008910043e-06, "loss": 0.4013, "step": 9463 }, { "epoch": 0.5678286434271315, "grad_norm": 1.2836476564407349, "learning_rate": 2.9043665446957726e-06, "loss": 0.4042, "step": 9464 }, { "epoch": 0.5678886422271554, "grad_norm": 1.1947648525238037, "learning_rate": 2.9036963109949022e-06, "loss": 0.3859, "step": 9465 }, { "epoch": 0.5679486410271795, "grad_norm": 1.2935185432434082, "learning_rate": 2.9030260998137063e-06, "loss": 0.3973, "step": 9466 }, { "epoch": 0.5680086398272035, "grad_norm": 1.2706525325775146, "learning_rate": 2.902355911177494e-06, "loss": 0.4052, "step": 9467 }, { "epoch": 0.5680686386272275, "grad_norm": 1.363995909690857, "learning_rate": 2.901685745111576e-06, "loss": 0.3781, "step": 9468 }, { "epoch": 0.5681286374272515, "grad_norm": 1.2724062204360962, "learning_rate": 2.901015601641262e-06, "loss": 0.4169, "step": 9469 }, { "epoch": 0.5681886362272754, "grad_norm": 1.4048179388046265, "learning_rate": 2.900345480791859e-06, "loss": 0.4051, "step": 9470 }, { "epoch": 0.5682486350272995, "grad_norm": 1.195898175239563, "learning_rate": 2.8996753825886763e-06, "loss": 0.3889, "step": 9471 }, { "epoch": 0.5683086338273234, "grad_norm": 1.2986431121826172, "learning_rate": 2.8990053070570184e-06, "loss": 0.4221, "step": 9472 }, { "epoch": 0.5683686326273475, "grad_norm": 1.2790719270706177, "learning_rate": 2.898335254222193e-06, "loss": 0.3441, "step": 9473 }, { "epoch": 0.5684286314273714, "grad_norm": 1.2127254009246826, "learning_rate": 2.897665224109503e-06, "loss": 0.4085, "step": 9474 }, { "epoch": 0.5684886302273955, "grad_norm": 1.2848509550094604, "learning_rate": 2.896995216744252e-06, "loss": 0.396, "step": 9475 }, { "epoch": 0.5685486290274194, "grad_norm": 1.1124573945999146, "learning_rate": 2.896325232151747e-06, "loss": 0.3732, "step": 9476 }, { "epoch": 0.5686086278274435, "grad_norm": 1.320370078086853, "learning_rate": 2.8956552703572855e-06, "loss": 0.4028, "step": 9477 }, { "epoch": 0.5686686266274674, "grad_norm": 1.2737926244735718, "learning_rate": 2.894985331386172e-06, "loss": 0.3789, "step": 9478 }, { "epoch": 0.5687286254274915, "grad_norm": 1.2294158935546875, "learning_rate": 2.8943154152637056e-06, "loss": 0.3675, "step": 9479 }, { "epoch": 0.5687886242275154, "grad_norm": 1.2615649700164795, "learning_rate": 2.893645522015187e-06, "loss": 0.3944, "step": 9480 }, { "epoch": 0.5688486230275395, "grad_norm": 1.2685638666152954, "learning_rate": 2.892975651665915e-06, "loss": 0.4105, "step": 9481 }, { "epoch": 0.5689086218275634, "grad_norm": 1.3330435752868652, "learning_rate": 2.892305804241186e-06, "loss": 0.4226, "step": 9482 }, { "epoch": 0.5689686206275875, "grad_norm": 1.2104541063308716, "learning_rate": 2.891635979766299e-06, "loss": 0.3828, "step": 9483 }, { "epoch": 0.5690286194276114, "grad_norm": 1.2127962112426758, "learning_rate": 2.890966178266549e-06, "loss": 0.3934, "step": 9484 }, { "epoch": 0.5690886182276355, "grad_norm": 1.3147822618484497, "learning_rate": 2.8902963997672317e-06, "loss": 0.3997, "step": 9485 }, { "epoch": 0.5691486170276594, "grad_norm": 1.1814229488372803, "learning_rate": 2.8896266442936413e-06, "loss": 0.3341, "step": 9486 }, { "epoch": 0.5692086158276835, "grad_norm": 1.2123538255691528, "learning_rate": 2.8889569118710708e-06, "loss": 0.344, "step": 9487 }, { "epoch": 0.5692686146277074, "grad_norm": 1.3083033561706543, "learning_rate": 2.888287202524816e-06, "loss": 0.4089, "step": 9488 }, { "epoch": 0.5693286134277314, "grad_norm": 1.3231614828109741, "learning_rate": 2.8876175162801643e-06, "loss": 0.3723, "step": 9489 }, { "epoch": 0.5693886122277555, "grad_norm": 1.3011409044265747, "learning_rate": 2.8869478531624115e-06, "loss": 0.3901, "step": 9490 }, { "epoch": 0.5694486110277794, "grad_norm": 1.4798249006271362, "learning_rate": 2.886278213196843e-06, "loss": 0.3956, "step": 9491 }, { "epoch": 0.5695086098278035, "grad_norm": 1.4301131963729858, "learning_rate": 2.8856085964087512e-06, "loss": 0.4286, "step": 9492 }, { "epoch": 0.5695686086278274, "grad_norm": 1.4860460758209229, "learning_rate": 2.884939002823424e-06, "loss": 0.4308, "step": 9493 }, { "epoch": 0.5696286074278515, "grad_norm": 1.3087849617004395, "learning_rate": 2.8842694324661478e-06, "loss": 0.3696, "step": 9494 }, { "epoch": 0.5696886062278754, "grad_norm": 1.2484959363937378, "learning_rate": 2.8835998853622105e-06, "loss": 0.4103, "step": 9495 }, { "epoch": 0.5697486050278995, "grad_norm": 1.4493141174316406, "learning_rate": 2.8829303615368964e-06, "loss": 0.4065, "step": 9496 }, { "epoch": 0.5698086038279234, "grad_norm": 1.3656178712844849, "learning_rate": 2.882260861015492e-06, "loss": 0.4069, "step": 9497 }, { "epoch": 0.5698686026279475, "grad_norm": 1.2675968408584595, "learning_rate": 2.88159138382328e-06, "loss": 0.3822, "step": 9498 }, { "epoch": 0.5699286014279714, "grad_norm": 1.1621085405349731, "learning_rate": 2.8809219299855436e-06, "loss": 0.4084, "step": 9499 }, { "epoch": 0.5699886002279955, "grad_norm": 1.2777502536773682, "learning_rate": 2.8802524995275664e-06, "loss": 0.4342, "step": 9500 }, { "epoch": 0.5700485990280194, "grad_norm": 1.2652239799499512, "learning_rate": 2.8795830924746277e-06, "loss": 0.3914, "step": 9501 }, { "epoch": 0.5701085978280435, "grad_norm": 1.2116085290908813, "learning_rate": 2.87891370885201e-06, "loss": 0.3853, "step": 9502 }, { "epoch": 0.5701685966280674, "grad_norm": 1.422574758529663, "learning_rate": 2.878244348684991e-06, "loss": 0.3961, "step": 9503 }, { "epoch": 0.5702285954280915, "grad_norm": 1.2802883386611938, "learning_rate": 2.8775750119988495e-06, "loss": 0.3732, "step": 9504 }, { "epoch": 0.5702885942281154, "grad_norm": 1.2535982131958008, "learning_rate": 2.8769056988188666e-06, "loss": 0.4008, "step": 9505 }, { "epoch": 0.5703485930281395, "grad_norm": 1.258881688117981, "learning_rate": 2.8762364091703137e-06, "loss": 0.4046, "step": 9506 }, { "epoch": 0.5704085918281634, "grad_norm": 1.3293319940567017, "learning_rate": 2.8755671430784725e-06, "loss": 0.4286, "step": 9507 }, { "epoch": 0.5704685906281874, "grad_norm": 1.128566026687622, "learning_rate": 2.8748979005686134e-06, "loss": 0.4043, "step": 9508 }, { "epoch": 0.5705285894282114, "grad_norm": 1.409989356994629, "learning_rate": 2.874228681666013e-06, "loss": 0.4289, "step": 9509 }, { "epoch": 0.5705885882282354, "grad_norm": 1.3592575788497925, "learning_rate": 2.873559486395945e-06, "loss": 0.4127, "step": 9510 }, { "epoch": 0.5706485870282594, "grad_norm": 1.1646806001663208, "learning_rate": 2.8728903147836802e-06, "loss": 0.3722, "step": 9511 }, { "epoch": 0.5707085858282834, "grad_norm": 1.1362299919128418, "learning_rate": 2.872221166854492e-06, "loss": 0.4179, "step": 9512 }, { "epoch": 0.5707685846283075, "grad_norm": 1.230289101600647, "learning_rate": 2.8715520426336497e-06, "loss": 0.4226, "step": 9513 }, { "epoch": 0.5708285834283314, "grad_norm": 1.3415167331695557, "learning_rate": 2.870882942146424e-06, "loss": 0.3921, "step": 9514 }, { "epoch": 0.5708885822283555, "grad_norm": 1.1989344358444214, "learning_rate": 2.870213865418083e-06, "loss": 0.3908, "step": 9515 }, { "epoch": 0.5709485810283794, "grad_norm": 1.2344988584518433, "learning_rate": 2.8695448124738948e-06, "loss": 0.3824, "step": 9516 }, { "epoch": 0.5710085798284035, "grad_norm": 1.2853314876556396, "learning_rate": 2.868875783339127e-06, "loss": 0.3792, "step": 9517 }, { "epoch": 0.5710685786284274, "grad_norm": 1.1948599815368652, "learning_rate": 2.8682067780390454e-06, "loss": 0.3697, "step": 9518 }, { "epoch": 0.5711285774284515, "grad_norm": 1.2570877075195312, "learning_rate": 2.8675377965989158e-06, "loss": 0.403, "step": 9519 }, { "epoch": 0.5711885762284754, "grad_norm": 1.1400291919708252, "learning_rate": 2.866868839044001e-06, "loss": 0.3681, "step": 9520 }, { "epoch": 0.5712485750284995, "grad_norm": 1.2464746236801147, "learning_rate": 2.866199905399565e-06, "loss": 0.3789, "step": 9521 }, { "epoch": 0.5713085738285234, "grad_norm": 1.2687510251998901, "learning_rate": 2.865530995690873e-06, "loss": 0.3677, "step": 9522 }, { "epoch": 0.5713685726285475, "grad_norm": 1.1964192390441895, "learning_rate": 2.864862109943184e-06, "loss": 0.3816, "step": 9523 }, { "epoch": 0.5714285714285714, "grad_norm": 1.3479183912277222, "learning_rate": 2.8641932481817594e-06, "loss": 0.4022, "step": 9524 }, { "epoch": 0.5714885702285954, "grad_norm": 1.2495945692062378, "learning_rate": 2.8635244104318585e-06, "loss": 0.3667, "step": 9525 }, { "epoch": 0.5715485690286194, "grad_norm": 1.2571170330047607, "learning_rate": 2.8628555967187418e-06, "loss": 0.4077, "step": 9526 }, { "epoch": 0.5716085678286434, "grad_norm": 1.3054085969924927, "learning_rate": 2.862186807067666e-06, "loss": 0.3758, "step": 9527 }, { "epoch": 0.5716685666286674, "grad_norm": 1.2197458744049072, "learning_rate": 2.8615180415038878e-06, "loss": 0.4378, "step": 9528 }, { "epoch": 0.5717285654286914, "grad_norm": 1.2749415636062622, "learning_rate": 2.860849300052666e-06, "loss": 0.4018, "step": 9529 }, { "epoch": 0.5717885642287154, "grad_norm": 1.1782809495925903, "learning_rate": 2.860180582739253e-06, "loss": 0.3766, "step": 9530 }, { "epoch": 0.5718485630287394, "grad_norm": 1.3337023258209229, "learning_rate": 2.859511889588905e-06, "loss": 0.4072, "step": 9531 }, { "epoch": 0.5719085618287634, "grad_norm": 1.3282514810562134, "learning_rate": 2.858843220626874e-06, "loss": 0.3863, "step": 9532 }, { "epoch": 0.5719685606287874, "grad_norm": 1.2289079427719116, "learning_rate": 2.8581745758784128e-06, "loss": 0.3674, "step": 9533 }, { "epoch": 0.5720285594288115, "grad_norm": 1.2798566818237305, "learning_rate": 2.857505955368776e-06, "loss": 0.3847, "step": 9534 }, { "epoch": 0.5720885582288354, "grad_norm": 1.2979122400283813, "learning_rate": 2.85683735912321e-06, "loss": 0.4596, "step": 9535 }, { "epoch": 0.5721485570288595, "grad_norm": 1.2428250312805176, "learning_rate": 2.8561687871669686e-06, "loss": 0.3822, "step": 9536 }, { "epoch": 0.5722085558288834, "grad_norm": 1.4843412637710571, "learning_rate": 2.855500239525297e-06, "loss": 0.3641, "step": 9537 }, { "epoch": 0.5722685546289075, "grad_norm": 1.3086504936218262, "learning_rate": 2.854831716223447e-06, "loss": 0.3627, "step": 9538 }, { "epoch": 0.5723285534289314, "grad_norm": 1.409630537033081, "learning_rate": 2.8541632172866614e-06, "loss": 0.4049, "step": 9539 }, { "epoch": 0.5723885522289555, "grad_norm": 1.2624156475067139, "learning_rate": 2.8534947427401895e-06, "loss": 0.3961, "step": 9540 }, { "epoch": 0.5724485510289794, "grad_norm": 1.3594616651535034, "learning_rate": 2.8528262926092764e-06, "loss": 0.437, "step": 9541 }, { "epoch": 0.5725085498290035, "grad_norm": 1.2417200803756714, "learning_rate": 2.852157866919165e-06, "loss": 0.3589, "step": 9542 }, { "epoch": 0.5725685486290274, "grad_norm": 1.2510356903076172, "learning_rate": 2.8514894656951006e-06, "loss": 0.3942, "step": 9543 }, { "epoch": 0.5726285474290514, "grad_norm": 1.339532494544983, "learning_rate": 2.850821088962323e-06, "loss": 0.3875, "step": 9544 }, { "epoch": 0.5726885462290754, "grad_norm": 3.707090377807617, "learning_rate": 2.850152736746076e-06, "loss": 0.4438, "step": 9545 }, { "epoch": 0.5727485450290994, "grad_norm": 1.321914792060852, "learning_rate": 2.8494844090715994e-06, "loss": 0.4219, "step": 9546 }, { "epoch": 0.5728085438291234, "grad_norm": 1.2517294883728027, "learning_rate": 2.8488161059641323e-06, "loss": 0.3459, "step": 9547 }, { "epoch": 0.5728685426291474, "grad_norm": 1.2952470779418945, "learning_rate": 2.848147827448915e-06, "loss": 0.3787, "step": 9548 }, { "epoch": 0.5729285414291714, "grad_norm": 1.2339749336242676, "learning_rate": 2.8474795735511836e-06, "loss": 0.3456, "step": 9549 }, { "epoch": 0.5729885402291954, "grad_norm": 1.2800259590148926, "learning_rate": 2.8468113442961753e-06, "loss": 0.4107, "step": 9550 }, { "epoch": 0.5730485390292194, "grad_norm": 1.2823078632354736, "learning_rate": 2.846143139709129e-06, "loss": 0.4016, "step": 9551 }, { "epoch": 0.5731085378292434, "grad_norm": 1.2463586330413818, "learning_rate": 2.8454749598152743e-06, "loss": 0.3404, "step": 9552 }, { "epoch": 0.5731685366292674, "grad_norm": 1.2968031167984009, "learning_rate": 2.8448068046398516e-06, "loss": 0.4377, "step": 9553 }, { "epoch": 0.5732285354292914, "grad_norm": 1.3499009609222412, "learning_rate": 2.844138674208088e-06, "loss": 0.4309, "step": 9554 }, { "epoch": 0.5732885342293154, "grad_norm": 1.2030224800109863, "learning_rate": 2.843470568545221e-06, "loss": 0.3486, "step": 9555 }, { "epoch": 0.5733485330293394, "grad_norm": 1.1446343660354614, "learning_rate": 2.842802487676477e-06, "loss": 0.3782, "step": 9556 }, { "epoch": 0.5734085318293635, "grad_norm": 1.254815936088562, "learning_rate": 2.8421344316270898e-06, "loss": 0.3484, "step": 9557 }, { "epoch": 0.5734685306293874, "grad_norm": 1.252492904663086, "learning_rate": 2.841466400422289e-06, "loss": 0.3804, "step": 9558 }, { "epoch": 0.5735285294294115, "grad_norm": 1.3550143241882324, "learning_rate": 2.8407983940873006e-06, "loss": 0.3846, "step": 9559 }, { "epoch": 0.5735885282294354, "grad_norm": 1.259876012802124, "learning_rate": 2.8401304126473546e-06, "loss": 0.3803, "step": 9560 }, { "epoch": 0.5736485270294595, "grad_norm": 1.2734119892120361, "learning_rate": 2.8394624561276757e-06, "loss": 0.3973, "step": 9561 }, { "epoch": 0.5737085258294834, "grad_norm": 1.3565349578857422, "learning_rate": 2.8387945245534906e-06, "loss": 0.397, "step": 9562 }, { "epoch": 0.5737685246295074, "grad_norm": 1.3523585796356201, "learning_rate": 2.8381266179500244e-06, "loss": 0.3923, "step": 9563 }, { "epoch": 0.5738285234295314, "grad_norm": 1.272947907447815, "learning_rate": 2.8374587363424996e-06, "loss": 0.3985, "step": 9564 }, { "epoch": 0.5738885222295554, "grad_norm": 1.28315269947052, "learning_rate": 2.836790879756141e-06, "loss": 0.3874, "step": 9565 }, { "epoch": 0.5739485210295794, "grad_norm": 1.3422566652297974, "learning_rate": 2.836123048216168e-06, "loss": 0.4171, "step": 9566 }, { "epoch": 0.5740085198296034, "grad_norm": 1.1778091192245483, "learning_rate": 2.835455241747804e-06, "loss": 0.3805, "step": 9567 }, { "epoch": 0.5740685186296274, "grad_norm": 1.3852046728134155, "learning_rate": 2.834787460376266e-06, "loss": 0.4237, "step": 9568 }, { "epoch": 0.5741285174296514, "grad_norm": 1.3784061670303345, "learning_rate": 2.834119704126776e-06, "loss": 0.394, "step": 9569 }, { "epoch": 0.5741885162296754, "grad_norm": 1.2065320014953613, "learning_rate": 2.8334519730245515e-06, "loss": 0.4226, "step": 9570 }, { "epoch": 0.5742485150296994, "grad_norm": 1.1979831457138062, "learning_rate": 2.8327842670948084e-06, "loss": 0.364, "step": 9571 }, { "epoch": 0.5743085138297234, "grad_norm": 1.2770155668258667, "learning_rate": 2.832116586362765e-06, "loss": 0.3923, "step": 9572 }, { "epoch": 0.5743685126297474, "grad_norm": 1.2384076118469238, "learning_rate": 2.831448930853634e-06, "loss": 0.3771, "step": 9573 }, { "epoch": 0.5744285114297714, "grad_norm": 1.2148163318634033, "learning_rate": 2.830781300592631e-06, "loss": 0.3847, "step": 9574 }, { "epoch": 0.5744885102297954, "grad_norm": 1.2815752029418945, "learning_rate": 2.83011369560497e-06, "loss": 0.4046, "step": 9575 }, { "epoch": 0.5745485090298194, "grad_norm": 1.5234342813491821, "learning_rate": 2.8294461159158625e-06, "loss": 0.4982, "step": 9576 }, { "epoch": 0.5746085078298434, "grad_norm": 1.2258453369140625, "learning_rate": 2.8287785615505206e-06, "loss": 0.3741, "step": 9577 }, { "epoch": 0.5746685066298673, "grad_norm": 1.1543359756469727, "learning_rate": 2.8281110325341532e-06, "loss": 0.392, "step": 9578 }, { "epoch": 0.5747285054298914, "grad_norm": 1.2565782070159912, "learning_rate": 2.8274435288919724e-06, "loss": 0.3968, "step": 9579 }, { "epoch": 0.5747885042299155, "grad_norm": 1.3421711921691895, "learning_rate": 2.826776050649184e-06, "loss": 0.4327, "step": 9580 }, { "epoch": 0.5748485030299394, "grad_norm": 1.325461506843567, "learning_rate": 2.8261085978309966e-06, "loss": 0.3788, "step": 9581 }, { "epoch": 0.5749085018299634, "grad_norm": 1.3653782606124878, "learning_rate": 2.8254411704626195e-06, "loss": 0.3625, "step": 9582 }, { "epoch": 0.5749685006299874, "grad_norm": 1.2556265592575073, "learning_rate": 2.8247737685692538e-06, "loss": 0.4093, "step": 9583 }, { "epoch": 0.5750284994300114, "grad_norm": 1.2448272705078125, "learning_rate": 2.8241063921761084e-06, "loss": 0.3415, "step": 9584 }, { "epoch": 0.5750884982300354, "grad_norm": 1.2983291149139404, "learning_rate": 2.8234390413083835e-06, "loss": 0.3657, "step": 9585 }, { "epoch": 0.5751484970300594, "grad_norm": 1.3052175045013428, "learning_rate": 2.8227717159912842e-06, "loss": 0.3824, "step": 9586 }, { "epoch": 0.5752084958300834, "grad_norm": 1.2741729021072388, "learning_rate": 2.8221044162500123e-06, "loss": 0.3733, "step": 9587 }, { "epoch": 0.5752684946301074, "grad_norm": 1.3316218852996826, "learning_rate": 2.8214371421097675e-06, "loss": 0.4166, "step": 9588 }, { "epoch": 0.5753284934301314, "grad_norm": 1.440007209777832, "learning_rate": 2.820769893595751e-06, "loss": 0.4114, "step": 9589 }, { "epoch": 0.5753884922301554, "grad_norm": 1.285215139389038, "learning_rate": 2.8201026707331604e-06, "loss": 0.4047, "step": 9590 }, { "epoch": 0.5754484910301794, "grad_norm": 1.2931976318359375, "learning_rate": 2.8194354735471946e-06, "loss": 0.4859, "step": 9591 }, { "epoch": 0.5755084898302034, "grad_norm": 1.3417879343032837, "learning_rate": 2.8187683020630506e-06, "loss": 0.4004, "step": 9592 }, { "epoch": 0.5755684886302274, "grad_norm": 1.3172521591186523, "learning_rate": 2.8181011563059234e-06, "loss": 0.4005, "step": 9593 }, { "epoch": 0.5756284874302514, "grad_norm": 1.3275511264801025, "learning_rate": 2.81743403630101e-06, "loss": 0.4401, "step": 9594 }, { "epoch": 0.5756884862302754, "grad_norm": 1.216720700263977, "learning_rate": 2.8167669420735026e-06, "loss": 0.3805, "step": 9595 }, { "epoch": 0.5757484850302994, "grad_norm": 1.1320421695709229, "learning_rate": 2.816099873648595e-06, "loss": 0.3879, "step": 9596 }, { "epoch": 0.5758084838303233, "grad_norm": 1.2358431816101074, "learning_rate": 2.8154328310514792e-06, "loss": 0.3969, "step": 9597 }, { "epoch": 0.5758684826303474, "grad_norm": 1.3182625770568848, "learning_rate": 2.8147658143073455e-06, "loss": 0.385, "step": 9598 }, { "epoch": 0.5759284814303713, "grad_norm": 1.3422025442123413, "learning_rate": 2.8140988234413875e-06, "loss": 0.3781, "step": 9599 }, { "epoch": 0.5759884802303954, "grad_norm": 1.1746845245361328, "learning_rate": 2.813431858478789e-06, "loss": 0.3836, "step": 9600 }, { "epoch": 0.5760484790304194, "grad_norm": 1.4513962268829346, "learning_rate": 2.8127649194447443e-06, "loss": 0.4011, "step": 9601 }, { "epoch": 0.5761084778304434, "grad_norm": 1.2777217626571655, "learning_rate": 2.812098006364435e-06, "loss": 0.4134, "step": 9602 }, { "epoch": 0.5761684766304674, "grad_norm": 1.3251421451568604, "learning_rate": 2.811431119263051e-06, "loss": 0.3847, "step": 9603 }, { "epoch": 0.5762284754304914, "grad_norm": 1.2725049257278442, "learning_rate": 2.810764258165777e-06, "loss": 0.3874, "step": 9604 }, { "epoch": 0.5762884742305154, "grad_norm": 1.5634031295776367, "learning_rate": 2.810097423097796e-06, "loss": 0.4096, "step": 9605 }, { "epoch": 0.5763484730305394, "grad_norm": 1.1717427968978882, "learning_rate": 2.8094306140842925e-06, "loss": 0.3699, "step": 9606 }, { "epoch": 0.5764084718305634, "grad_norm": 1.222719430923462, "learning_rate": 2.808763831150448e-06, "loss": 0.4079, "step": 9607 }, { "epoch": 0.5764684706305874, "grad_norm": 1.1556823253631592, "learning_rate": 2.8080970743214456e-06, "loss": 0.3776, "step": 9608 }, { "epoch": 0.5765284694306114, "grad_norm": 1.3534648418426514, "learning_rate": 2.8074303436224634e-06, "loss": 0.3889, "step": 9609 }, { "epoch": 0.5765884682306354, "grad_norm": 1.3924739360809326, "learning_rate": 2.8067636390786816e-06, "loss": 0.4092, "step": 9610 }, { "epoch": 0.5766484670306594, "grad_norm": 1.3534663915634155, "learning_rate": 2.8060969607152796e-06, "loss": 0.4114, "step": 9611 }, { "epoch": 0.5767084658306834, "grad_norm": 1.272803544998169, "learning_rate": 2.8054303085574333e-06, "loss": 0.4169, "step": 9612 }, { "epoch": 0.5767684646307074, "grad_norm": 1.456404685974121, "learning_rate": 2.8047636826303204e-06, "loss": 0.3905, "step": 9613 }, { "epoch": 0.5768284634307314, "grad_norm": 1.4284071922302246, "learning_rate": 2.8040970829591146e-06, "loss": 0.3922, "step": 9614 }, { "epoch": 0.5768884622307554, "grad_norm": 1.2739014625549316, "learning_rate": 2.8034305095689916e-06, "loss": 0.3873, "step": 9615 }, { "epoch": 0.5769484610307793, "grad_norm": 1.2894824743270874, "learning_rate": 2.802763962485126e-06, "loss": 0.3529, "step": 9616 }, { "epoch": 0.5770084598308034, "grad_norm": 1.1608742475509644, "learning_rate": 2.802097441732688e-06, "loss": 0.3511, "step": 9617 }, { "epoch": 0.5770684586308273, "grad_norm": 1.326498031616211, "learning_rate": 2.8014309473368503e-06, "loss": 0.4224, "step": 9618 }, { "epoch": 0.5771284574308514, "grad_norm": 1.2185404300689697, "learning_rate": 2.8007644793227826e-06, "loss": 0.3558, "step": 9619 }, { "epoch": 0.5771884562308753, "grad_norm": 1.3374327421188354, "learning_rate": 2.8000980377156552e-06, "loss": 0.3985, "step": 9620 }, { "epoch": 0.5772484550308994, "grad_norm": 1.2766313552856445, "learning_rate": 2.7994316225406365e-06, "loss": 0.3752, "step": 9621 }, { "epoch": 0.5773084538309233, "grad_norm": 1.24949312210083, "learning_rate": 2.798765233822893e-06, "loss": 0.3568, "step": 9622 }, { "epoch": 0.5773684526309474, "grad_norm": 1.2056512832641602, "learning_rate": 2.7980988715875924e-06, "loss": 0.3932, "step": 9623 }, { "epoch": 0.5774284514309714, "grad_norm": 1.177912712097168, "learning_rate": 2.797432535859899e-06, "loss": 0.3476, "step": 9624 }, { "epoch": 0.5774884502309954, "grad_norm": 1.249556303024292, "learning_rate": 2.7967662266649786e-06, "loss": 0.4034, "step": 9625 }, { "epoch": 0.5775484490310194, "grad_norm": 1.3988690376281738, "learning_rate": 2.7960999440279928e-06, "loss": 0.4446, "step": 9626 }, { "epoch": 0.5776084478310434, "grad_norm": 1.1872012615203857, "learning_rate": 2.795433687974105e-06, "loss": 0.3748, "step": 9627 }, { "epoch": 0.5776684466310674, "grad_norm": 1.2803142070770264, "learning_rate": 2.7947674585284785e-06, "loss": 0.3921, "step": 9628 }, { "epoch": 0.5777284454310914, "grad_norm": 1.1601592302322388, "learning_rate": 2.79410125571627e-06, "loss": 0.361, "step": 9629 }, { "epoch": 0.5777884442311154, "grad_norm": 1.3660460710525513, "learning_rate": 2.7934350795626434e-06, "loss": 0.3925, "step": 9630 }, { "epoch": 0.5778484430311394, "grad_norm": 1.453865885734558, "learning_rate": 2.7927689300927526e-06, "loss": 0.4542, "step": 9631 }, { "epoch": 0.5779084418311634, "grad_norm": 1.3158670663833618, "learning_rate": 2.7921028073317574e-06, "loss": 0.385, "step": 9632 }, { "epoch": 0.5779684406311874, "grad_norm": 1.4803824424743652, "learning_rate": 2.7914367113048153e-06, "loss": 0.437, "step": 9633 }, { "epoch": 0.5780284394312114, "grad_norm": 1.199663758277893, "learning_rate": 2.7907706420370793e-06, "loss": 0.3811, "step": 9634 }, { "epoch": 0.5780884382312353, "grad_norm": 1.167934536933899, "learning_rate": 2.790104599553706e-06, "loss": 0.3386, "step": 9635 }, { "epoch": 0.5781484370312594, "grad_norm": 1.4498347043991089, "learning_rate": 2.7894385838798464e-06, "loss": 0.4082, "step": 9636 }, { "epoch": 0.5782084358312833, "grad_norm": 1.3362935781478882, "learning_rate": 2.7887725950406556e-06, "loss": 0.4223, "step": 9637 }, { "epoch": 0.5782684346313074, "grad_norm": 1.2753798961639404, "learning_rate": 2.7881066330612823e-06, "loss": 0.3834, "step": 9638 }, { "epoch": 0.5783284334313313, "grad_norm": 1.2828489542007446, "learning_rate": 2.7874406979668783e-06, "loss": 0.3905, "step": 9639 }, { "epoch": 0.5783884322313554, "grad_norm": 1.2047185897827148, "learning_rate": 2.7867747897825933e-06, "loss": 0.3715, "step": 9640 }, { "epoch": 0.5784484310313793, "grad_norm": 1.432550311088562, "learning_rate": 2.7861089085335744e-06, "loss": 0.4126, "step": 9641 }, { "epoch": 0.5785084298314034, "grad_norm": 1.2327065467834473, "learning_rate": 2.7854430542449706e-06, "loss": 0.3919, "step": 9642 }, { "epoch": 0.5785684286314273, "grad_norm": 1.4175379276275635, "learning_rate": 2.784777226941926e-06, "loss": 0.3856, "step": 9643 }, { "epoch": 0.5786284274314514, "grad_norm": 1.3745251893997192, "learning_rate": 2.784111426649586e-06, "loss": 0.409, "step": 9644 }, { "epoch": 0.5786884262314753, "grad_norm": 1.3067820072174072, "learning_rate": 2.7834456533930985e-06, "loss": 0.3978, "step": 9645 }, { "epoch": 0.5787484250314994, "grad_norm": 1.2991873025894165, "learning_rate": 2.7827799071976017e-06, "loss": 0.4042, "step": 9646 }, { "epoch": 0.5788084238315234, "grad_norm": 1.1801178455352783, "learning_rate": 2.7821141880882424e-06, "loss": 0.3952, "step": 9647 }, { "epoch": 0.5788684226315474, "grad_norm": 1.2361836433410645, "learning_rate": 2.7814484960901572e-06, "loss": 0.4114, "step": 9648 }, { "epoch": 0.5789284214315714, "grad_norm": 1.4386601448059082, "learning_rate": 2.7807828312284903e-06, "loss": 0.4425, "step": 9649 }, { "epoch": 0.5789884202315954, "grad_norm": 1.311785340309143, "learning_rate": 2.780117193528378e-06, "loss": 0.3691, "step": 9650 }, { "epoch": 0.5790484190316194, "grad_norm": 1.105989933013916, "learning_rate": 2.7794515830149603e-06, "loss": 0.3189, "step": 9651 }, { "epoch": 0.5791084178316434, "grad_norm": 1.3558610677719116, "learning_rate": 2.778785999713374e-06, "loss": 0.3845, "step": 9652 }, { "epoch": 0.5791684166316674, "grad_norm": 1.4063259363174438, "learning_rate": 2.7781204436487534e-06, "loss": 0.4038, "step": 9653 }, { "epoch": 0.5792284154316913, "grad_norm": 1.4072864055633545, "learning_rate": 2.7774549148462357e-06, "loss": 0.4002, "step": 9654 }, { "epoch": 0.5792884142317154, "grad_norm": 1.331497073173523, "learning_rate": 2.776789413330953e-06, "loss": 0.3944, "step": 9655 }, { "epoch": 0.5793484130317393, "grad_norm": 1.3857535123825073, "learning_rate": 2.7761239391280393e-06, "loss": 0.4288, "step": 9656 }, { "epoch": 0.5794084118317634, "grad_norm": 1.2274878025054932, "learning_rate": 2.7754584922626278e-06, "loss": 0.3804, "step": 9657 }, { "epoch": 0.5794684106317873, "grad_norm": 1.3682388067245483, "learning_rate": 2.774793072759846e-06, "loss": 0.4071, "step": 9658 }, { "epoch": 0.5795284094318114, "grad_norm": 1.3943301439285278, "learning_rate": 2.7741276806448283e-06, "loss": 0.3995, "step": 9659 }, { "epoch": 0.5795884082318353, "grad_norm": 1.3204495906829834, "learning_rate": 2.7734623159426985e-06, "loss": 0.3838, "step": 9660 }, { "epoch": 0.5796484070318594, "grad_norm": 1.294668197631836, "learning_rate": 2.7727969786785875e-06, "loss": 0.3827, "step": 9661 }, { "epoch": 0.5797084058318833, "grad_norm": 1.2433507442474365, "learning_rate": 2.772131668877623e-06, "loss": 0.3657, "step": 9662 }, { "epoch": 0.5797684046319074, "grad_norm": 1.2425627708435059, "learning_rate": 2.771466386564928e-06, "loss": 0.3747, "step": 9663 }, { "epoch": 0.5798284034319313, "grad_norm": 1.3169255256652832, "learning_rate": 2.770801131765629e-06, "loss": 0.3821, "step": 9664 }, { "epoch": 0.5798884022319554, "grad_norm": 1.231698989868164, "learning_rate": 2.7701359045048483e-06, "loss": 0.3995, "step": 9665 }, { "epoch": 0.5799484010319793, "grad_norm": 1.2216267585754395, "learning_rate": 2.7694707048077107e-06, "loss": 0.3938, "step": 9666 }, { "epoch": 0.5800083998320034, "grad_norm": 1.1516700983047485, "learning_rate": 2.7688055326993348e-06, "loss": 0.3894, "step": 9667 }, { "epoch": 0.5800683986320273, "grad_norm": 1.339869737625122, "learning_rate": 2.768140388204843e-06, "loss": 0.3686, "step": 9668 }, { "epoch": 0.5801283974320514, "grad_norm": 1.273429036140442, "learning_rate": 2.7674752713493545e-06, "loss": 0.4408, "step": 9669 }, { "epoch": 0.5801883962320754, "grad_norm": 1.310141682624817, "learning_rate": 2.766810182157988e-06, "loss": 0.3654, "step": 9670 }, { "epoch": 0.5802483950320994, "grad_norm": 1.39793062210083, "learning_rate": 2.7661451206558604e-06, "loss": 0.3927, "step": 9671 }, { "epoch": 0.5803083938321234, "grad_norm": 1.2058993577957153, "learning_rate": 2.7654800868680872e-06, "loss": 0.3512, "step": 9672 }, { "epoch": 0.5803683926321473, "grad_norm": 1.4906063079833984, "learning_rate": 2.764815080819784e-06, "loss": 0.4179, "step": 9673 }, { "epoch": 0.5804283914321714, "grad_norm": 1.148339867591858, "learning_rate": 2.7641501025360683e-06, "loss": 0.3696, "step": 9674 }, { "epoch": 0.5804883902321953, "grad_norm": 1.2472617626190186, "learning_rate": 2.763485152042049e-06, "loss": 0.3656, "step": 9675 }, { "epoch": 0.5805483890322194, "grad_norm": 1.258109211921692, "learning_rate": 2.7628202293628414e-06, "loss": 0.3796, "step": 9676 }, { "epoch": 0.5806083878322433, "grad_norm": 1.2237457036972046, "learning_rate": 2.762155334523553e-06, "loss": 0.3824, "step": 9677 }, { "epoch": 0.5806683866322674, "grad_norm": 1.144554615020752, "learning_rate": 2.7614904675492983e-06, "loss": 0.363, "step": 9678 }, { "epoch": 0.5807283854322913, "grad_norm": 1.3452563285827637, "learning_rate": 2.7608256284651817e-06, "loss": 0.415, "step": 9679 }, { "epoch": 0.5807883842323154, "grad_norm": 1.1675111055374146, "learning_rate": 2.760160817296315e-06, "loss": 0.3828, "step": 9680 }, { "epoch": 0.5808483830323393, "grad_norm": 1.5067979097366333, "learning_rate": 2.759496034067803e-06, "loss": 0.4369, "step": 9681 }, { "epoch": 0.5809083818323634, "grad_norm": 1.3568787574768066, "learning_rate": 2.7588312788047524e-06, "loss": 0.3695, "step": 9682 }, { "epoch": 0.5809683806323873, "grad_norm": 1.3529586791992188, "learning_rate": 2.7581665515322675e-06, "loss": 0.4249, "step": 9683 }, { "epoch": 0.5810283794324114, "grad_norm": 1.322477102279663, "learning_rate": 2.757501852275452e-06, "loss": 0.3971, "step": 9684 }, { "epoch": 0.5810883782324353, "grad_norm": 1.3003277778625488, "learning_rate": 2.7568371810594085e-06, "loss": 0.4197, "step": 9685 }, { "epoch": 0.5811483770324594, "grad_norm": 1.2854074239730835, "learning_rate": 2.7561725379092397e-06, "loss": 0.3777, "step": 9686 }, { "epoch": 0.5812083758324833, "grad_norm": 1.2291914224624634, "learning_rate": 2.7555079228500444e-06, "loss": 0.4065, "step": 9687 }, { "epoch": 0.5812683746325074, "grad_norm": 1.2763466835021973, "learning_rate": 2.754843335906924e-06, "loss": 0.4186, "step": 9688 }, { "epoch": 0.5813283734325313, "grad_norm": 1.2382574081420898, "learning_rate": 2.7541787771049747e-06, "loss": 0.4149, "step": 9689 }, { "epoch": 0.5813883722325554, "grad_norm": 1.249804139137268, "learning_rate": 2.753514246469296e-06, "loss": 0.3917, "step": 9690 }, { "epoch": 0.5814483710325794, "grad_norm": 1.238370656967163, "learning_rate": 2.752849744024982e-06, "loss": 0.3406, "step": 9691 }, { "epoch": 0.5815083698326033, "grad_norm": 1.361518144607544, "learning_rate": 2.752185269797129e-06, "loss": 0.3632, "step": 9692 }, { "epoch": 0.5815683686326274, "grad_norm": 1.1287505626678467, "learning_rate": 2.7515208238108334e-06, "loss": 0.3424, "step": 9693 }, { "epoch": 0.5816283674326513, "grad_norm": 1.277910590171814, "learning_rate": 2.750856406091184e-06, "loss": 0.4115, "step": 9694 }, { "epoch": 0.5816883662326754, "grad_norm": 1.129764437675476, "learning_rate": 2.7501920166632766e-06, "loss": 0.3666, "step": 9695 }, { "epoch": 0.5817483650326993, "grad_norm": 1.3801805973052979, "learning_rate": 2.7495276555521996e-06, "loss": 0.4555, "step": 9696 }, { "epoch": 0.5818083638327234, "grad_norm": 1.2709397077560425, "learning_rate": 2.7488633227830445e-06, "loss": 0.3774, "step": 9697 }, { "epoch": 0.5818683626327473, "grad_norm": 1.2348027229309082, "learning_rate": 2.7481990183809e-06, "loss": 0.4162, "step": 9698 }, { "epoch": 0.5819283614327714, "grad_norm": 1.323256492614746, "learning_rate": 2.747534742370852e-06, "loss": 0.3518, "step": 9699 }, { "epoch": 0.5819883602327953, "grad_norm": 1.3001458644866943, "learning_rate": 2.7468704947779903e-06, "loss": 0.4097, "step": 9700 }, { "epoch": 0.5820483590328194, "grad_norm": 1.2830994129180908, "learning_rate": 2.7462062756273974e-06, "loss": 0.3674, "step": 9701 }, { "epoch": 0.5821083578328433, "grad_norm": 1.2395304441452026, "learning_rate": 2.7455420849441597e-06, "loss": 0.364, "step": 9702 }, { "epoch": 0.5821683566328674, "grad_norm": 1.1371716260910034, "learning_rate": 2.7448779227533603e-06, "loss": 0.3951, "step": 9703 }, { "epoch": 0.5822283554328913, "grad_norm": 1.3902456760406494, "learning_rate": 2.74421378908008e-06, "loss": 0.42, "step": 9704 }, { "epoch": 0.5822883542329154, "grad_norm": 1.2609620094299316, "learning_rate": 2.743549683949404e-06, "loss": 0.4094, "step": 9705 }, { "epoch": 0.5823483530329393, "grad_norm": 1.2409816980361938, "learning_rate": 2.742885607386408e-06, "loss": 0.3984, "step": 9706 }, { "epoch": 0.5824083518329634, "grad_norm": 1.371550440788269, "learning_rate": 2.742221559416175e-06, "loss": 0.431, "step": 9707 }, { "epoch": 0.5824683506329873, "grad_norm": 1.3713254928588867, "learning_rate": 2.7415575400637793e-06, "loss": 0.4152, "step": 9708 }, { "epoch": 0.5825283494330113, "grad_norm": 1.2551276683807373, "learning_rate": 2.740893549354301e-06, "loss": 0.3933, "step": 9709 }, { "epoch": 0.5825883482330353, "grad_norm": 1.3082983493804932, "learning_rate": 2.740229587312815e-06, "loss": 0.4056, "step": 9710 }, { "epoch": 0.5826483470330593, "grad_norm": 1.3616145849227905, "learning_rate": 2.739565653964395e-06, "loss": 0.4422, "step": 9711 }, { "epoch": 0.5827083458330833, "grad_norm": 1.3482760190963745, "learning_rate": 2.7389017493341173e-06, "loss": 0.3824, "step": 9712 }, { "epoch": 0.5827683446331073, "grad_norm": 1.3733766078948975, "learning_rate": 2.7382378734470517e-06, "loss": 0.4587, "step": 9713 }, { "epoch": 0.5828283434331314, "grad_norm": 1.2953617572784424, "learning_rate": 2.737574026328271e-06, "loss": 0.3923, "step": 9714 }, { "epoch": 0.5828883422331553, "grad_norm": 1.3577282428741455, "learning_rate": 2.7369102080028466e-06, "loss": 0.3983, "step": 9715 }, { "epoch": 0.5829483410331794, "grad_norm": 1.3077486753463745, "learning_rate": 2.736246418495846e-06, "loss": 0.4113, "step": 9716 }, { "epoch": 0.5830083398332033, "grad_norm": 1.2105473279953003, "learning_rate": 2.7355826578323397e-06, "loss": 0.3785, "step": 9717 }, { "epoch": 0.5830683386332274, "grad_norm": 1.1588528156280518, "learning_rate": 2.734918926037393e-06, "loss": 0.4051, "step": 9718 }, { "epoch": 0.5831283374332513, "grad_norm": 1.2944591045379639, "learning_rate": 2.7342552231360734e-06, "loss": 0.3835, "step": 9719 }, { "epoch": 0.5831883362332754, "grad_norm": 1.3094704151153564, "learning_rate": 2.7335915491534444e-06, "loss": 0.4145, "step": 9720 }, { "epoch": 0.5832483350332993, "grad_norm": 1.2466254234313965, "learning_rate": 2.73292790411457e-06, "loss": 0.4318, "step": 9721 }, { "epoch": 0.5833083338333234, "grad_norm": 1.2119609117507935, "learning_rate": 2.7322642880445164e-06, "loss": 0.396, "step": 9722 }, { "epoch": 0.5833683326333473, "grad_norm": 1.0387614965438843, "learning_rate": 2.73160070096834e-06, "loss": 0.3298, "step": 9723 }, { "epoch": 0.5834283314333714, "grad_norm": 1.3842300176620483, "learning_rate": 2.7309371429111074e-06, "loss": 0.3948, "step": 9724 }, { "epoch": 0.5834883302333953, "grad_norm": 1.2049003839492798, "learning_rate": 2.7302736138978725e-06, "loss": 0.3625, "step": 9725 }, { "epoch": 0.5835483290334194, "grad_norm": 1.1911582946777344, "learning_rate": 2.729610113953697e-06, "loss": 0.3686, "step": 9726 }, { "epoch": 0.5836083278334433, "grad_norm": 1.200803518295288, "learning_rate": 2.7289466431036384e-06, "loss": 0.381, "step": 9727 }, { "epoch": 0.5836683266334673, "grad_norm": 1.2191768884658813, "learning_rate": 2.728283201372752e-06, "loss": 0.4215, "step": 9728 }, { "epoch": 0.5837283254334913, "grad_norm": 1.5470211505889893, "learning_rate": 2.727619788786093e-06, "loss": 0.3851, "step": 9729 }, { "epoch": 0.5837883242335153, "grad_norm": 1.1099854707717896, "learning_rate": 2.726956405368715e-06, "loss": 0.3854, "step": 9730 }, { "epoch": 0.5838483230335393, "grad_norm": 1.1196945905685425, "learning_rate": 2.7262930511456727e-06, "loss": 0.3516, "step": 9731 }, { "epoch": 0.5839083218335633, "grad_norm": 1.2590014934539795, "learning_rate": 2.725629726142016e-06, "loss": 0.4121, "step": 9732 }, { "epoch": 0.5839683206335873, "grad_norm": 1.2572321891784668, "learning_rate": 2.724966430382797e-06, "loss": 0.413, "step": 9733 }, { "epoch": 0.5840283194336113, "grad_norm": 1.4310758113861084, "learning_rate": 2.7243031638930646e-06, "loss": 0.3785, "step": 9734 }, { "epoch": 0.5840883182336353, "grad_norm": 1.2973389625549316, "learning_rate": 2.7236399266978677e-06, "loss": 0.3882, "step": 9735 }, { "epoch": 0.5841483170336593, "grad_norm": 1.3757610321044922, "learning_rate": 2.722976718822254e-06, "loss": 0.3873, "step": 9736 }, { "epoch": 0.5842083158336834, "grad_norm": 1.357696294784546, "learning_rate": 2.722313540291269e-06, "loss": 0.3847, "step": 9737 }, { "epoch": 0.5842683146337073, "grad_norm": 1.1855823993682861, "learning_rate": 2.7216503911299576e-06, "loss": 0.384, "step": 9738 }, { "epoch": 0.5843283134337314, "grad_norm": 1.4626375436782837, "learning_rate": 2.720987271363367e-06, "loss": 0.3664, "step": 9739 }, { "epoch": 0.5843883122337553, "grad_norm": 1.3608568906784058, "learning_rate": 2.7203241810165357e-06, "loss": 0.41, "step": 9740 }, { "epoch": 0.5844483110337794, "grad_norm": 1.2985312938690186, "learning_rate": 2.7196611201145095e-06, "loss": 0.4663, "step": 9741 }, { "epoch": 0.5845083098338033, "grad_norm": 1.3125956058502197, "learning_rate": 2.718998088682327e-06, "loss": 0.4047, "step": 9742 }, { "epoch": 0.5845683086338274, "grad_norm": 1.3939107656478882, "learning_rate": 2.7183350867450276e-06, "loss": 0.388, "step": 9743 }, { "epoch": 0.5846283074338513, "grad_norm": 1.446781039237976, "learning_rate": 2.7176721143276524e-06, "loss": 0.4433, "step": 9744 }, { "epoch": 0.5846883062338754, "grad_norm": 1.2799137830734253, "learning_rate": 2.717009171455236e-06, "loss": 0.4008, "step": 9745 }, { "epoch": 0.5847483050338993, "grad_norm": 1.3279063701629639, "learning_rate": 2.716346258152817e-06, "loss": 0.3679, "step": 9746 }, { "epoch": 0.5848083038339233, "grad_norm": 1.3492857217788696, "learning_rate": 2.7156833744454285e-06, "loss": 0.388, "step": 9747 }, { "epoch": 0.5848683026339473, "grad_norm": 1.3499560356140137, "learning_rate": 2.7150205203581066e-06, "loss": 0.4569, "step": 9748 }, { "epoch": 0.5849283014339713, "grad_norm": 1.4159722328186035, "learning_rate": 2.714357695915882e-06, "loss": 0.4294, "step": 9749 }, { "epoch": 0.5849883002339953, "grad_norm": 1.3483421802520752, "learning_rate": 2.7136949011437876e-06, "loss": 0.4128, "step": 9750 }, { "epoch": 0.5850482990340193, "grad_norm": 1.3919806480407715, "learning_rate": 2.7130321360668565e-06, "loss": 0.397, "step": 9751 }, { "epoch": 0.5851082978340433, "grad_norm": 1.1954985857009888, "learning_rate": 2.712369400710114e-06, "loss": 0.3853, "step": 9752 }, { "epoch": 0.5851682966340673, "grad_norm": 1.1927480697631836, "learning_rate": 2.711706695098593e-06, "loss": 0.3672, "step": 9753 }, { "epoch": 0.5852282954340913, "grad_norm": 1.2756198644638062, "learning_rate": 2.7110440192573164e-06, "loss": 0.3455, "step": 9754 }, { "epoch": 0.5852882942341153, "grad_norm": 1.3522965908050537, "learning_rate": 2.710381373211313e-06, "loss": 0.3764, "step": 9755 }, { "epoch": 0.5853482930341393, "grad_norm": 1.168677568435669, "learning_rate": 2.709718756985609e-06, "loss": 0.3183, "step": 9756 }, { "epoch": 0.5854082918341633, "grad_norm": 1.230670690536499, "learning_rate": 2.7090561706052263e-06, "loss": 0.3803, "step": 9757 }, { "epoch": 0.5854682906341874, "grad_norm": 1.432633638381958, "learning_rate": 2.708393614095189e-06, "loss": 0.3902, "step": 9758 }, { "epoch": 0.5855282894342113, "grad_norm": 1.1906801462173462, "learning_rate": 2.7077310874805177e-06, "loss": 0.3605, "step": 9759 }, { "epoch": 0.5855882882342354, "grad_norm": 1.2736197710037231, "learning_rate": 2.7070685907862347e-06, "loss": 0.3735, "step": 9760 }, { "epoch": 0.5856482870342593, "grad_norm": 1.421066403388977, "learning_rate": 2.706406124037357e-06, "loss": 0.3813, "step": 9761 }, { "epoch": 0.5857082858342834, "grad_norm": 1.1787089109420776, "learning_rate": 2.705743687258905e-06, "loss": 0.3872, "step": 9762 }, { "epoch": 0.5857682846343073, "grad_norm": 1.2525378465652466, "learning_rate": 2.7050812804758957e-06, "loss": 0.3516, "step": 9763 }, { "epoch": 0.5858282834343314, "grad_norm": 1.2715802192687988, "learning_rate": 2.7044189037133444e-06, "loss": 0.4044, "step": 9764 }, { "epoch": 0.5858882822343553, "grad_norm": 1.3024462461471558, "learning_rate": 2.703756556996267e-06, "loss": 0.4061, "step": 9765 }, { "epoch": 0.5859482810343793, "grad_norm": 1.2152693271636963, "learning_rate": 2.703094240349676e-06, "loss": 0.399, "step": 9766 }, { "epoch": 0.5860082798344033, "grad_norm": 1.5262447595596313, "learning_rate": 2.702431953798584e-06, "loss": 0.415, "step": 9767 }, { "epoch": 0.5860682786344273, "grad_norm": 1.3733540773391724, "learning_rate": 2.701769697368006e-06, "loss": 0.3912, "step": 9768 }, { "epoch": 0.5861282774344513, "grad_norm": 1.423423171043396, "learning_rate": 2.701107471082947e-06, "loss": 0.437, "step": 9769 }, { "epoch": 0.5861882762344753, "grad_norm": 1.3024262189865112, "learning_rate": 2.700445274968422e-06, "loss": 0.3698, "step": 9770 }, { "epoch": 0.5862482750344993, "grad_norm": 1.2092797756195068, "learning_rate": 2.6997831090494333e-06, "loss": 0.4129, "step": 9771 }, { "epoch": 0.5863082738345233, "grad_norm": 1.2795361280441284, "learning_rate": 2.6991209733509934e-06, "loss": 0.3774, "step": 9772 }, { "epoch": 0.5863682726345473, "grad_norm": 1.347348690032959, "learning_rate": 2.698458867898103e-06, "loss": 0.3693, "step": 9773 }, { "epoch": 0.5864282714345713, "grad_norm": 1.3250133991241455, "learning_rate": 2.6977967927157704e-06, "loss": 0.386, "step": 9774 }, { "epoch": 0.5864882702345953, "grad_norm": 1.320838212966919, "learning_rate": 2.6971347478289987e-06, "loss": 0.3787, "step": 9775 }, { "epoch": 0.5865482690346193, "grad_norm": 1.2818621397018433, "learning_rate": 2.696472733262789e-06, "loss": 0.357, "step": 9776 }, { "epoch": 0.5866082678346433, "grad_norm": 1.2879447937011719, "learning_rate": 2.695810749042144e-06, "loss": 0.4249, "step": 9777 }, { "epoch": 0.5866682666346673, "grad_norm": 1.2512608766555786, "learning_rate": 2.6951487951920625e-06, "loss": 0.3695, "step": 9778 }, { "epoch": 0.5867282654346913, "grad_norm": 1.3681600093841553, "learning_rate": 2.694486871737544e-06, "loss": 0.4169, "step": 9779 }, { "epoch": 0.5867882642347153, "grad_norm": 1.3171212673187256, "learning_rate": 2.693824978703587e-06, "loss": 0.4727, "step": 9780 }, { "epoch": 0.5868482630347394, "grad_norm": 1.3973742723464966, "learning_rate": 2.6931631161151875e-06, "loss": 0.3935, "step": 9781 }, { "epoch": 0.5869082618347633, "grad_norm": 1.2691545486450195, "learning_rate": 2.6925012839973416e-06, "loss": 0.3742, "step": 9782 }, { "epoch": 0.5869682606347874, "grad_norm": 1.2705234289169312, "learning_rate": 2.6918394823750425e-06, "loss": 0.3879, "step": 9783 }, { "epoch": 0.5870282594348113, "grad_norm": 1.1833451986312866, "learning_rate": 2.6911777112732836e-06, "loss": 0.3763, "step": 9784 }, { "epoch": 0.5870882582348353, "grad_norm": 1.3335317373275757, "learning_rate": 2.6905159707170587e-06, "loss": 0.3782, "step": 9785 }, { "epoch": 0.5871482570348593, "grad_norm": 1.347846508026123, "learning_rate": 2.689854260731357e-06, "loss": 0.425, "step": 9786 }, { "epoch": 0.5872082558348833, "grad_norm": 1.2749600410461426, "learning_rate": 2.6891925813411697e-06, "loss": 0.4003, "step": 9787 }, { "epoch": 0.5872682546349073, "grad_norm": 1.1917641162872314, "learning_rate": 2.6885309325714835e-06, "loss": 0.4113, "step": 9788 }, { "epoch": 0.5873282534349313, "grad_norm": 1.5171680450439453, "learning_rate": 2.687869314447288e-06, "loss": 0.4346, "step": 9789 }, { "epoch": 0.5873882522349553, "grad_norm": 1.263603925704956, "learning_rate": 2.6872077269935677e-06, "loss": 0.3685, "step": 9790 }, { "epoch": 0.5874482510349793, "grad_norm": 1.3320239782333374, "learning_rate": 2.6865461702353083e-06, "loss": 0.3802, "step": 9791 }, { "epoch": 0.5875082498350033, "grad_norm": 1.4589283466339111, "learning_rate": 2.6858846441974944e-06, "loss": 0.4229, "step": 9792 }, { "epoch": 0.5875682486350273, "grad_norm": 1.3080247640609741, "learning_rate": 2.6852231489051083e-06, "loss": 0.3771, "step": 9793 }, { "epoch": 0.5876282474350513, "grad_norm": 1.2145808935165405, "learning_rate": 2.6845616843831325e-06, "loss": 0.3872, "step": 9794 }, { "epoch": 0.5876882462350753, "grad_norm": 1.2889461517333984, "learning_rate": 2.6839002506565455e-06, "loss": 0.3343, "step": 9795 }, { "epoch": 0.5877482450350993, "grad_norm": 1.4100383520126343, "learning_rate": 2.6832388477503276e-06, "loss": 0.4281, "step": 9796 }, { "epoch": 0.5878082438351233, "grad_norm": 1.094869613647461, "learning_rate": 2.682577475689459e-06, "loss": 0.3357, "step": 9797 }, { "epoch": 0.5878682426351473, "grad_norm": 1.3243862390518188, "learning_rate": 2.6819161344989134e-06, "loss": 0.4043, "step": 9798 }, { "epoch": 0.5879282414351713, "grad_norm": 1.2998350858688354, "learning_rate": 2.6812548242036696e-06, "loss": 0.3684, "step": 9799 }, { "epoch": 0.5879882402351952, "grad_norm": 1.317641019821167, "learning_rate": 2.6805935448286994e-06, "loss": 0.3929, "step": 9800 }, { "epoch": 0.5880482390352193, "grad_norm": 1.353995680809021, "learning_rate": 2.67993229639898e-06, "loss": 0.4038, "step": 9801 }, { "epoch": 0.5881082378352432, "grad_norm": 1.3003342151641846, "learning_rate": 2.6792710789394788e-06, "loss": 0.3872, "step": 9802 }, { "epoch": 0.5881682366352673, "grad_norm": 1.3992946147918701, "learning_rate": 2.6786098924751706e-06, "loss": 0.371, "step": 9803 }, { "epoch": 0.5882282354352913, "grad_norm": 1.2644798755645752, "learning_rate": 2.6779487370310254e-06, "loss": 0.4056, "step": 9804 }, { "epoch": 0.5882882342353153, "grad_norm": 1.24687922000885, "learning_rate": 2.6772876126320104e-06, "loss": 0.4053, "step": 9805 }, { "epoch": 0.5883482330353393, "grad_norm": 1.284015417098999, "learning_rate": 2.6766265193030947e-06, "loss": 0.3745, "step": 9806 }, { "epoch": 0.5884082318353633, "grad_norm": 1.2583365440368652, "learning_rate": 2.6759654570692435e-06, "loss": 0.4133, "step": 9807 }, { "epoch": 0.5884682306353873, "grad_norm": 1.3315999507904053, "learning_rate": 2.6753044259554226e-06, "loss": 0.3934, "step": 9808 }, { "epoch": 0.5885282294354113, "grad_norm": 1.269648790359497, "learning_rate": 2.674643425986597e-06, "loss": 0.3756, "step": 9809 }, { "epoch": 0.5885882282354353, "grad_norm": 1.316767930984497, "learning_rate": 2.6739824571877283e-06, "loss": 0.3573, "step": 9810 }, { "epoch": 0.5886482270354593, "grad_norm": 1.371801495552063, "learning_rate": 2.6733215195837795e-06, "loss": 0.4274, "step": 9811 }, { "epoch": 0.5887082258354833, "grad_norm": 1.2578825950622559, "learning_rate": 2.6726606131997096e-06, "loss": 0.3646, "step": 9812 }, { "epoch": 0.5887682246355073, "grad_norm": 1.3833277225494385, "learning_rate": 2.6719997380604795e-06, "loss": 0.4112, "step": 9813 }, { "epoch": 0.5888282234355313, "grad_norm": 1.2427891492843628, "learning_rate": 2.6713388941910466e-06, "loss": 0.4238, "step": 9814 }, { "epoch": 0.5888882222355553, "grad_norm": 1.3742566108703613, "learning_rate": 2.6706780816163678e-06, "loss": 0.4362, "step": 9815 }, { "epoch": 0.5889482210355793, "grad_norm": 1.1746877431869507, "learning_rate": 2.6700173003614014e-06, "loss": 0.3237, "step": 9816 }, { "epoch": 0.5890082198356033, "grad_norm": 1.2398786544799805, "learning_rate": 2.669356550451098e-06, "loss": 0.378, "step": 9817 }, { "epoch": 0.5890682186356273, "grad_norm": 1.3120523691177368, "learning_rate": 2.6686958319104153e-06, "loss": 0.3965, "step": 9818 }, { "epoch": 0.5891282174356512, "grad_norm": 1.2682725191116333, "learning_rate": 2.668035144764302e-06, "loss": 0.4134, "step": 9819 }, { "epoch": 0.5891882162356753, "grad_norm": 1.2638700008392334, "learning_rate": 2.667374489037711e-06, "loss": 0.3799, "step": 9820 }, { "epoch": 0.5892482150356992, "grad_norm": 1.2374992370605469, "learning_rate": 2.666713864755593e-06, "loss": 0.3491, "step": 9821 }, { "epoch": 0.5893082138357233, "grad_norm": 1.3497908115386963, "learning_rate": 2.6660532719428952e-06, "loss": 0.445, "step": 9822 }, { "epoch": 0.5893682126357472, "grad_norm": 1.2326741218566895, "learning_rate": 2.665392710624566e-06, "loss": 0.3817, "step": 9823 }, { "epoch": 0.5894282114357713, "grad_norm": 1.2853758335113525, "learning_rate": 2.664732180825552e-06, "loss": 0.4149, "step": 9824 }, { "epoch": 0.5894882102357952, "grad_norm": 1.2682664394378662, "learning_rate": 2.6640716825707977e-06, "loss": 0.3703, "step": 9825 }, { "epoch": 0.5895482090358193, "grad_norm": 1.2493784427642822, "learning_rate": 2.663411215885248e-06, "loss": 0.4212, "step": 9826 }, { "epoch": 0.5896082078358433, "grad_norm": 1.3602596521377563, "learning_rate": 2.6627507807938443e-06, "loss": 0.3913, "step": 9827 }, { "epoch": 0.5896682066358673, "grad_norm": 1.306039571762085, "learning_rate": 2.66209037732153e-06, "loss": 0.4067, "step": 9828 }, { "epoch": 0.5897282054358913, "grad_norm": 1.3078287839889526, "learning_rate": 2.661430005493244e-06, "loss": 0.3902, "step": 9829 }, { "epoch": 0.5897882042359153, "grad_norm": 1.2563129663467407, "learning_rate": 2.660769665333927e-06, "loss": 0.3681, "step": 9830 }, { "epoch": 0.5898482030359393, "grad_norm": 1.2000515460968018, "learning_rate": 2.6601093568685148e-06, "loss": 0.3707, "step": 9831 }, { "epoch": 0.5899082018359633, "grad_norm": 1.1653077602386475, "learning_rate": 2.659449080121946e-06, "loss": 0.3538, "step": 9832 }, { "epoch": 0.5899682006359873, "grad_norm": 1.3579213619232178, "learning_rate": 2.658788835119157e-06, "loss": 0.3465, "step": 9833 }, { "epoch": 0.5900281994360113, "grad_norm": 1.4216309785842896, "learning_rate": 2.6581286218850804e-06, "loss": 0.3953, "step": 9834 }, { "epoch": 0.5900881982360353, "grad_norm": 1.4590250253677368, "learning_rate": 2.6574684404446506e-06, "loss": 0.4061, "step": 9835 }, { "epoch": 0.5901481970360593, "grad_norm": 1.226420521736145, "learning_rate": 2.656808290822799e-06, "loss": 0.3663, "step": 9836 }, { "epoch": 0.5902081958360833, "grad_norm": 1.3931822776794434, "learning_rate": 2.656148173044456e-06, "loss": 0.4158, "step": 9837 }, { "epoch": 0.5902681946361072, "grad_norm": 1.1782680749893188, "learning_rate": 2.655488087134553e-06, "loss": 0.3422, "step": 9838 }, { "epoch": 0.5903281934361313, "grad_norm": 1.3513782024383545, "learning_rate": 2.6548280331180166e-06, "loss": 0.391, "step": 9839 }, { "epoch": 0.5903881922361552, "grad_norm": 1.2018623352050781, "learning_rate": 2.654168011019776e-06, "loss": 0.3916, "step": 9840 }, { "epoch": 0.5904481910361793, "grad_norm": 1.2667473554611206, "learning_rate": 2.653508020864755e-06, "loss": 0.4254, "step": 9841 }, { "epoch": 0.5905081898362032, "grad_norm": 1.3582168817520142, "learning_rate": 2.65284806267788e-06, "loss": 0.4269, "step": 9842 }, { "epoch": 0.5905681886362273, "grad_norm": 1.462226152420044, "learning_rate": 2.6521881364840733e-06, "loss": 0.3963, "step": 9843 }, { "epoch": 0.5906281874362512, "grad_norm": 1.257827877998352, "learning_rate": 2.6515282423082574e-06, "loss": 0.3756, "step": 9844 }, { "epoch": 0.5906881862362753, "grad_norm": 1.4598468542099, "learning_rate": 2.6508683801753565e-06, "loss": 0.4044, "step": 9845 }, { "epoch": 0.5907481850362992, "grad_norm": 1.2813317775726318, "learning_rate": 2.6502085501102853e-06, "loss": 0.3953, "step": 9846 }, { "epoch": 0.5908081838363233, "grad_norm": 1.5535869598388672, "learning_rate": 2.649548752137969e-06, "loss": 0.3754, "step": 9847 }, { "epoch": 0.5908681826363473, "grad_norm": 1.3125488758087158, "learning_rate": 2.6488889862833188e-06, "loss": 0.3707, "step": 9848 }, { "epoch": 0.5909281814363713, "grad_norm": 1.2527958154678345, "learning_rate": 2.6482292525712544e-06, "loss": 0.3789, "step": 9849 }, { "epoch": 0.5909881802363953, "grad_norm": 1.308078408241272, "learning_rate": 2.6475695510266918e-06, "loss": 0.3662, "step": 9850 }, { "epoch": 0.5910481790364193, "grad_norm": 1.155196189880371, "learning_rate": 2.6469098816745425e-06, "loss": 0.3543, "step": 9851 }, { "epoch": 0.5911081778364433, "grad_norm": 1.1605737209320068, "learning_rate": 2.6462502445397212e-06, "loss": 0.379, "step": 9852 }, { "epoch": 0.5911681766364673, "grad_norm": 1.0897715091705322, "learning_rate": 2.6455906396471376e-06, "loss": 0.3408, "step": 9853 }, { "epoch": 0.5912281754364913, "grad_norm": 1.3036341667175293, "learning_rate": 2.6449310670217035e-06, "loss": 0.41, "step": 9854 }, { "epoch": 0.5912881742365153, "grad_norm": 1.245686650276184, "learning_rate": 2.644271526688327e-06, "loss": 0.4652, "step": 9855 }, { "epoch": 0.5913481730365393, "grad_norm": 1.2309608459472656, "learning_rate": 2.643612018671916e-06, "loss": 0.3896, "step": 9856 }, { "epoch": 0.5914081718365632, "grad_norm": 1.1808778047561646, "learning_rate": 2.642952542997378e-06, "loss": 0.3309, "step": 9857 }, { "epoch": 0.5914681706365873, "grad_norm": 1.3980438709259033, "learning_rate": 2.642293099689617e-06, "loss": 0.3958, "step": 9858 }, { "epoch": 0.5915281694366112, "grad_norm": 1.356894850730896, "learning_rate": 2.6416336887735385e-06, "loss": 0.3779, "step": 9859 }, { "epoch": 0.5915881682366353, "grad_norm": 1.4345953464508057, "learning_rate": 2.6409743102740448e-06, "loss": 0.3885, "step": 9860 }, { "epoch": 0.5916481670366592, "grad_norm": 1.1433991193771362, "learning_rate": 2.6403149642160366e-06, "loss": 0.3835, "step": 9861 }, { "epoch": 0.5917081658366833, "grad_norm": 1.3670116662979126, "learning_rate": 2.639655650624417e-06, "loss": 0.3974, "step": 9862 }, { "epoch": 0.5917681646367072, "grad_norm": 1.2361841201782227, "learning_rate": 2.638996369524083e-06, "loss": 0.3324, "step": 9863 }, { "epoch": 0.5918281634367313, "grad_norm": 1.3201045989990234, "learning_rate": 2.6383371209399344e-06, "loss": 0.3758, "step": 9864 }, { "epoch": 0.5918881622367552, "grad_norm": 1.362177848815918, "learning_rate": 2.6376779048968652e-06, "loss": 0.4294, "step": 9865 }, { "epoch": 0.5919481610367793, "grad_norm": 1.405508041381836, "learning_rate": 2.6370187214197734e-06, "loss": 0.38, "step": 9866 }, { "epoch": 0.5920081598368032, "grad_norm": 1.1139380931854248, "learning_rate": 2.636359570533554e-06, "loss": 0.3857, "step": 9867 }, { "epoch": 0.5920681586368273, "grad_norm": 1.4251843690872192, "learning_rate": 2.6357004522630973e-06, "loss": 0.3673, "step": 9868 }, { "epoch": 0.5921281574368512, "grad_norm": 1.4090074300765991, "learning_rate": 2.6350413666332983e-06, "loss": 0.4263, "step": 9869 }, { "epoch": 0.5921881562368753, "grad_norm": 1.3328468799591064, "learning_rate": 2.6343823136690453e-06, "loss": 0.3547, "step": 9870 }, { "epoch": 0.5922481550368993, "grad_norm": 1.3093960285186768, "learning_rate": 2.633723293395229e-06, "loss": 0.359, "step": 9871 }, { "epoch": 0.5923081538369233, "grad_norm": 1.137304425239563, "learning_rate": 2.6330643058367362e-06, "loss": 0.3694, "step": 9872 }, { "epoch": 0.5923681526369473, "grad_norm": 1.321396827697754, "learning_rate": 2.632405351018455e-06, "loss": 0.4247, "step": 9873 }, { "epoch": 0.5924281514369713, "grad_norm": 1.2637983560562134, "learning_rate": 2.6317464289652724e-06, "loss": 0.3936, "step": 9874 }, { "epoch": 0.5924881502369953, "grad_norm": 1.2250573635101318, "learning_rate": 2.6310875397020704e-06, "loss": 0.351, "step": 9875 }, { "epoch": 0.5925481490370192, "grad_norm": 1.1324427127838135, "learning_rate": 2.630428683253733e-06, "loss": 0.3614, "step": 9876 }, { "epoch": 0.5926081478370433, "grad_norm": 1.141701340675354, "learning_rate": 2.6297698596451425e-06, "loss": 0.374, "step": 9877 }, { "epoch": 0.5926681466370672, "grad_norm": 1.2337967157363892, "learning_rate": 2.629111068901179e-06, "loss": 0.3646, "step": 9878 }, { "epoch": 0.5927281454370913, "grad_norm": 1.2097151279449463, "learning_rate": 2.6284523110467243e-06, "loss": 0.3593, "step": 9879 }, { "epoch": 0.5927881442371152, "grad_norm": 1.2825441360473633, "learning_rate": 2.627793586106654e-06, "loss": 0.3814, "step": 9880 }, { "epoch": 0.5928481430371393, "grad_norm": 1.3390930891036987, "learning_rate": 2.627134894105847e-06, "loss": 0.42, "step": 9881 }, { "epoch": 0.5929081418371632, "grad_norm": 1.309127688407898, "learning_rate": 2.6264762350691772e-06, "loss": 0.4087, "step": 9882 }, { "epoch": 0.5929681406371873, "grad_norm": 1.2677046060562134, "learning_rate": 2.625817609021521e-06, "loss": 0.3686, "step": 9883 }, { "epoch": 0.5930281394372112, "grad_norm": 1.1838139295578003, "learning_rate": 2.625159015987751e-06, "loss": 0.3813, "step": 9884 }, { "epoch": 0.5930881382372353, "grad_norm": 1.2897900342941284, "learning_rate": 2.6245004559927384e-06, "loss": 0.4119, "step": 9885 }, { "epoch": 0.5931481370372592, "grad_norm": 1.2091139554977417, "learning_rate": 2.6238419290613562e-06, "loss": 0.4169, "step": 9886 }, { "epoch": 0.5932081358372833, "grad_norm": 1.4222016334533691, "learning_rate": 2.6231834352184713e-06, "loss": 0.4269, "step": 9887 }, { "epoch": 0.5932681346373072, "grad_norm": 1.408482313156128, "learning_rate": 2.622524974488954e-06, "loss": 0.3868, "step": 9888 }, { "epoch": 0.5933281334373313, "grad_norm": 1.3483896255493164, "learning_rate": 2.62186654689767e-06, "loss": 0.3736, "step": 9889 }, { "epoch": 0.5933881322373552, "grad_norm": 1.4254080057144165, "learning_rate": 2.6212081524694857e-06, "loss": 0.4276, "step": 9890 }, { "epoch": 0.5934481310373793, "grad_norm": 1.3113967180252075, "learning_rate": 2.6205497912292673e-06, "loss": 0.3699, "step": 9891 }, { "epoch": 0.5935081298374032, "grad_norm": 1.2753022909164429, "learning_rate": 2.6198914632018744e-06, "loss": 0.4227, "step": 9892 }, { "epoch": 0.5935681286374273, "grad_norm": 1.202728509902954, "learning_rate": 2.619233168412173e-06, "loss": 0.3492, "step": 9893 }, { "epoch": 0.5936281274374513, "grad_norm": 1.3740497827529907, "learning_rate": 2.61857490688502e-06, "loss": 0.4176, "step": 9894 }, { "epoch": 0.5936881262374752, "grad_norm": 1.2603766918182373, "learning_rate": 2.6179166786452795e-06, "loss": 0.3673, "step": 9895 }, { "epoch": 0.5937481250374993, "grad_norm": 1.2463144063949585, "learning_rate": 2.6172584837178056e-06, "loss": 0.3523, "step": 9896 }, { "epoch": 0.5938081238375232, "grad_norm": 1.3777211904525757, "learning_rate": 2.6166003221274566e-06, "loss": 0.4605, "step": 9897 }, { "epoch": 0.5938681226375473, "grad_norm": 1.396665334701538, "learning_rate": 2.61594219389909e-06, "loss": 0.4458, "step": 9898 }, { "epoch": 0.5939281214375712, "grad_norm": 1.2676316499710083, "learning_rate": 2.6152840990575582e-06, "loss": 0.4495, "step": 9899 }, { "epoch": 0.5939881202375953, "grad_norm": 1.3191505670547485, "learning_rate": 2.614626037627716e-06, "loss": 0.3948, "step": 9900 }, { "epoch": 0.5940481190376192, "grad_norm": 1.2426204681396484, "learning_rate": 2.6139680096344135e-06, "loss": 0.3682, "step": 9901 }, { "epoch": 0.5941081178376433, "grad_norm": 1.3579193353652954, "learning_rate": 2.6133100151025033e-06, "loss": 0.3895, "step": 9902 }, { "epoch": 0.5941681166376672, "grad_norm": 1.3268009424209595, "learning_rate": 2.612652054056834e-06, "loss": 0.3932, "step": 9903 }, { "epoch": 0.5942281154376913, "grad_norm": 1.4773776531219482, "learning_rate": 2.6119941265222536e-06, "loss": 0.4307, "step": 9904 }, { "epoch": 0.5942881142377152, "grad_norm": 1.2957158088684082, "learning_rate": 2.61133623252361e-06, "loss": 0.3489, "step": 9905 }, { "epoch": 0.5943481130377393, "grad_norm": 1.2739216089248657, "learning_rate": 2.6106783720857474e-06, "loss": 0.3407, "step": 9906 }, { "epoch": 0.5944081118377632, "grad_norm": 1.2488971948623657, "learning_rate": 2.6100205452335103e-06, "loss": 0.3888, "step": 9907 }, { "epoch": 0.5944681106377873, "grad_norm": 1.4251248836517334, "learning_rate": 2.6093627519917443e-06, "loss": 0.3636, "step": 9908 }, { "epoch": 0.5945281094378112, "grad_norm": 1.2760558128356934, "learning_rate": 2.6087049923852873e-06, "loss": 0.3787, "step": 9909 }, { "epoch": 0.5945881082378353, "grad_norm": 1.1840569972991943, "learning_rate": 2.6080472664389846e-06, "loss": 0.3752, "step": 9910 }, { "epoch": 0.5946481070378592, "grad_norm": 1.2720640897750854, "learning_rate": 2.60738957417767e-06, "loss": 0.3546, "step": 9911 }, { "epoch": 0.5947081058378832, "grad_norm": 1.281294822692871, "learning_rate": 2.6067319156261867e-06, "loss": 0.3837, "step": 9912 }, { "epoch": 0.5947681046379072, "grad_norm": 1.4597102403640747, "learning_rate": 2.606074290809368e-06, "loss": 0.4491, "step": 9913 }, { "epoch": 0.5948281034379312, "grad_norm": 1.3119909763336182, "learning_rate": 2.605416699752051e-06, "loss": 0.4159, "step": 9914 }, { "epoch": 0.5948881022379553, "grad_norm": 1.3307807445526123, "learning_rate": 2.6047591424790702e-06, "loss": 0.3972, "step": 9915 }, { "epoch": 0.5949481010379792, "grad_norm": 1.2291220426559448, "learning_rate": 2.604101619015257e-06, "loss": 0.3855, "step": 9916 }, { "epoch": 0.5950080998380033, "grad_norm": 1.3193202018737793, "learning_rate": 2.6034441293854447e-06, "loss": 0.3827, "step": 9917 }, { "epoch": 0.5950680986380272, "grad_norm": 1.3899155855178833, "learning_rate": 2.6027866736144624e-06, "loss": 0.4311, "step": 9918 }, { "epoch": 0.5951280974380513, "grad_norm": 1.2924549579620361, "learning_rate": 2.602129251727139e-06, "loss": 0.4317, "step": 9919 }, { "epoch": 0.5951880962380752, "grad_norm": 1.2653300762176514, "learning_rate": 2.601471863748305e-06, "loss": 0.4099, "step": 9920 }, { "epoch": 0.5952480950380993, "grad_norm": 1.1192662715911865, "learning_rate": 2.600814509702783e-06, "loss": 0.3421, "step": 9921 }, { "epoch": 0.5953080938381232, "grad_norm": 1.1589845418930054, "learning_rate": 2.6001571896154018e-06, "loss": 0.3583, "step": 9922 }, { "epoch": 0.5953680926381473, "grad_norm": 1.2630062103271484, "learning_rate": 2.5994999035109824e-06, "loss": 0.4046, "step": 9923 }, { "epoch": 0.5954280914381712, "grad_norm": 1.3940575122833252, "learning_rate": 2.5988426514143514e-06, "loss": 0.3962, "step": 9924 }, { "epoch": 0.5954880902381953, "grad_norm": 1.3464171886444092, "learning_rate": 2.598185433350325e-06, "loss": 0.4008, "step": 9925 }, { "epoch": 0.5955480890382192, "grad_norm": 1.35613214969635, "learning_rate": 2.5975282493437272e-06, "loss": 0.4549, "step": 9926 }, { "epoch": 0.5956080878382433, "grad_norm": 1.1742730140686035, "learning_rate": 2.5968710994193765e-06, "loss": 0.3561, "step": 9927 }, { "epoch": 0.5956680866382672, "grad_norm": 1.265430212020874, "learning_rate": 2.5962139836020886e-06, "loss": 0.4164, "step": 9928 }, { "epoch": 0.5957280854382913, "grad_norm": 1.1980879306793213, "learning_rate": 2.595556901916682e-06, "loss": 0.3535, "step": 9929 }, { "epoch": 0.5957880842383152, "grad_norm": 1.3372948169708252, "learning_rate": 2.59489985438797e-06, "loss": 0.3923, "step": 9930 }, { "epoch": 0.5958480830383392, "grad_norm": 1.243314504623413, "learning_rate": 2.5942428410407664e-06, "loss": 0.375, "step": 9931 }, { "epoch": 0.5959080818383632, "grad_norm": 1.3241782188415527, "learning_rate": 2.593585861899885e-06, "loss": 0.3844, "step": 9932 }, { "epoch": 0.5959680806383872, "grad_norm": 1.2259540557861328, "learning_rate": 2.5929289169901354e-06, "loss": 0.4065, "step": 9933 }, { "epoch": 0.5960280794384112, "grad_norm": 1.31752610206604, "learning_rate": 2.592272006336329e-06, "loss": 0.4401, "step": 9934 }, { "epoch": 0.5960880782384352, "grad_norm": 1.3232074975967407, "learning_rate": 2.591615129963272e-06, "loss": 0.4017, "step": 9935 }, { "epoch": 0.5961480770384592, "grad_norm": 1.3652145862579346, "learning_rate": 2.590958287895774e-06, "loss": 0.3993, "step": 9936 }, { "epoch": 0.5962080758384832, "grad_norm": 1.313094139099121, "learning_rate": 2.590301480158639e-06, "loss": 0.3773, "step": 9937 }, { "epoch": 0.5962680746385073, "grad_norm": 1.5478795766830444, "learning_rate": 2.589644706776672e-06, "loss": 0.4431, "step": 9938 }, { "epoch": 0.5963280734385312, "grad_norm": 1.2208988666534424, "learning_rate": 2.5889879677746787e-06, "loss": 0.3763, "step": 9939 }, { "epoch": 0.5963880722385553, "grad_norm": 1.4254752397537231, "learning_rate": 2.588331263177457e-06, "loss": 0.4221, "step": 9940 }, { "epoch": 0.5964480710385792, "grad_norm": 1.1951594352722168, "learning_rate": 2.587674593009813e-06, "loss": 0.3471, "step": 9941 }, { "epoch": 0.5965080698386033, "grad_norm": 1.4771199226379395, "learning_rate": 2.58701795729654e-06, "loss": 0.3846, "step": 9942 }, { "epoch": 0.5965680686386272, "grad_norm": 1.3093388080596924, "learning_rate": 2.5863613560624407e-06, "loss": 0.4179, "step": 9943 }, { "epoch": 0.5966280674386513, "grad_norm": 1.3184959888458252, "learning_rate": 2.5857047893323105e-06, "loss": 0.3992, "step": 9944 }, { "epoch": 0.5966880662386752, "grad_norm": 1.1908067464828491, "learning_rate": 2.5850482571309444e-06, "loss": 0.4251, "step": 9945 }, { "epoch": 0.5967480650386993, "grad_norm": 1.2183555364608765, "learning_rate": 2.584391759483138e-06, "loss": 0.3834, "step": 9946 }, { "epoch": 0.5968080638387232, "grad_norm": 1.3449859619140625, "learning_rate": 2.5837352964136825e-06, "loss": 0.4109, "step": 9947 }, { "epoch": 0.5968680626387473, "grad_norm": 1.3635846376419067, "learning_rate": 2.5830788679473708e-06, "loss": 0.4182, "step": 9948 }, { "epoch": 0.5969280614387712, "grad_norm": 1.2569540739059448, "learning_rate": 2.582422474108993e-06, "loss": 0.3669, "step": 9949 }, { "epoch": 0.5969880602387952, "grad_norm": 1.3236169815063477, "learning_rate": 2.5817661149233367e-06, "loss": 0.3705, "step": 9950 }, { "epoch": 0.5970480590388192, "grad_norm": 1.4833428859710693, "learning_rate": 2.581109790415192e-06, "loss": 0.3931, "step": 9951 }, { "epoch": 0.5971080578388432, "grad_norm": 1.3592884540557861, "learning_rate": 2.5804535006093436e-06, "loss": 0.4016, "step": 9952 }, { "epoch": 0.5971680566388672, "grad_norm": 1.4071705341339111, "learning_rate": 2.579797245530577e-06, "loss": 0.4057, "step": 9953 }, { "epoch": 0.5972280554388912, "grad_norm": 1.3261568546295166, "learning_rate": 2.5791410252036763e-06, "loss": 0.3885, "step": 9954 }, { "epoch": 0.5972880542389152, "grad_norm": 1.4625513553619385, "learning_rate": 2.5784848396534225e-06, "loss": 0.431, "step": 9955 }, { "epoch": 0.5973480530389392, "grad_norm": 1.2725474834442139, "learning_rate": 2.5778286889046e-06, "loss": 0.3496, "step": 9956 }, { "epoch": 0.5974080518389632, "grad_norm": 1.4053694009780884, "learning_rate": 2.577172572981984e-06, "loss": 0.3986, "step": 9957 }, { "epoch": 0.5974680506389872, "grad_norm": 1.2418965101242065, "learning_rate": 2.5765164919103576e-06, "loss": 0.3446, "step": 9958 }, { "epoch": 0.5975280494390112, "grad_norm": 1.192396879196167, "learning_rate": 2.5758604457144945e-06, "loss": 0.3704, "step": 9959 }, { "epoch": 0.5975880482390352, "grad_norm": 1.2671644687652588, "learning_rate": 2.5752044344191726e-06, "loss": 0.3951, "step": 9960 }, { "epoch": 0.5976480470390593, "grad_norm": 1.3156071901321411, "learning_rate": 2.5745484580491663e-06, "loss": 0.3649, "step": 9961 }, { "epoch": 0.5977080458390832, "grad_norm": 1.48582923412323, "learning_rate": 2.573892516629248e-06, "loss": 0.422, "step": 9962 }, { "epoch": 0.5977680446391073, "grad_norm": 1.165024995803833, "learning_rate": 2.57323661018419e-06, "loss": 0.3625, "step": 9963 }, { "epoch": 0.5978280434391312, "grad_norm": 1.4121840000152588, "learning_rate": 2.572580738738763e-06, "loss": 0.4411, "step": 9964 }, { "epoch": 0.5978880422391553, "grad_norm": 1.5459001064300537, "learning_rate": 2.5719249023177367e-06, "loss": 0.4845, "step": 9965 }, { "epoch": 0.5979480410391792, "grad_norm": 1.2527538537979126, "learning_rate": 2.5712691009458774e-06, "loss": 0.3853, "step": 9966 }, { "epoch": 0.5980080398392033, "grad_norm": 1.2427024841308594, "learning_rate": 2.5706133346479526e-06, "loss": 0.3878, "step": 9967 }, { "epoch": 0.5980680386392272, "grad_norm": 1.221763253211975, "learning_rate": 2.56995760344873e-06, "loss": 0.3687, "step": 9968 }, { "epoch": 0.5981280374392512, "grad_norm": 1.225534200668335, "learning_rate": 2.569301907372969e-06, "loss": 0.3988, "step": 9969 }, { "epoch": 0.5981880362392752, "grad_norm": 1.2578730583190918, "learning_rate": 2.568646246445437e-06, "loss": 0.3988, "step": 9970 }, { "epoch": 0.5982480350392992, "grad_norm": 1.1523829698562622, "learning_rate": 2.567990620690891e-06, "loss": 0.3522, "step": 9971 }, { "epoch": 0.5983080338393232, "grad_norm": 1.3062853813171387, "learning_rate": 2.5673350301340936e-06, "loss": 0.3933, "step": 9972 }, { "epoch": 0.5983680326393472, "grad_norm": 1.2034947872161865, "learning_rate": 2.566679474799804e-06, "loss": 0.368, "step": 9973 }, { "epoch": 0.5984280314393712, "grad_norm": 1.2211990356445312, "learning_rate": 2.5660239547127773e-06, "loss": 0.3954, "step": 9974 }, { "epoch": 0.5984880302393952, "grad_norm": 1.3781187534332275, "learning_rate": 2.5653684698977714e-06, "loss": 0.3887, "step": 9975 }, { "epoch": 0.5985480290394192, "grad_norm": 1.309728741645813, "learning_rate": 2.5647130203795394e-06, "loss": 0.3825, "step": 9976 }, { "epoch": 0.5986080278394432, "grad_norm": 1.3330174684524536, "learning_rate": 2.5640576061828366e-06, "loss": 0.3966, "step": 9977 }, { "epoch": 0.5986680266394672, "grad_norm": 1.3077508211135864, "learning_rate": 2.563402227332413e-06, "loss": 0.3991, "step": 9978 }, { "epoch": 0.5987280254394912, "grad_norm": 1.3000767230987549, "learning_rate": 2.5627468838530197e-06, "loss": 0.3572, "step": 9979 }, { "epoch": 0.5987880242395152, "grad_norm": 1.2078222036361694, "learning_rate": 2.5620915757694075e-06, "loss": 0.3862, "step": 9980 }, { "epoch": 0.5988480230395392, "grad_norm": 1.2952876091003418, "learning_rate": 2.561436303106323e-06, "loss": 0.4209, "step": 9981 }, { "epoch": 0.5989080218395632, "grad_norm": 1.2417174577713013, "learning_rate": 2.5607810658885138e-06, "loss": 0.3727, "step": 9982 }, { "epoch": 0.5989680206395872, "grad_norm": 1.399736762046814, "learning_rate": 2.5601258641407236e-06, "loss": 0.4438, "step": 9983 }, { "epoch": 0.5990280194396113, "grad_norm": 1.3395209312438965, "learning_rate": 2.559470697887697e-06, "loss": 0.4097, "step": 9984 }, { "epoch": 0.5990880182396352, "grad_norm": 1.1593544483184814, "learning_rate": 2.5588155671541797e-06, "loss": 0.348, "step": 9985 }, { "epoch": 0.5991480170396593, "grad_norm": 1.15543794631958, "learning_rate": 2.5581604719649075e-06, "loss": 0.3783, "step": 9986 }, { "epoch": 0.5992080158396832, "grad_norm": 1.2779624462127686, "learning_rate": 2.5575054123446257e-06, "loss": 0.3978, "step": 9987 }, { "epoch": 0.5992680146397072, "grad_norm": 1.3414946794509888, "learning_rate": 2.556850388318068e-06, "loss": 0.3784, "step": 9988 }, { "epoch": 0.5993280134397312, "grad_norm": 1.3521679639816284, "learning_rate": 2.5561953999099752e-06, "loss": 0.425, "step": 9989 }, { "epoch": 0.5993880122397552, "grad_norm": 1.1977167129516602, "learning_rate": 2.555540447145083e-06, "loss": 0.3684, "step": 9990 }, { "epoch": 0.5994480110397792, "grad_norm": 1.1976318359375, "learning_rate": 2.5548855300481247e-06, "loss": 0.3786, "step": 9991 }, { "epoch": 0.5995080098398032, "grad_norm": 1.2974226474761963, "learning_rate": 2.5542306486438342e-06, "loss": 0.3788, "step": 9992 }, { "epoch": 0.5995680086398272, "grad_norm": 1.2331130504608154, "learning_rate": 2.5535758029569428e-06, "loss": 0.3892, "step": 9993 }, { "epoch": 0.5996280074398512, "grad_norm": 1.2527555227279663, "learning_rate": 2.5529209930121816e-06, "loss": 0.3669, "step": 9994 }, { "epoch": 0.5996880062398752, "grad_norm": 1.3124825954437256, "learning_rate": 2.5522662188342793e-06, "loss": 0.3537, "step": 9995 }, { "epoch": 0.5997480050398992, "grad_norm": 1.3037915229797363, "learning_rate": 2.551611480447964e-06, "loss": 0.3796, "step": 9996 }, { "epoch": 0.5998080038399232, "grad_norm": 1.2581701278686523, "learning_rate": 2.550956777877963e-06, "loss": 0.3793, "step": 9997 }, { "epoch": 0.5998680026399472, "grad_norm": 1.6039791107177734, "learning_rate": 2.550302111149e-06, "loss": 0.4537, "step": 9998 }, { "epoch": 0.5999280014399712, "grad_norm": 1.2387120723724365, "learning_rate": 2.5496474802858e-06, "loss": 0.4049, "step": 9999 }, { "epoch": 0.5999880002399952, "grad_norm": 1.1872243881225586, "learning_rate": 2.548992885313084e-06, "loss": 0.3279, "step": 10000 }, { "epoch": 0.6000479990400192, "grad_norm": 1.3676910400390625, "learning_rate": 2.5483383262555733e-06, "loss": 0.4321, "step": 10001 }, { "epoch": 0.6001079978400432, "grad_norm": 1.3182059526443481, "learning_rate": 2.547683803137991e-06, "loss": 0.4387, "step": 10002 }, { "epoch": 0.6001679966400671, "grad_norm": 1.2827842235565186, "learning_rate": 2.5470293159850503e-06, "loss": 0.3786, "step": 10003 }, { "epoch": 0.6002279954400912, "grad_norm": 1.4609073400497437, "learning_rate": 2.5463748648214714e-06, "loss": 0.4379, "step": 10004 }, { "epoch": 0.6002879942401153, "grad_norm": 1.3493683338165283, "learning_rate": 2.545720449671969e-06, "loss": 0.4254, "step": 10005 }, { "epoch": 0.6003479930401392, "grad_norm": 1.2804139852523804, "learning_rate": 2.545066070561259e-06, "loss": 0.3809, "step": 10006 }, { "epoch": 0.6004079918401632, "grad_norm": 1.2502604722976685, "learning_rate": 2.5444117275140508e-06, "loss": 0.3992, "step": 10007 }, { "epoch": 0.6004679906401872, "grad_norm": 1.2080830335617065, "learning_rate": 2.5437574205550584e-06, "loss": 0.3667, "step": 10008 }, { "epoch": 0.6005279894402112, "grad_norm": 1.0437519550323486, "learning_rate": 2.543103149708992e-06, "loss": 0.3209, "step": 10009 }, { "epoch": 0.6005879882402352, "grad_norm": 1.299569010734558, "learning_rate": 2.5424489150005596e-06, "loss": 0.419, "step": 10010 }, { "epoch": 0.6006479870402592, "grad_norm": 1.2478251457214355, "learning_rate": 2.5417947164544703e-06, "loss": 0.402, "step": 10011 }, { "epoch": 0.6007079858402832, "grad_norm": 1.2410279512405396, "learning_rate": 2.541140554095428e-06, "loss": 0.4131, "step": 10012 }, { "epoch": 0.6007679846403072, "grad_norm": 1.2111632823944092, "learning_rate": 2.5404864279481374e-06, "loss": 0.3577, "step": 10013 }, { "epoch": 0.6008279834403312, "grad_norm": 1.2063517570495605, "learning_rate": 2.539832338037305e-06, "loss": 0.3515, "step": 10014 }, { "epoch": 0.6008879822403552, "grad_norm": 1.2116221189498901, "learning_rate": 2.5391782843876287e-06, "loss": 0.3999, "step": 10015 }, { "epoch": 0.6009479810403792, "grad_norm": 1.328415036201477, "learning_rate": 2.538524267023814e-06, "loss": 0.4124, "step": 10016 }, { "epoch": 0.6010079798404032, "grad_norm": 1.4113578796386719, "learning_rate": 2.5378702859705545e-06, "loss": 0.3574, "step": 10017 }, { "epoch": 0.6010679786404272, "grad_norm": 1.247597336769104, "learning_rate": 2.537216341252551e-06, "loss": 0.3683, "step": 10018 }, { "epoch": 0.6011279774404512, "grad_norm": 1.2694140672683716, "learning_rate": 2.536562432894501e-06, "loss": 0.3454, "step": 10019 }, { "epoch": 0.6011879762404752, "grad_norm": 1.2187703847885132, "learning_rate": 2.5359085609210982e-06, "loss": 0.3859, "step": 10020 }, { "epoch": 0.6012479750404992, "grad_norm": 1.3051984310150146, "learning_rate": 2.5352547253570368e-06, "loss": 0.4084, "step": 10021 }, { "epoch": 0.6013079738405231, "grad_norm": 1.1253294944763184, "learning_rate": 2.5346009262270082e-06, "loss": 0.3157, "step": 10022 }, { "epoch": 0.6013679726405472, "grad_norm": 1.275720477104187, "learning_rate": 2.533947163555706e-06, "loss": 0.3946, "step": 10023 }, { "epoch": 0.6014279714405711, "grad_norm": 1.4274952411651611, "learning_rate": 2.5332934373678166e-06, "loss": 0.4291, "step": 10024 }, { "epoch": 0.6014879702405952, "grad_norm": 1.4290235042572021, "learning_rate": 2.53263974768803e-06, "loss": 0.415, "step": 10025 }, { "epoch": 0.6015479690406191, "grad_norm": 1.2523839473724365, "learning_rate": 2.531986094541033e-06, "loss": 0.3823, "step": 10026 }, { "epoch": 0.6016079678406432, "grad_norm": 1.2663346529006958, "learning_rate": 2.531332477951511e-06, "loss": 0.4214, "step": 10027 }, { "epoch": 0.6016679666406672, "grad_norm": 1.2679067850112915, "learning_rate": 2.530678897944149e-06, "loss": 0.3599, "step": 10028 }, { "epoch": 0.6017279654406912, "grad_norm": 1.2422035932540894, "learning_rate": 2.5300253545436272e-06, "loss": 0.3456, "step": 10029 }, { "epoch": 0.6017879642407152, "grad_norm": 1.14523184299469, "learning_rate": 2.529371847774628e-06, "loss": 0.3711, "step": 10030 }, { "epoch": 0.6018479630407392, "grad_norm": 1.2592204809188843, "learning_rate": 2.528718377661834e-06, "loss": 0.3715, "step": 10031 }, { "epoch": 0.6019079618407632, "grad_norm": 1.3786797523498535, "learning_rate": 2.5280649442299206e-06, "loss": 0.3741, "step": 10032 }, { "epoch": 0.6019679606407872, "grad_norm": 1.364736795425415, "learning_rate": 2.5274115475035676e-06, "loss": 0.4063, "step": 10033 }, { "epoch": 0.6020279594408112, "grad_norm": 1.223465919494629, "learning_rate": 2.5267581875074466e-06, "loss": 0.3645, "step": 10034 }, { "epoch": 0.6020879582408352, "grad_norm": 1.200966477394104, "learning_rate": 2.526104864266238e-06, "loss": 0.3765, "step": 10035 }, { "epoch": 0.6021479570408592, "grad_norm": 1.4890780448913574, "learning_rate": 2.5254515778046087e-06, "loss": 0.4769, "step": 10036 }, { "epoch": 0.6022079558408832, "grad_norm": 1.2703888416290283, "learning_rate": 2.524798328147234e-06, "loss": 0.428, "step": 10037 }, { "epoch": 0.6022679546409072, "grad_norm": 1.4899262189865112, "learning_rate": 2.524145115318785e-06, "loss": 0.3849, "step": 10038 }, { "epoch": 0.6023279534409312, "grad_norm": 1.2790390253067017, "learning_rate": 2.5234919393439273e-06, "loss": 0.4042, "step": 10039 }, { "epoch": 0.6023879522409552, "grad_norm": 1.247969150543213, "learning_rate": 2.5228388002473314e-06, "loss": 0.36, "step": 10040 }, { "epoch": 0.6024479510409791, "grad_norm": 1.2902402877807617, "learning_rate": 2.522185698053661e-06, "loss": 0.3872, "step": 10041 }, { "epoch": 0.6025079498410032, "grad_norm": 1.2129510641098022, "learning_rate": 2.5215326327875824e-06, "loss": 0.4064, "step": 10042 }, { "epoch": 0.6025679486410271, "grad_norm": 1.304766297340393, "learning_rate": 2.5208796044737582e-06, "loss": 0.3635, "step": 10043 }, { "epoch": 0.6026279474410512, "grad_norm": 1.3988378047943115, "learning_rate": 2.5202266131368508e-06, "loss": 0.4382, "step": 10044 }, { "epoch": 0.6026879462410751, "grad_norm": 1.27381432056427, "learning_rate": 2.5195736588015205e-06, "loss": 0.3986, "step": 10045 }, { "epoch": 0.6027479450410992, "grad_norm": 1.2870711088180542, "learning_rate": 2.5189207414924255e-06, "loss": 0.4059, "step": 10046 }, { "epoch": 0.6028079438411231, "grad_norm": 1.2343387603759766, "learning_rate": 2.5182678612342252e-06, "loss": 0.3961, "step": 10047 }, { "epoch": 0.6028679426411472, "grad_norm": 1.345549464225769, "learning_rate": 2.5176150180515736e-06, "loss": 0.445, "step": 10048 }, { "epoch": 0.6029279414411711, "grad_norm": 1.2783321142196655, "learning_rate": 2.5169622119691277e-06, "loss": 0.4136, "step": 10049 }, { "epoch": 0.6029879402411952, "grad_norm": 1.2819724082946777, "learning_rate": 2.5163094430115412e-06, "loss": 0.3878, "step": 10050 }, { "epoch": 0.6030479390412192, "grad_norm": 1.270997166633606, "learning_rate": 2.5156567112034646e-06, "loss": 0.3712, "step": 10051 }, { "epoch": 0.6031079378412432, "grad_norm": 1.2401875257492065, "learning_rate": 2.5150040165695503e-06, "loss": 0.368, "step": 10052 }, { "epoch": 0.6031679366412672, "grad_norm": 1.427398681640625, "learning_rate": 2.514351359134445e-06, "loss": 0.3945, "step": 10053 }, { "epoch": 0.6032279354412912, "grad_norm": 1.2130131721496582, "learning_rate": 2.5136987389227992e-06, "loss": 0.3703, "step": 10054 }, { "epoch": 0.6032879342413152, "grad_norm": 1.3014230728149414, "learning_rate": 2.5130461559592586e-06, "loss": 0.3494, "step": 10055 }, { "epoch": 0.6033479330413392, "grad_norm": 1.1800286769866943, "learning_rate": 2.512393610268468e-06, "loss": 0.3845, "step": 10056 }, { "epoch": 0.6034079318413632, "grad_norm": 1.3436387777328491, "learning_rate": 2.5117411018750722e-06, "loss": 0.4138, "step": 10057 }, { "epoch": 0.6034679306413872, "grad_norm": 1.4301398992538452, "learning_rate": 2.5110886308037106e-06, "loss": 0.3636, "step": 10058 }, { "epoch": 0.6035279294414112, "grad_norm": 1.3543680906295776, "learning_rate": 2.510436197079026e-06, "loss": 0.386, "step": 10059 }, { "epoch": 0.6035879282414351, "grad_norm": 1.224665641784668, "learning_rate": 2.50978380072566e-06, "loss": 0.4157, "step": 10060 }, { "epoch": 0.6036479270414592, "grad_norm": 1.4139775037765503, "learning_rate": 2.509131441768246e-06, "loss": 0.4424, "step": 10061 }, { "epoch": 0.6037079258414831, "grad_norm": 1.36423659324646, "learning_rate": 2.5084791202314253e-06, "loss": 0.3763, "step": 10062 }, { "epoch": 0.6037679246415072, "grad_norm": 1.254090666770935, "learning_rate": 2.5078268361398285e-06, "loss": 0.438, "step": 10063 }, { "epoch": 0.6038279234415311, "grad_norm": 1.327229380607605, "learning_rate": 2.507174589518095e-06, "loss": 0.3896, "step": 10064 }, { "epoch": 0.6038879222415552, "grad_norm": 1.3113822937011719, "learning_rate": 2.5065223803908506e-06, "loss": 0.3885, "step": 10065 }, { "epoch": 0.6039479210415791, "grad_norm": 1.3330776691436768, "learning_rate": 2.5058702087827304e-06, "loss": 0.4105, "step": 10066 }, { "epoch": 0.6040079198416032, "grad_norm": 1.3723398447036743, "learning_rate": 2.5052180747183643e-06, "loss": 0.4315, "step": 10067 }, { "epoch": 0.6040679186416271, "grad_norm": 1.3087486028671265, "learning_rate": 2.504565978222378e-06, "loss": 0.3699, "step": 10068 }, { "epoch": 0.6041279174416512, "grad_norm": 1.3870933055877686, "learning_rate": 2.503913919319401e-06, "loss": 0.4274, "step": 10069 }, { "epoch": 0.6041879162416751, "grad_norm": 1.1680152416229248, "learning_rate": 2.503261898034056e-06, "loss": 0.334, "step": 10070 }, { "epoch": 0.6042479150416992, "grad_norm": 1.3100624084472656, "learning_rate": 2.502609914390968e-06, "loss": 0.4181, "step": 10071 }, { "epoch": 0.6043079138417232, "grad_norm": 1.3477444648742676, "learning_rate": 2.50195796841476e-06, "loss": 0.4031, "step": 10072 }, { "epoch": 0.6043679126417472, "grad_norm": 1.2884821891784668, "learning_rate": 2.5013060601300522e-06, "loss": 0.3898, "step": 10073 }, { "epoch": 0.6044279114417712, "grad_norm": 1.1914618015289307, "learning_rate": 2.500654189561465e-06, "loss": 0.3856, "step": 10074 }, { "epoch": 0.6044879102417952, "grad_norm": 1.297404408454895, "learning_rate": 2.500002356733615e-06, "loss": 0.3888, "step": 10075 }, { "epoch": 0.6045479090418192, "grad_norm": 1.492594599723816, "learning_rate": 2.4993505616711214e-06, "loss": 0.4653, "step": 10076 }, { "epoch": 0.6046079078418432, "grad_norm": 1.4384112358093262, "learning_rate": 2.498698804398597e-06, "loss": 0.3734, "step": 10077 }, { "epoch": 0.6046679066418672, "grad_norm": 1.3162306547164917, "learning_rate": 2.498047084940656e-06, "loss": 0.3836, "step": 10078 }, { "epoch": 0.6047279054418911, "grad_norm": 1.1526329517364502, "learning_rate": 2.497395403321914e-06, "loss": 0.3856, "step": 10079 }, { "epoch": 0.6047879042419152, "grad_norm": 1.2669264078140259, "learning_rate": 2.4967437595669778e-06, "loss": 0.399, "step": 10080 }, { "epoch": 0.6048479030419391, "grad_norm": 1.3498954772949219, "learning_rate": 2.496092153700461e-06, "loss": 0.3823, "step": 10081 }, { "epoch": 0.6049079018419632, "grad_norm": 1.2122007608413696, "learning_rate": 2.4954405857469675e-06, "loss": 0.3832, "step": 10082 }, { "epoch": 0.6049679006419871, "grad_norm": 1.317188024520874, "learning_rate": 2.4947890557311068e-06, "loss": 0.3808, "step": 10083 }, { "epoch": 0.6050278994420112, "grad_norm": 1.2926604747772217, "learning_rate": 2.494137563677485e-06, "loss": 0.3964, "step": 10084 }, { "epoch": 0.6050878982420351, "grad_norm": 1.4168864488601685, "learning_rate": 2.493486109610704e-06, "loss": 0.4383, "step": 10085 }, { "epoch": 0.6051478970420592, "grad_norm": 1.2658028602600098, "learning_rate": 2.4928346935553674e-06, "loss": 0.4663, "step": 10086 }, { "epoch": 0.6052078958420831, "grad_norm": 1.3372268676757812, "learning_rate": 2.4921833155360746e-06, "loss": 0.4316, "step": 10087 }, { "epoch": 0.6052678946421072, "grad_norm": 1.2502610683441162, "learning_rate": 2.4915319755774277e-06, "loss": 0.416, "step": 10088 }, { "epoch": 0.6053278934421311, "grad_norm": 1.2588222026824951, "learning_rate": 2.4908806737040227e-06, "loss": 0.3603, "step": 10089 }, { "epoch": 0.6053878922421552, "grad_norm": 1.339167594909668, "learning_rate": 2.490229409940457e-06, "loss": 0.3808, "step": 10090 }, { "epoch": 0.6054478910421791, "grad_norm": 1.4434301853179932, "learning_rate": 2.4895781843113273e-06, "loss": 0.4217, "step": 10091 }, { "epoch": 0.6055078898422032, "grad_norm": 1.4797245264053345, "learning_rate": 2.488926996841225e-06, "loss": 0.4058, "step": 10092 }, { "epoch": 0.6055678886422271, "grad_norm": 1.299880862236023, "learning_rate": 2.488275847554744e-06, "loss": 0.4057, "step": 10093 }, { "epoch": 0.6056278874422512, "grad_norm": 1.2677898406982422, "learning_rate": 2.487624736476474e-06, "loss": 0.3875, "step": 10094 }, { "epoch": 0.6056878862422752, "grad_norm": 1.3981468677520752, "learning_rate": 2.486973663631005e-06, "loss": 0.3873, "step": 10095 }, { "epoch": 0.6057478850422992, "grad_norm": 1.388201355934143, "learning_rate": 2.4863226290429273e-06, "loss": 0.4128, "step": 10096 }, { "epoch": 0.6058078838423232, "grad_norm": 1.3318790197372437, "learning_rate": 2.485671632736824e-06, "loss": 0.4089, "step": 10097 }, { "epoch": 0.6058678826423471, "grad_norm": 1.2907840013504028, "learning_rate": 2.4850206747372826e-06, "loss": 0.4105, "step": 10098 }, { "epoch": 0.6059278814423712, "grad_norm": 1.3304671049118042, "learning_rate": 2.4843697550688858e-06, "loss": 0.4062, "step": 10099 }, { "epoch": 0.6059878802423951, "grad_norm": 1.0948678255081177, "learning_rate": 2.4837188737562164e-06, "loss": 0.3406, "step": 10100 }, { "epoch": 0.6060478790424192, "grad_norm": 1.3650602102279663, "learning_rate": 2.4830680308238554e-06, "loss": 0.4239, "step": 10101 }, { "epoch": 0.6061078778424431, "grad_norm": 1.3288716077804565, "learning_rate": 2.4824172262963804e-06, "loss": 0.3601, "step": 10102 }, { "epoch": 0.6061678766424672, "grad_norm": 1.339133381843567, "learning_rate": 2.4817664601983726e-06, "loss": 0.3863, "step": 10103 }, { "epoch": 0.6062278754424911, "grad_norm": 1.2890219688415527, "learning_rate": 2.481115732554405e-06, "loss": 0.4135, "step": 10104 }, { "epoch": 0.6062878742425152, "grad_norm": 1.1721198558807373, "learning_rate": 2.480465043389055e-06, "loss": 0.3664, "step": 10105 }, { "epoch": 0.6063478730425391, "grad_norm": 1.3753726482391357, "learning_rate": 2.4798143927268945e-06, "loss": 0.3921, "step": 10106 }, { "epoch": 0.6064078718425632, "grad_norm": 1.298007845878601, "learning_rate": 2.4791637805924955e-06, "loss": 0.3919, "step": 10107 }, { "epoch": 0.6064678706425871, "grad_norm": 1.2644087076187134, "learning_rate": 2.4785132070104327e-06, "loss": 0.3773, "step": 10108 }, { "epoch": 0.6065278694426112, "grad_norm": 1.2411789894104004, "learning_rate": 2.4778626720052695e-06, "loss": 0.3748, "step": 10109 }, { "epoch": 0.6065878682426351, "grad_norm": 1.2793431282043457, "learning_rate": 2.477212175601578e-06, "loss": 0.4336, "step": 10110 }, { "epoch": 0.6066478670426592, "grad_norm": 1.3607810735702515, "learning_rate": 2.4765617178239217e-06, "loss": 0.3724, "step": 10111 }, { "epoch": 0.6067078658426831, "grad_norm": 1.4756510257720947, "learning_rate": 2.4759112986968667e-06, "loss": 0.4445, "step": 10112 }, { "epoch": 0.6067678646427072, "grad_norm": 1.2879133224487305, "learning_rate": 2.475260918244976e-06, "loss": 0.3394, "step": 10113 }, { "epoch": 0.6068278634427311, "grad_norm": 1.1537553071975708, "learning_rate": 2.474610576492812e-06, "loss": 0.3977, "step": 10114 }, { "epoch": 0.6068878622427551, "grad_norm": 1.2053921222686768, "learning_rate": 2.473960273464936e-06, "loss": 0.3811, "step": 10115 }, { "epoch": 0.6069478610427791, "grad_norm": 1.2212498188018799, "learning_rate": 2.4733100091859046e-06, "loss": 0.366, "step": 10116 }, { "epoch": 0.6070078598428031, "grad_norm": 1.4208694696426392, "learning_rate": 2.472659783680277e-06, "loss": 0.3873, "step": 10117 }, { "epoch": 0.6070678586428272, "grad_norm": 1.2699261903762817, "learning_rate": 2.4720095969726084e-06, "loss": 0.3583, "step": 10118 }, { "epoch": 0.6071278574428511, "grad_norm": 1.3690677881240845, "learning_rate": 2.471359449087454e-06, "loss": 0.3959, "step": 10119 }, { "epoch": 0.6071878562428752, "grad_norm": 1.4654059410095215, "learning_rate": 2.470709340049367e-06, "loss": 0.3787, "step": 10120 }, { "epoch": 0.6072478550428991, "grad_norm": 1.4647186994552612, "learning_rate": 2.4700592698828983e-06, "loss": 0.4238, "step": 10121 }, { "epoch": 0.6073078538429232, "grad_norm": 1.2503762245178223, "learning_rate": 2.469409238612599e-06, "loss": 0.397, "step": 10122 }, { "epoch": 0.6073678526429471, "grad_norm": 1.3663731813430786, "learning_rate": 2.4687592462630174e-06, "loss": 0.4001, "step": 10123 }, { "epoch": 0.6074278514429712, "grad_norm": 1.1809656620025635, "learning_rate": 2.4681092928586995e-06, "loss": 0.4223, "step": 10124 }, { "epoch": 0.6074878502429951, "grad_norm": 1.2639836072921753, "learning_rate": 2.4674593784241944e-06, "loss": 0.3621, "step": 10125 }, { "epoch": 0.6075478490430192, "grad_norm": 1.5234320163726807, "learning_rate": 2.4668095029840424e-06, "loss": 0.4106, "step": 10126 }, { "epoch": 0.6076078478430431, "grad_norm": 1.3014287948608398, "learning_rate": 2.4661596665627905e-06, "loss": 0.3949, "step": 10127 }, { "epoch": 0.6076678466430672, "grad_norm": 1.250298023223877, "learning_rate": 2.465509869184976e-06, "loss": 0.391, "step": 10128 }, { "epoch": 0.6077278454430911, "grad_norm": 1.2772936820983887, "learning_rate": 2.464860110875141e-06, "loss": 0.4181, "step": 10129 }, { "epoch": 0.6077878442431152, "grad_norm": 1.2705068588256836, "learning_rate": 2.4642103916578237e-06, "loss": 0.4509, "step": 10130 }, { "epoch": 0.6078478430431391, "grad_norm": 1.1763900518417358, "learning_rate": 2.4635607115575606e-06, "loss": 0.3622, "step": 10131 }, { "epoch": 0.6079078418431632, "grad_norm": 1.2409664392471313, "learning_rate": 2.462911070598888e-06, "loss": 0.3806, "step": 10132 }, { "epoch": 0.6079678406431871, "grad_norm": 1.1913400888442993, "learning_rate": 2.462261468806339e-06, "loss": 0.3461, "step": 10133 }, { "epoch": 0.6080278394432111, "grad_norm": 1.1533854007720947, "learning_rate": 2.461611906204447e-06, "loss": 0.3446, "step": 10134 }, { "epoch": 0.6080878382432351, "grad_norm": 1.085150122642517, "learning_rate": 2.4609623828177412e-06, "loss": 0.2986, "step": 10135 }, { "epoch": 0.6081478370432591, "grad_norm": 1.2689485549926758, "learning_rate": 2.460312898670753e-06, "loss": 0.3888, "step": 10136 }, { "epoch": 0.6082078358432831, "grad_norm": 1.33867347240448, "learning_rate": 2.45966345378801e-06, "loss": 0.4215, "step": 10137 }, { "epoch": 0.6082678346433071, "grad_norm": 1.2457724809646606, "learning_rate": 2.4590140481940377e-06, "loss": 0.3773, "step": 10138 }, { "epoch": 0.6083278334433311, "grad_norm": 1.2890533208847046, "learning_rate": 2.4583646819133634e-06, "loss": 0.3935, "step": 10139 }, { "epoch": 0.6083878322433551, "grad_norm": 1.2473942041397095, "learning_rate": 2.4577153549705077e-06, "loss": 0.3811, "step": 10140 }, { "epoch": 0.6084478310433792, "grad_norm": 1.3478721380233765, "learning_rate": 2.4570660673899956e-06, "loss": 0.3885, "step": 10141 }, { "epoch": 0.6085078298434031, "grad_norm": 1.2242403030395508, "learning_rate": 2.4564168191963465e-06, "loss": 0.3561, "step": 10142 }, { "epoch": 0.6085678286434272, "grad_norm": 1.3708844184875488, "learning_rate": 2.455767610414079e-06, "loss": 0.3739, "step": 10143 }, { "epoch": 0.6086278274434511, "grad_norm": 1.3342413902282715, "learning_rate": 2.4551184410677125e-06, "loss": 0.412, "step": 10144 }, { "epoch": 0.6086878262434752, "grad_norm": 1.2201088666915894, "learning_rate": 2.4544693111817607e-06, "loss": 0.3577, "step": 10145 }, { "epoch": 0.6087478250434991, "grad_norm": 1.4892077445983887, "learning_rate": 2.4538202207807405e-06, "loss": 0.3986, "step": 10146 }, { "epoch": 0.6088078238435232, "grad_norm": 1.3434699773788452, "learning_rate": 2.453171169889164e-06, "loss": 0.3924, "step": 10147 }, { "epoch": 0.6088678226435471, "grad_norm": 1.2968326807022095, "learning_rate": 2.4525221585315428e-06, "loss": 0.4221, "step": 10148 }, { "epoch": 0.6089278214435712, "grad_norm": 1.404398798942566, "learning_rate": 2.4518731867323876e-06, "loss": 0.4267, "step": 10149 }, { "epoch": 0.6089878202435951, "grad_norm": 1.274472951889038, "learning_rate": 2.4512242545162065e-06, "loss": 0.3625, "step": 10150 }, { "epoch": 0.6090478190436192, "grad_norm": 1.3032110929489136, "learning_rate": 2.450575361907508e-06, "loss": 0.3822, "step": 10151 }, { "epoch": 0.6091078178436431, "grad_norm": 1.3164881467819214, "learning_rate": 2.4499265089307965e-06, "loss": 0.3862, "step": 10152 }, { "epoch": 0.6091678166436671, "grad_norm": 1.3177322149276733, "learning_rate": 2.4492776956105753e-06, "loss": 0.3999, "step": 10153 }, { "epoch": 0.6092278154436911, "grad_norm": 1.3469079732894897, "learning_rate": 2.4486289219713506e-06, "loss": 0.4211, "step": 10154 }, { "epoch": 0.6092878142437151, "grad_norm": 1.3864134550094604, "learning_rate": 2.4479801880376195e-06, "loss": 0.3614, "step": 10155 }, { "epoch": 0.6093478130437391, "grad_norm": 1.3260470628738403, "learning_rate": 2.4473314938338858e-06, "loss": 0.36, "step": 10156 }, { "epoch": 0.6094078118437631, "grad_norm": 1.226315975189209, "learning_rate": 2.4466828393846435e-06, "loss": 0.3634, "step": 10157 }, { "epoch": 0.6094678106437871, "grad_norm": 1.4199669361114502, "learning_rate": 2.4460342247143935e-06, "loss": 0.3951, "step": 10158 }, { "epoch": 0.6095278094438111, "grad_norm": 1.2039083242416382, "learning_rate": 2.4453856498476274e-06, "loss": 0.3828, "step": 10159 }, { "epoch": 0.6095878082438351, "grad_norm": 1.3512039184570312, "learning_rate": 2.4447371148088405e-06, "loss": 0.4168, "step": 10160 }, { "epoch": 0.6096478070438591, "grad_norm": 1.2603371143341064, "learning_rate": 2.4440886196225264e-06, "loss": 0.4212, "step": 10161 }, { "epoch": 0.6097078058438832, "grad_norm": 1.1839234828948975, "learning_rate": 2.443440164313173e-06, "loss": 0.3521, "step": 10162 }, { "epoch": 0.6097678046439071, "grad_norm": 1.2627222537994385, "learning_rate": 2.4427917489052722e-06, "loss": 0.343, "step": 10163 }, { "epoch": 0.6098278034439312, "grad_norm": 1.3579072952270508, "learning_rate": 2.4421433734233094e-06, "loss": 0.3878, "step": 10164 }, { "epoch": 0.6098878022439551, "grad_norm": 1.1957579851150513, "learning_rate": 2.441495037891772e-06, "loss": 0.3625, "step": 10165 }, { "epoch": 0.6099478010439792, "grad_norm": 1.3700339794158936, "learning_rate": 2.4408467423351446e-06, "loss": 0.382, "step": 10166 }, { "epoch": 0.6100077998440031, "grad_norm": 1.3939476013183594, "learning_rate": 2.44019848677791e-06, "loss": 0.4339, "step": 10167 }, { "epoch": 0.6100677986440272, "grad_norm": 1.4154119491577148, "learning_rate": 2.4395502712445507e-06, "loss": 0.4092, "step": 10168 }, { "epoch": 0.6101277974440511, "grad_norm": 1.3445661067962646, "learning_rate": 2.4389020957595456e-06, "loss": 0.384, "step": 10169 }, { "epoch": 0.6101877962440752, "grad_norm": 1.2789400815963745, "learning_rate": 2.438253960347375e-06, "loss": 0.3991, "step": 10170 }, { "epoch": 0.6102477950440991, "grad_norm": 1.2360365390777588, "learning_rate": 2.437605865032514e-06, "loss": 0.3893, "step": 10171 }, { "epoch": 0.6103077938441231, "grad_norm": 1.291959285736084, "learning_rate": 2.4369578098394383e-06, "loss": 0.3639, "step": 10172 }, { "epoch": 0.6103677926441471, "grad_norm": 1.2493497133255005, "learning_rate": 2.436309794792626e-06, "loss": 0.4131, "step": 10173 }, { "epoch": 0.6104277914441711, "grad_norm": 1.2156699895858765, "learning_rate": 2.4356618199165434e-06, "loss": 0.389, "step": 10174 }, { "epoch": 0.6104877902441951, "grad_norm": 1.3202223777770996, "learning_rate": 2.435013885235667e-06, "loss": 0.4396, "step": 10175 }, { "epoch": 0.6105477890442191, "grad_norm": 1.3151905536651611, "learning_rate": 2.4343659907744626e-06, "loss": 0.379, "step": 10176 }, { "epoch": 0.6106077878442431, "grad_norm": 1.1741275787353516, "learning_rate": 2.4337181365574e-06, "loss": 0.4026, "step": 10177 }, { "epoch": 0.6106677866442671, "grad_norm": 1.2977055311203003, "learning_rate": 2.433070322608946e-06, "loss": 0.3902, "step": 10178 }, { "epoch": 0.6107277854442911, "grad_norm": 1.3226027488708496, "learning_rate": 2.4324225489535645e-06, "loss": 0.4016, "step": 10179 }, { "epoch": 0.6107877842443151, "grad_norm": 1.4190599918365479, "learning_rate": 2.43177481561572e-06, "loss": 0.4388, "step": 10180 }, { "epoch": 0.6108477830443391, "grad_norm": 1.3603154420852661, "learning_rate": 2.4311271226198733e-06, "loss": 0.4456, "step": 10181 }, { "epoch": 0.6109077818443631, "grad_norm": 1.438470721244812, "learning_rate": 2.4304794699904854e-06, "loss": 0.427, "step": 10182 }, { "epoch": 0.6109677806443871, "grad_norm": 1.2737044095993042, "learning_rate": 2.4298318577520165e-06, "loss": 0.4255, "step": 10183 }, { "epoch": 0.6110277794444111, "grad_norm": 1.2637145519256592, "learning_rate": 2.42918428592892e-06, "loss": 0.3829, "step": 10184 }, { "epoch": 0.6110877782444352, "grad_norm": 1.2442100048065186, "learning_rate": 2.4285367545456576e-06, "loss": 0.4239, "step": 10185 }, { "epoch": 0.6111477770444591, "grad_norm": 1.3641932010650635, "learning_rate": 2.4278892636266775e-06, "loss": 0.4199, "step": 10186 }, { "epoch": 0.6112077758444832, "grad_norm": 1.32269287109375, "learning_rate": 2.427241813196438e-06, "loss": 0.3939, "step": 10187 }, { "epoch": 0.6112677746445071, "grad_norm": 1.305168628692627, "learning_rate": 2.4265944032793857e-06, "loss": 0.3709, "step": 10188 }, { "epoch": 0.6113277734445312, "grad_norm": 1.2358757257461548, "learning_rate": 2.4259470338999727e-06, "loss": 0.3462, "step": 10189 }, { "epoch": 0.6113877722445551, "grad_norm": 1.3611669540405273, "learning_rate": 2.4252997050826475e-06, "loss": 0.431, "step": 10190 }, { "epoch": 0.6114477710445791, "grad_norm": 1.1773825883865356, "learning_rate": 2.424652416851856e-06, "loss": 0.3853, "step": 10191 }, { "epoch": 0.6115077698446031, "grad_norm": 1.2470983266830444, "learning_rate": 2.4240051692320434e-06, "loss": 0.3968, "step": 10192 }, { "epoch": 0.6115677686446271, "grad_norm": 1.2632148265838623, "learning_rate": 2.4233579622476527e-06, "loss": 0.3976, "step": 10193 }, { "epoch": 0.6116277674446511, "grad_norm": 1.3339378833770752, "learning_rate": 2.422710795923127e-06, "loss": 0.3835, "step": 10194 }, { "epoch": 0.6116877662446751, "grad_norm": 1.2616465091705322, "learning_rate": 2.4220636702829075e-06, "loss": 0.4012, "step": 10195 }, { "epoch": 0.6117477650446991, "grad_norm": 1.3346139192581177, "learning_rate": 2.4214165853514307e-06, "loss": 0.3747, "step": 10196 }, { "epoch": 0.6118077638447231, "grad_norm": 1.3427238464355469, "learning_rate": 2.4207695411531365e-06, "loss": 0.3843, "step": 10197 }, { "epoch": 0.6118677626447471, "grad_norm": 1.3378398418426514, "learning_rate": 2.420122537712459e-06, "loss": 0.3637, "step": 10198 }, { "epoch": 0.6119277614447711, "grad_norm": 1.2658287286758423, "learning_rate": 2.419475575053834e-06, "loss": 0.396, "step": 10199 }, { "epoch": 0.6119877602447951, "grad_norm": 1.3569386005401611, "learning_rate": 2.418828653201693e-06, "loss": 0.4026, "step": 10200 }, { "epoch": 0.6120477590448191, "grad_norm": 1.3704365491867065, "learning_rate": 2.418181772180467e-06, "loss": 0.3742, "step": 10201 }, { "epoch": 0.6121077578448431, "grad_norm": 1.3926784992218018, "learning_rate": 2.4175349320145895e-06, "loss": 0.3733, "step": 10202 }, { "epoch": 0.6121677566448671, "grad_norm": 1.2890658378601074, "learning_rate": 2.416888132728483e-06, "loss": 0.3706, "step": 10203 }, { "epoch": 0.612227755444891, "grad_norm": 1.4029898643493652, "learning_rate": 2.4162413743465794e-06, "loss": 0.3941, "step": 10204 }, { "epoch": 0.6122877542449151, "grad_norm": 1.386976957321167, "learning_rate": 2.4155946568933e-06, "loss": 0.4088, "step": 10205 }, { "epoch": 0.612347753044939, "grad_norm": 1.4348430633544922, "learning_rate": 2.41494798039307e-06, "loss": 0.3929, "step": 10206 }, { "epoch": 0.6124077518449631, "grad_norm": 1.1789430379867554, "learning_rate": 2.414301344870312e-06, "loss": 0.4, "step": 10207 }, { "epoch": 0.6124677506449872, "grad_norm": 1.3098938465118408, "learning_rate": 2.413654750349445e-06, "loss": 0.3669, "step": 10208 }, { "epoch": 0.6125277494450111, "grad_norm": 1.2789216041564941, "learning_rate": 2.4130081968548897e-06, "loss": 0.3818, "step": 10209 }, { "epoch": 0.6125877482450351, "grad_norm": 1.2145811319351196, "learning_rate": 2.4123616844110616e-06, "loss": 0.3887, "step": 10210 }, { "epoch": 0.6126477470450591, "grad_norm": 1.3090683221817017, "learning_rate": 2.411715213042378e-06, "loss": 0.3858, "step": 10211 }, { "epoch": 0.6127077458450831, "grad_norm": 1.3686212301254272, "learning_rate": 2.4110687827732523e-06, "loss": 0.4007, "step": 10212 }, { "epoch": 0.6127677446451071, "grad_norm": 1.2871685028076172, "learning_rate": 2.410422393628097e-06, "loss": 0.4068, "step": 10213 }, { "epoch": 0.6128277434451311, "grad_norm": 1.319354772567749, "learning_rate": 2.409776045631325e-06, "loss": 0.4276, "step": 10214 }, { "epoch": 0.6128877422451551, "grad_norm": 1.436977744102478, "learning_rate": 2.409129738807344e-06, "loss": 0.4035, "step": 10215 }, { "epoch": 0.6129477410451791, "grad_norm": 1.2515387535095215, "learning_rate": 2.4084834731805633e-06, "loss": 0.3494, "step": 10216 }, { "epoch": 0.6130077398452031, "grad_norm": 1.458276391029358, "learning_rate": 2.407837248775388e-06, "loss": 0.395, "step": 10217 }, { "epoch": 0.6130677386452271, "grad_norm": 1.248511552810669, "learning_rate": 2.4071910656162233e-06, "loss": 0.3881, "step": 10218 }, { "epoch": 0.6131277374452511, "grad_norm": 1.363012671470642, "learning_rate": 2.4065449237274757e-06, "loss": 0.4213, "step": 10219 }, { "epoch": 0.6131877362452751, "grad_norm": 1.170915961265564, "learning_rate": 2.4058988231335417e-06, "loss": 0.402, "step": 10220 }, { "epoch": 0.6132477350452991, "grad_norm": 1.2445790767669678, "learning_rate": 2.405252763858827e-06, "loss": 0.3493, "step": 10221 }, { "epoch": 0.6133077338453231, "grad_norm": 1.281814694404602, "learning_rate": 2.4046067459277256e-06, "loss": 0.4154, "step": 10222 }, { "epoch": 0.613367732645347, "grad_norm": 1.4099712371826172, "learning_rate": 2.4039607693646376e-06, "loss": 0.4365, "step": 10223 }, { "epoch": 0.6134277314453711, "grad_norm": 1.1640464067459106, "learning_rate": 2.403314834193959e-06, "loss": 0.3793, "step": 10224 }, { "epoch": 0.613487730245395, "grad_norm": 1.2975186109542847, "learning_rate": 2.4026689404400813e-06, "loss": 0.397, "step": 10225 }, { "epoch": 0.6135477290454191, "grad_norm": 1.2771393060684204, "learning_rate": 2.402023088127399e-06, "loss": 0.3641, "step": 10226 }, { "epoch": 0.613607727845443, "grad_norm": 1.254449725151062, "learning_rate": 2.4013772772803015e-06, "loss": 0.3254, "step": 10227 }, { "epoch": 0.6136677266454671, "grad_norm": 1.2527025938034058, "learning_rate": 2.40073150792318e-06, "loss": 0.3667, "step": 10228 }, { "epoch": 0.613727725445491, "grad_norm": 1.434330940246582, "learning_rate": 2.40008578008042e-06, "loss": 0.4334, "step": 10229 }, { "epoch": 0.6137877242455151, "grad_norm": 1.3112244606018066, "learning_rate": 2.3994400937764085e-06, "loss": 0.3923, "step": 10230 }, { "epoch": 0.6138477230455391, "grad_norm": 1.388472557067871, "learning_rate": 2.398794449035533e-06, "loss": 0.3677, "step": 10231 }, { "epoch": 0.6139077218455631, "grad_norm": 1.3267639875411987, "learning_rate": 2.398148845882171e-06, "loss": 0.4017, "step": 10232 }, { "epoch": 0.6139677206455871, "grad_norm": 1.3188923597335815, "learning_rate": 2.39750328434071e-06, "loss": 0.4418, "step": 10233 }, { "epoch": 0.6140277194456111, "grad_norm": 1.3066998720169067, "learning_rate": 2.396857764435525e-06, "loss": 0.3545, "step": 10234 }, { "epoch": 0.6140877182456351, "grad_norm": 1.2712973356246948, "learning_rate": 2.396212286190996e-06, "loss": 0.4498, "step": 10235 }, { "epoch": 0.6141477170456591, "grad_norm": 1.2202138900756836, "learning_rate": 2.3955668496315015e-06, "loss": 0.3861, "step": 10236 }, { "epoch": 0.6142077158456831, "grad_norm": 1.307410717010498, "learning_rate": 2.394921454781415e-06, "loss": 0.3682, "step": 10237 }, { "epoch": 0.6142677146457071, "grad_norm": 1.4193655252456665, "learning_rate": 2.3942761016651103e-06, "loss": 0.4203, "step": 10238 }, { "epoch": 0.6143277134457311, "grad_norm": 1.3399150371551514, "learning_rate": 2.3936307903069596e-06, "loss": 0.3908, "step": 10239 }, { "epoch": 0.6143877122457551, "grad_norm": 1.2326244115829468, "learning_rate": 2.3929855207313335e-06, "loss": 0.3783, "step": 10240 }, { "epoch": 0.6144477110457791, "grad_norm": 1.299831748008728, "learning_rate": 2.3923402929626e-06, "loss": 0.4063, "step": 10241 }, { "epoch": 0.614507709845803, "grad_norm": 1.2445399761199951, "learning_rate": 2.3916951070251273e-06, "loss": 0.3428, "step": 10242 }, { "epoch": 0.6145677086458271, "grad_norm": 1.2225301265716553, "learning_rate": 2.391049962943282e-06, "loss": 0.4029, "step": 10243 }, { "epoch": 0.614627707445851, "grad_norm": 1.225390911102295, "learning_rate": 2.3904048607414264e-06, "loss": 0.4182, "step": 10244 }, { "epoch": 0.6146877062458751, "grad_norm": 1.2277649641036987, "learning_rate": 2.3897598004439243e-06, "loss": 0.3902, "step": 10245 }, { "epoch": 0.614747705045899, "grad_norm": 1.2523736953735352, "learning_rate": 2.3891147820751362e-06, "loss": 0.3776, "step": 10246 }, { "epoch": 0.6148077038459231, "grad_norm": 1.255807876586914, "learning_rate": 2.3884698056594204e-06, "loss": 0.3673, "step": 10247 }, { "epoch": 0.614867702645947, "grad_norm": 1.2435821294784546, "learning_rate": 2.3878248712211382e-06, "loss": 0.3393, "step": 10248 }, { "epoch": 0.6149277014459711, "grad_norm": 1.2070019245147705, "learning_rate": 2.387179978784641e-06, "loss": 0.4454, "step": 10249 }, { "epoch": 0.614987700245995, "grad_norm": 1.358673095703125, "learning_rate": 2.386535128374289e-06, "loss": 0.4279, "step": 10250 }, { "epoch": 0.6150476990460191, "grad_norm": 1.195975422859192, "learning_rate": 2.3858903200144296e-06, "loss": 0.4017, "step": 10251 }, { "epoch": 0.6151076978460431, "grad_norm": 1.372786521911621, "learning_rate": 2.3852455537294195e-06, "loss": 0.4012, "step": 10252 }, { "epoch": 0.6151676966460671, "grad_norm": 1.3016446828842163, "learning_rate": 2.3846008295436032e-06, "loss": 0.4017, "step": 10253 }, { "epoch": 0.6152276954460911, "grad_norm": 1.2820637226104736, "learning_rate": 2.3839561474813326e-06, "loss": 0.3634, "step": 10254 }, { "epoch": 0.6152876942461151, "grad_norm": 1.3403747081756592, "learning_rate": 2.3833115075669548e-06, "loss": 0.3603, "step": 10255 }, { "epoch": 0.6153476930461391, "grad_norm": 1.5233765840530396, "learning_rate": 2.3826669098248125e-06, "loss": 0.4479, "step": 10256 }, { "epoch": 0.6154076918461631, "grad_norm": 1.1697516441345215, "learning_rate": 2.382022354279252e-06, "loss": 0.3663, "step": 10257 }, { "epoch": 0.6154676906461871, "grad_norm": 1.2439167499542236, "learning_rate": 2.3813778409546123e-06, "loss": 0.4225, "step": 10258 }, { "epoch": 0.6155276894462111, "grad_norm": 1.1820141077041626, "learning_rate": 2.3807333698752355e-06, "loss": 0.3692, "step": 10259 }, { "epoch": 0.6155876882462351, "grad_norm": 1.2887660264968872, "learning_rate": 2.3800889410654603e-06, "loss": 0.3477, "step": 10260 }, { "epoch": 0.615647687046259, "grad_norm": 1.2739042043685913, "learning_rate": 2.379444554549623e-06, "loss": 0.4047, "step": 10261 }, { "epoch": 0.6157076858462831, "grad_norm": 1.220259428024292, "learning_rate": 2.37880021035206e-06, "loss": 0.3744, "step": 10262 }, { "epoch": 0.615767684646307, "grad_norm": 1.4031051397323608, "learning_rate": 2.3781559084971047e-06, "loss": 0.395, "step": 10263 }, { "epoch": 0.6158276834463311, "grad_norm": 1.3669023513793945, "learning_rate": 2.3775116490090886e-06, "loss": 0.419, "step": 10264 }, { "epoch": 0.615887682246355, "grad_norm": 1.2998225688934326, "learning_rate": 2.376867431912345e-06, "loss": 0.3466, "step": 10265 }, { "epoch": 0.6159476810463791, "grad_norm": 1.2731044292449951, "learning_rate": 2.376223257231201e-06, "loss": 0.3902, "step": 10266 }, { "epoch": 0.616007679846403, "grad_norm": 1.265771746635437, "learning_rate": 2.375579124989985e-06, "loss": 0.3818, "step": 10267 }, { "epoch": 0.6160676786464271, "grad_norm": 1.3408113718032837, "learning_rate": 2.374935035213022e-06, "loss": 0.423, "step": 10268 }, { "epoch": 0.616127677446451, "grad_norm": 1.432759404182434, "learning_rate": 2.374290987924638e-06, "loss": 0.3801, "step": 10269 }, { "epoch": 0.6161876762464751, "grad_norm": 1.2398160696029663, "learning_rate": 2.3736469831491535e-06, "loss": 0.3621, "step": 10270 }, { "epoch": 0.616247675046499, "grad_norm": 1.4267557859420776, "learning_rate": 2.3730030209108903e-06, "loss": 0.3947, "step": 10271 }, { "epoch": 0.6163076738465231, "grad_norm": 1.3927851915359497, "learning_rate": 2.3723591012341704e-06, "loss": 0.3822, "step": 10272 }, { "epoch": 0.616367672646547, "grad_norm": 1.236844539642334, "learning_rate": 2.371715224143308e-06, "loss": 0.3714, "step": 10273 }, { "epoch": 0.6164276714465711, "grad_norm": 1.3537349700927734, "learning_rate": 2.371071389662622e-06, "loss": 0.4003, "step": 10274 }, { "epoch": 0.6164876702465951, "grad_norm": 1.2533867359161377, "learning_rate": 2.370427597816426e-06, "loss": 0.3749, "step": 10275 }, { "epoch": 0.6165476690466191, "grad_norm": 1.3131684064865112, "learning_rate": 2.3697838486290325e-06, "loss": 0.3908, "step": 10276 }, { "epoch": 0.6166076678466431, "grad_norm": 1.2932415008544922, "learning_rate": 2.3691401421247557e-06, "loss": 0.3961, "step": 10277 }, { "epoch": 0.6166676666466671, "grad_norm": 1.336118459701538, "learning_rate": 2.3684964783279013e-06, "loss": 0.3982, "step": 10278 }, { "epoch": 0.6167276654466911, "grad_norm": 1.2045966386795044, "learning_rate": 2.367852857262782e-06, "loss": 0.3189, "step": 10279 }, { "epoch": 0.616787664246715, "grad_norm": 1.2079119682312012, "learning_rate": 2.3672092789537006e-06, "loss": 0.3351, "step": 10280 }, { "epoch": 0.6168476630467391, "grad_norm": 1.2496920824050903, "learning_rate": 2.3665657434249656e-06, "loss": 0.4065, "step": 10281 }, { "epoch": 0.616907661846763, "grad_norm": 1.3368403911590576, "learning_rate": 2.365922250700876e-06, "loss": 0.4419, "step": 10282 }, { "epoch": 0.6169676606467871, "grad_norm": 1.2686539888381958, "learning_rate": 2.365278800805737e-06, "loss": 0.3514, "step": 10283 }, { "epoch": 0.617027659446811, "grad_norm": 1.2062033414840698, "learning_rate": 2.364635393763849e-06, "loss": 0.4016, "step": 10284 }, { "epoch": 0.6170876582468351, "grad_norm": 1.2009867429733276, "learning_rate": 2.3639920295995083e-06, "loss": 0.3718, "step": 10285 }, { "epoch": 0.617147657046859, "grad_norm": 1.774511694908142, "learning_rate": 2.363348708337014e-06, "loss": 0.4155, "step": 10286 }, { "epoch": 0.6172076558468831, "grad_norm": 1.163481593132019, "learning_rate": 2.3627054300006597e-06, "loss": 0.3563, "step": 10287 }, { "epoch": 0.617267654646907, "grad_norm": 1.3770794868469238, "learning_rate": 2.3620621946147398e-06, "loss": 0.4264, "step": 10288 }, { "epoch": 0.6173276534469311, "grad_norm": 1.2553967237472534, "learning_rate": 2.361419002203547e-06, "loss": 0.3998, "step": 10289 }, { "epoch": 0.617387652246955, "grad_norm": 1.3700988292694092, "learning_rate": 2.3607758527913706e-06, "loss": 0.4054, "step": 10290 }, { "epoch": 0.6174476510469791, "grad_norm": 1.2693506479263306, "learning_rate": 2.3601327464025004e-06, "loss": 0.443, "step": 10291 }, { "epoch": 0.617507649847003, "grad_norm": 1.276442289352417, "learning_rate": 2.3594896830612227e-06, "loss": 0.3591, "step": 10292 }, { "epoch": 0.6175676486470271, "grad_norm": 1.2999807596206665, "learning_rate": 2.358846662791824e-06, "loss": 0.3801, "step": 10293 }, { "epoch": 0.617627647447051, "grad_norm": 1.41158926486969, "learning_rate": 2.3582036856185867e-06, "loss": 0.387, "step": 10294 }, { "epoch": 0.6176876462470751, "grad_norm": 1.2454593181610107, "learning_rate": 2.3575607515657944e-06, "loss": 0.3979, "step": 10295 }, { "epoch": 0.617747645047099, "grad_norm": 1.2397055625915527, "learning_rate": 2.3569178606577294e-06, "loss": 0.3799, "step": 10296 }, { "epoch": 0.6178076438471231, "grad_norm": 1.2386904954910278, "learning_rate": 2.3562750129186663e-06, "loss": 0.3338, "step": 10297 }, { "epoch": 0.6178676426471471, "grad_norm": 1.3395105600357056, "learning_rate": 2.355632208372888e-06, "loss": 0.3747, "step": 10298 }, { "epoch": 0.617927641447171, "grad_norm": 1.3506214618682861, "learning_rate": 2.3549894470446643e-06, "loss": 0.3925, "step": 10299 }, { "epoch": 0.6179876402471951, "grad_norm": 1.3373781442642212, "learning_rate": 2.354346728958274e-06, "loss": 0.4136, "step": 10300 }, { "epoch": 0.618047639047219, "grad_norm": 1.318530797958374, "learning_rate": 2.353704054137989e-06, "loss": 0.4262, "step": 10301 }, { "epoch": 0.6181076378472431, "grad_norm": 1.23821222782135, "learning_rate": 2.353061422608078e-06, "loss": 0.3733, "step": 10302 }, { "epoch": 0.618167636647267, "grad_norm": 1.3041791915893555, "learning_rate": 2.3524188343928133e-06, "loss": 0.3895, "step": 10303 }, { "epoch": 0.6182276354472911, "grad_norm": 1.2134308815002441, "learning_rate": 2.3517762895164592e-06, "loss": 0.4282, "step": 10304 }, { "epoch": 0.618287634247315, "grad_norm": 1.3720077276229858, "learning_rate": 2.351133788003284e-06, "loss": 0.3787, "step": 10305 }, { "epoch": 0.6183476330473391, "grad_norm": 1.3206260204315186, "learning_rate": 2.350491329877552e-06, "loss": 0.3243, "step": 10306 }, { "epoch": 0.618407631847363, "grad_norm": 1.3778890371322632, "learning_rate": 2.3498489151635244e-06, "loss": 0.3428, "step": 10307 }, { "epoch": 0.6184676306473871, "grad_norm": 1.2162346839904785, "learning_rate": 2.349206543885464e-06, "loss": 0.3509, "step": 10308 }, { "epoch": 0.618527629447411, "grad_norm": 1.262178659439087, "learning_rate": 2.3485642160676284e-06, "loss": 0.3549, "step": 10309 }, { "epoch": 0.6185876282474351, "grad_norm": 1.1120458841323853, "learning_rate": 2.3479219317342777e-06, "loss": 0.3964, "step": 10310 }, { "epoch": 0.618647627047459, "grad_norm": 1.261841893196106, "learning_rate": 2.3472796909096653e-06, "loss": 0.343, "step": 10311 }, { "epoch": 0.6187076258474831, "grad_norm": 1.453540325164795, "learning_rate": 2.346637493618048e-06, "loss": 0.4281, "step": 10312 }, { "epoch": 0.618767624647507, "grad_norm": 1.3529859781265259, "learning_rate": 2.345995339883678e-06, "loss": 0.4209, "step": 10313 }, { "epoch": 0.6188276234475311, "grad_norm": 1.4165810346603394, "learning_rate": 2.3453532297308057e-06, "loss": 0.4472, "step": 10314 }, { "epoch": 0.618887622247555, "grad_norm": 1.275515079498291, "learning_rate": 2.3447111631836826e-06, "loss": 0.3642, "step": 10315 }, { "epoch": 0.618947621047579, "grad_norm": 1.2269481420516968, "learning_rate": 2.3440691402665545e-06, "loss": 0.4009, "step": 10316 }, { "epoch": 0.619007619847603, "grad_norm": 1.3049393892288208, "learning_rate": 2.343427161003668e-06, "loss": 0.3798, "step": 10317 }, { "epoch": 0.619067618647627, "grad_norm": 1.4038233757019043, "learning_rate": 2.3427852254192698e-06, "loss": 0.4035, "step": 10318 }, { "epoch": 0.6191276174476511, "grad_norm": 1.2062973976135254, "learning_rate": 2.3421433335376007e-06, "loss": 0.3578, "step": 10319 }, { "epoch": 0.619187616247675, "grad_norm": 1.3451457023620605, "learning_rate": 2.3415014853829027e-06, "loss": 0.4132, "step": 10320 }, { "epoch": 0.6192476150476991, "grad_norm": 1.3004870414733887, "learning_rate": 2.340859680979415e-06, "loss": 0.3872, "step": 10321 }, { "epoch": 0.619307613847723, "grad_norm": 1.2989102602005005, "learning_rate": 2.340217920351377e-06, "loss": 0.3987, "step": 10322 }, { "epoch": 0.6193676126477471, "grad_norm": 1.2479559183120728, "learning_rate": 2.3395762035230236e-06, "loss": 0.4028, "step": 10323 }, { "epoch": 0.619427611447771, "grad_norm": 1.2961145639419556, "learning_rate": 2.3389345305185894e-06, "loss": 0.3881, "step": 10324 }, { "epoch": 0.6194876102477951, "grad_norm": 1.195720911026001, "learning_rate": 2.33829290136231e-06, "loss": 0.3645, "step": 10325 }, { "epoch": 0.619547609047819, "grad_norm": 1.2285118103027344, "learning_rate": 2.3376513160784133e-06, "loss": 0.4227, "step": 10326 }, { "epoch": 0.6196076078478431, "grad_norm": 1.3401012420654297, "learning_rate": 2.3370097746911326e-06, "loss": 0.4222, "step": 10327 }, { "epoch": 0.619667606647867, "grad_norm": 1.303445816040039, "learning_rate": 2.336368277224692e-06, "loss": 0.4368, "step": 10328 }, { "epoch": 0.6197276054478911, "grad_norm": 1.2807371616363525, "learning_rate": 2.3357268237033206e-06, "loss": 0.4154, "step": 10329 }, { "epoch": 0.619787604247915, "grad_norm": 1.2992926836013794, "learning_rate": 2.335085414151244e-06, "loss": 0.4128, "step": 10330 }, { "epoch": 0.6198476030479391, "grad_norm": 1.2018581628799438, "learning_rate": 2.3344440485926827e-06, "loss": 0.3978, "step": 10331 }, { "epoch": 0.619907601847963, "grad_norm": 1.4972738027572632, "learning_rate": 2.33380272705186e-06, "loss": 0.374, "step": 10332 }, { "epoch": 0.6199676006479871, "grad_norm": 1.36042058467865, "learning_rate": 2.3331614495529945e-06, "loss": 0.3815, "step": 10333 }, { "epoch": 0.620027599448011, "grad_norm": 1.3679558038711548, "learning_rate": 2.332520216120306e-06, "loss": 0.3841, "step": 10334 }, { "epoch": 0.620087598248035, "grad_norm": 1.1870622634887695, "learning_rate": 2.331879026778009e-06, "loss": 0.4182, "step": 10335 }, { "epoch": 0.620147597048059, "grad_norm": 1.2345128059387207, "learning_rate": 2.3312378815503193e-06, "loss": 0.4099, "step": 10336 }, { "epoch": 0.620207595848083, "grad_norm": 1.3737715482711792, "learning_rate": 2.3305967804614508e-06, "loss": 0.4329, "step": 10337 }, { "epoch": 0.620267594648107, "grad_norm": 1.2812576293945312, "learning_rate": 2.329955723535613e-06, "loss": 0.3684, "step": 10338 }, { "epoch": 0.620327593448131, "grad_norm": 1.1800416707992554, "learning_rate": 2.3293147107970174e-06, "loss": 0.3538, "step": 10339 }, { "epoch": 0.620387592248155, "grad_norm": 1.2163257598876953, "learning_rate": 2.3286737422698716e-06, "loss": 0.4028, "step": 10340 }, { "epoch": 0.620447591048179, "grad_norm": 1.173958659172058, "learning_rate": 2.328032817978381e-06, "loss": 0.398, "step": 10341 }, { "epoch": 0.6205075898482031, "grad_norm": 1.3974426984786987, "learning_rate": 2.327391937946753e-06, "loss": 0.3833, "step": 10342 }, { "epoch": 0.620567588648227, "grad_norm": 1.2000148296356201, "learning_rate": 2.326751102199187e-06, "loss": 0.3723, "step": 10343 }, { "epoch": 0.6206275874482511, "grad_norm": 1.2157584428787231, "learning_rate": 2.3261103107598893e-06, "loss": 0.36, "step": 10344 }, { "epoch": 0.620687586248275, "grad_norm": 1.2442246675491333, "learning_rate": 2.3254695636530546e-06, "loss": 0.3703, "step": 10345 }, { "epoch": 0.6207475850482991, "grad_norm": 1.4314122200012207, "learning_rate": 2.324828860902884e-06, "loss": 0.4177, "step": 10346 }, { "epoch": 0.620807583848323, "grad_norm": 1.268203616142273, "learning_rate": 2.3241882025335743e-06, "loss": 0.3924, "step": 10347 }, { "epoch": 0.6208675826483471, "grad_norm": 1.4071182012557983, "learning_rate": 2.323547588569318e-06, "loss": 0.4578, "step": 10348 }, { "epoch": 0.620927581448371, "grad_norm": 1.2888277769088745, "learning_rate": 2.3229070190343103e-06, "loss": 0.3432, "step": 10349 }, { "epoch": 0.6209875802483951, "grad_norm": 1.3247853517532349, "learning_rate": 2.322266493952741e-06, "loss": 0.4069, "step": 10350 }, { "epoch": 0.621047579048419, "grad_norm": 1.3170289993286133, "learning_rate": 2.3216260133488007e-06, "loss": 0.4205, "step": 10351 }, { "epoch": 0.6211075778484431, "grad_norm": 1.2989081144332886, "learning_rate": 2.320985577246677e-06, "loss": 0.3773, "step": 10352 }, { "epoch": 0.621167576648467, "grad_norm": 1.2196208238601685, "learning_rate": 2.3203451856705565e-06, "loss": 0.379, "step": 10353 }, { "epoch": 0.621227575448491, "grad_norm": 1.4089800119400024, "learning_rate": 2.3197048386446247e-06, "loss": 0.3568, "step": 10354 }, { "epoch": 0.621287574248515, "grad_norm": 1.2968477010726929, "learning_rate": 2.3190645361930626e-06, "loss": 0.4012, "step": 10355 }, { "epoch": 0.621347573048539, "grad_norm": 1.25113046169281, "learning_rate": 2.3184242783400535e-06, "loss": 0.3791, "step": 10356 }, { "epoch": 0.621407571848563, "grad_norm": 1.2637953758239746, "learning_rate": 2.317784065109775e-06, "loss": 0.3725, "step": 10357 }, { "epoch": 0.621467570648587, "grad_norm": 1.2880090475082397, "learning_rate": 2.317143896526406e-06, "loss": 0.3801, "step": 10358 }, { "epoch": 0.621527569448611, "grad_norm": 1.398497223854065, "learning_rate": 2.316503772614125e-06, "loss": 0.4084, "step": 10359 }, { "epoch": 0.621587568248635, "grad_norm": 1.2556474208831787, "learning_rate": 2.3158636933971026e-06, "loss": 0.3788, "step": 10360 }, { "epoch": 0.621647567048659, "grad_norm": 1.451095700263977, "learning_rate": 2.315223658899515e-06, "loss": 0.3953, "step": 10361 }, { "epoch": 0.621707565848683, "grad_norm": 1.2741938829421997, "learning_rate": 2.314583669145531e-06, "loss": 0.3681, "step": 10362 }, { "epoch": 0.621767564648707, "grad_norm": 1.2337846755981445, "learning_rate": 2.313943724159322e-06, "loss": 0.3842, "step": 10363 }, { "epoch": 0.621827563448731, "grad_norm": 1.309970498085022, "learning_rate": 2.313303823965054e-06, "loss": 0.3872, "step": 10364 }, { "epoch": 0.6218875622487551, "grad_norm": 1.4805965423583984, "learning_rate": 2.312663968586894e-06, "loss": 0.4013, "step": 10365 }, { "epoch": 0.621947561048779, "grad_norm": 1.2705317735671997, "learning_rate": 2.312024158049007e-06, "loss": 0.4057, "step": 10366 }, { "epoch": 0.6220075598488031, "grad_norm": 1.406261682510376, "learning_rate": 2.3113843923755546e-06, "loss": 0.416, "step": 10367 }, { "epoch": 0.622067558648827, "grad_norm": 1.4045236110687256, "learning_rate": 2.3107446715906993e-06, "loss": 0.4621, "step": 10368 }, { "epoch": 0.6221275574488511, "grad_norm": 1.3441709280014038, "learning_rate": 2.3101049957185983e-06, "loss": 0.3644, "step": 10369 }, { "epoch": 0.622187556248875, "grad_norm": 1.567991852760315, "learning_rate": 2.3094653647834104e-06, "loss": 0.3654, "step": 10370 }, { "epoch": 0.6222475550488991, "grad_norm": 1.2673183679580688, "learning_rate": 2.3088257788092933e-06, "loss": 0.3929, "step": 10371 }, { "epoch": 0.622307553848923, "grad_norm": 1.387085199356079, "learning_rate": 2.3081862378203973e-06, "loss": 0.392, "step": 10372 }, { "epoch": 0.622367552648947, "grad_norm": 1.3362932205200195, "learning_rate": 2.3075467418408796e-06, "loss": 0.3867, "step": 10373 }, { "epoch": 0.622427551448971, "grad_norm": 1.4926550388336182, "learning_rate": 2.3069072908948864e-06, "loss": 0.3952, "step": 10374 }, { "epoch": 0.622487550248995, "grad_norm": 1.2757585048675537, "learning_rate": 2.3062678850065714e-06, "loss": 0.3559, "step": 10375 }, { "epoch": 0.622547549049019, "grad_norm": 1.3381973505020142, "learning_rate": 2.3056285242000774e-06, "loss": 0.3174, "step": 10376 }, { "epoch": 0.622607547849043, "grad_norm": 1.2906858921051025, "learning_rate": 2.3049892084995535e-06, "loss": 0.3908, "step": 10377 }, { "epoch": 0.622667546649067, "grad_norm": 1.3280092477798462, "learning_rate": 2.3043499379291434e-06, "loss": 0.4297, "step": 10378 }, { "epoch": 0.622727545449091, "grad_norm": 1.42340886592865, "learning_rate": 2.3037107125129877e-06, "loss": 0.4129, "step": 10379 }, { "epoch": 0.622787544249115, "grad_norm": 1.240283489227295, "learning_rate": 2.303071532275229e-06, "loss": 0.3839, "step": 10380 }, { "epoch": 0.622847543049139, "grad_norm": 1.1848646402359009, "learning_rate": 2.302432397240005e-06, "loss": 0.3158, "step": 10381 }, { "epoch": 0.622907541849163, "grad_norm": 1.3180581331253052, "learning_rate": 2.301793307431453e-06, "loss": 0.3785, "step": 10382 }, { "epoch": 0.622967540649187, "grad_norm": 1.2327845096588135, "learning_rate": 2.3011542628737094e-06, "loss": 0.3871, "step": 10383 }, { "epoch": 0.623027539449211, "grad_norm": 1.2454559803009033, "learning_rate": 2.3005152635909065e-06, "loss": 0.4209, "step": 10384 }, { "epoch": 0.623087538249235, "grad_norm": 1.2200164794921875, "learning_rate": 2.2998763096071783e-06, "loss": 0.3841, "step": 10385 }, { "epoch": 0.623147537049259, "grad_norm": 1.2240662574768066, "learning_rate": 2.299237400946653e-06, "loss": 0.3388, "step": 10386 }, { "epoch": 0.623207535849283, "grad_norm": 1.2829246520996094, "learning_rate": 2.2985985376334604e-06, "loss": 0.3954, "step": 10387 }, { "epoch": 0.6232675346493071, "grad_norm": 1.2394012212753296, "learning_rate": 2.297959719691729e-06, "loss": 0.4375, "step": 10388 }, { "epoch": 0.623327533449331, "grad_norm": 1.5319418907165527, "learning_rate": 2.2973209471455807e-06, "loss": 0.4151, "step": 10389 }, { "epoch": 0.6233875322493551, "grad_norm": 1.3511614799499512, "learning_rate": 2.2966822200191424e-06, "loss": 0.3615, "step": 10390 }, { "epoch": 0.623447531049379, "grad_norm": 1.3074184656143188, "learning_rate": 2.296043538336533e-06, "loss": 0.3713, "step": 10391 }, { "epoch": 0.623507529849403, "grad_norm": 1.4896291494369507, "learning_rate": 2.2954049021218744e-06, "loss": 0.424, "step": 10392 }, { "epoch": 0.623567528649427, "grad_norm": 1.3920457363128662, "learning_rate": 2.294766311399285e-06, "loss": 0.4573, "step": 10393 }, { "epoch": 0.623627527449451, "grad_norm": 1.4961216449737549, "learning_rate": 2.2941277661928803e-06, "loss": 0.4112, "step": 10394 }, { "epoch": 0.623687526249475, "grad_norm": 1.4605971574783325, "learning_rate": 2.293489266526776e-06, "loss": 0.3919, "step": 10395 }, { "epoch": 0.623747525049499, "grad_norm": 1.2708839178085327, "learning_rate": 2.2928508124250856e-06, "loss": 0.3638, "step": 10396 }, { "epoch": 0.623807523849523, "grad_norm": 1.3681483268737793, "learning_rate": 2.292212403911921e-06, "loss": 0.4024, "step": 10397 }, { "epoch": 0.623867522649547, "grad_norm": 1.349572777748108, "learning_rate": 2.29157404101139e-06, "loss": 0.4068, "step": 10398 }, { "epoch": 0.623927521449571, "grad_norm": 1.1526159048080444, "learning_rate": 2.290935723747602e-06, "loss": 0.4325, "step": 10399 }, { "epoch": 0.623987520249595, "grad_norm": 1.2598992586135864, "learning_rate": 2.290297452144664e-06, "loss": 0.4015, "step": 10400 }, { "epoch": 0.624047519049619, "grad_norm": 1.1092947721481323, "learning_rate": 2.2896592262266783e-06, "loss": 0.3537, "step": 10401 }, { "epoch": 0.624107517849643, "grad_norm": 1.3883733749389648, "learning_rate": 2.2890210460177516e-06, "loss": 0.3992, "step": 10402 }, { "epoch": 0.624167516649667, "grad_norm": 1.2852226495742798, "learning_rate": 2.2883829115419807e-06, "loss": 0.4032, "step": 10403 }, { "epoch": 0.624227515449691, "grad_norm": 1.338588833808899, "learning_rate": 2.2877448228234692e-06, "loss": 0.4197, "step": 10404 }, { "epoch": 0.624287514249715, "grad_norm": 1.3308738470077515, "learning_rate": 2.2871067798863105e-06, "loss": 0.4114, "step": 10405 }, { "epoch": 0.624347513049739, "grad_norm": 1.158850908279419, "learning_rate": 2.286468782754604e-06, "loss": 0.3874, "step": 10406 }, { "epoch": 0.624407511849763, "grad_norm": 1.3594233989715576, "learning_rate": 2.285830831452443e-06, "loss": 0.397, "step": 10407 }, { "epoch": 0.624467510649787, "grad_norm": 1.225309133529663, "learning_rate": 2.285192926003919e-06, "loss": 0.3801, "step": 10408 }, { "epoch": 0.6245275094498111, "grad_norm": 1.3317171335220337, "learning_rate": 2.2845550664331243e-06, "loss": 0.3922, "step": 10409 }, { "epoch": 0.624587508249835, "grad_norm": 1.2613111734390259, "learning_rate": 2.2839172527641467e-06, "loss": 0.4295, "step": 10410 }, { "epoch": 0.624647507049859, "grad_norm": 1.1435563564300537, "learning_rate": 2.2832794850210736e-06, "loss": 0.3752, "step": 10411 }, { "epoch": 0.624707505849883, "grad_norm": 1.3677536249160767, "learning_rate": 2.2826417632279916e-06, "loss": 0.3963, "step": 10412 }, { "epoch": 0.624767504649907, "grad_norm": 1.16765296459198, "learning_rate": 2.2820040874089835e-06, "loss": 0.3713, "step": 10413 }, { "epoch": 0.624827503449931, "grad_norm": 1.219396948814392, "learning_rate": 2.2813664575881327e-06, "loss": 0.3553, "step": 10414 }, { "epoch": 0.624887502249955, "grad_norm": 1.3487308025360107, "learning_rate": 2.2807288737895178e-06, "loss": 0.4091, "step": 10415 }, { "epoch": 0.624947501049979, "grad_norm": 1.2505258321762085, "learning_rate": 2.2800913360372173e-06, "loss": 0.3684, "step": 10416 }, { "epoch": 0.625007499850003, "grad_norm": 1.410094141960144, "learning_rate": 2.279453844355312e-06, "loss": 0.4143, "step": 10417 }, { "epoch": 0.625067498650027, "grad_norm": 1.3023030757904053, "learning_rate": 2.2788163987678714e-06, "loss": 0.4163, "step": 10418 }, { "epoch": 0.625127497450051, "grad_norm": 1.2806072235107422, "learning_rate": 2.2781789992989745e-06, "loss": 0.3857, "step": 10419 }, { "epoch": 0.625187496250075, "grad_norm": 1.359663963317871, "learning_rate": 2.277541645972688e-06, "loss": 0.3568, "step": 10420 }, { "epoch": 0.625247495050099, "grad_norm": 1.290508508682251, "learning_rate": 2.276904338813086e-06, "loss": 0.3899, "step": 10421 }, { "epoch": 0.625307493850123, "grad_norm": 1.3287134170532227, "learning_rate": 2.2762670778442323e-06, "loss": 0.3788, "step": 10422 }, { "epoch": 0.625367492650147, "grad_norm": 1.2871778011322021, "learning_rate": 2.275629863090197e-06, "loss": 0.3801, "step": 10423 }, { "epoch": 0.625427491450171, "grad_norm": 1.2511929273605347, "learning_rate": 2.2749926945750437e-06, "loss": 0.3917, "step": 10424 }, { "epoch": 0.625487490250195, "grad_norm": 1.1849796772003174, "learning_rate": 2.2743555723228347e-06, "loss": 0.3418, "step": 10425 }, { "epoch": 0.625547489050219, "grad_norm": 1.2983001470565796, "learning_rate": 2.2737184963576327e-06, "loss": 0.3462, "step": 10426 }, { "epoch": 0.625607487850243, "grad_norm": 1.3204091787338257, "learning_rate": 2.2730814667034954e-06, "loss": 0.3603, "step": 10427 }, { "epoch": 0.625667486650267, "grad_norm": 1.3588744401931763, "learning_rate": 2.272444483384481e-06, "loss": 0.3813, "step": 10428 }, { "epoch": 0.625727485450291, "grad_norm": 1.3396228551864624, "learning_rate": 2.271807546424646e-06, "loss": 0.3728, "step": 10429 }, { "epoch": 0.6257874842503149, "grad_norm": 1.2714987993240356, "learning_rate": 2.2711706558480442e-06, "loss": 0.3834, "step": 10430 }, { "epoch": 0.625847483050339, "grad_norm": 1.4140851497650146, "learning_rate": 2.2705338116787292e-06, "loss": 0.4308, "step": 10431 }, { "epoch": 0.625907481850363, "grad_norm": 1.2945148944854736, "learning_rate": 2.2698970139407497e-06, "loss": 0.3445, "step": 10432 }, { "epoch": 0.625967480650387, "grad_norm": 1.3888920545578003, "learning_rate": 2.269260262658156e-06, "loss": 0.388, "step": 10433 }, { "epoch": 0.626027479450411, "grad_norm": 1.2460678815841675, "learning_rate": 2.268623557854995e-06, "loss": 0.3414, "step": 10434 }, { "epoch": 0.626087478250435, "grad_norm": 1.4712285995483398, "learning_rate": 2.2679868995553107e-06, "loss": 0.4516, "step": 10435 }, { "epoch": 0.626147477050459, "grad_norm": 1.2090634107589722, "learning_rate": 2.2673502877831504e-06, "loss": 0.3364, "step": 10436 }, { "epoch": 0.626207475850483, "grad_norm": 1.2351326942443848, "learning_rate": 2.266713722562552e-06, "loss": 0.3898, "step": 10437 }, { "epoch": 0.626267474650507, "grad_norm": 1.152888536453247, "learning_rate": 2.266077203917559e-06, "loss": 0.3456, "step": 10438 }, { "epoch": 0.626327473450531, "grad_norm": 1.3208494186401367, "learning_rate": 2.2654407318722068e-06, "loss": 0.3578, "step": 10439 }, { "epoch": 0.626387472250555, "grad_norm": 1.1523487567901611, "learning_rate": 2.264804306450534e-06, "loss": 0.3627, "step": 10440 }, { "epoch": 0.626447471050579, "grad_norm": 1.2424951791763306, "learning_rate": 2.2641679276765757e-06, "loss": 0.3922, "step": 10441 }, { "epoch": 0.626507469850603, "grad_norm": 1.3152589797973633, "learning_rate": 2.2635315955743637e-06, "loss": 0.4207, "step": 10442 }, { "epoch": 0.626567468650627, "grad_norm": 1.3404476642608643, "learning_rate": 2.2628953101679303e-06, "loss": 0.4265, "step": 10443 }, { "epoch": 0.626627467450651, "grad_norm": 1.3859113454818726, "learning_rate": 2.2622590714813047e-06, "loss": 0.3915, "step": 10444 }, { "epoch": 0.626687466250675, "grad_norm": 1.4029942750930786, "learning_rate": 2.261622879538515e-06, "loss": 0.3781, "step": 10445 }, { "epoch": 0.626747465050699, "grad_norm": 1.3328473567962646, "learning_rate": 2.2609867343635868e-06, "loss": 0.3676, "step": 10446 }, { "epoch": 0.626807463850723, "grad_norm": 1.1794607639312744, "learning_rate": 2.2603506359805437e-06, "loss": 0.3746, "step": 10447 }, { "epoch": 0.626867462650747, "grad_norm": 1.2237893342971802, "learning_rate": 2.2597145844134115e-06, "loss": 0.36, "step": 10448 }, { "epoch": 0.6269274614507709, "grad_norm": 1.161859154701233, "learning_rate": 2.259078579686207e-06, "loss": 0.3913, "step": 10449 }, { "epoch": 0.626987460250795, "grad_norm": 1.4524922370910645, "learning_rate": 2.2584426218229524e-06, "loss": 0.4091, "step": 10450 }, { "epoch": 0.6270474590508189, "grad_norm": 1.328254222869873, "learning_rate": 2.2578067108476617e-06, "loss": 0.3539, "step": 10451 }, { "epoch": 0.627107457850843, "grad_norm": 1.512035608291626, "learning_rate": 2.2571708467843526e-06, "loss": 0.3878, "step": 10452 }, { "epoch": 0.6271674566508669, "grad_norm": 1.341001272201538, "learning_rate": 2.256535029657039e-06, "loss": 0.4189, "step": 10453 }, { "epoch": 0.627227455450891, "grad_norm": 1.3433446884155273, "learning_rate": 2.2558992594897314e-06, "loss": 0.3876, "step": 10454 }, { "epoch": 0.627287454250915, "grad_norm": 1.4966392517089844, "learning_rate": 2.255263536306442e-06, "loss": 0.4667, "step": 10455 }, { "epoch": 0.627347453050939, "grad_norm": 1.2583043575286865, "learning_rate": 2.2546278601311764e-06, "loss": 0.4055, "step": 10456 }, { "epoch": 0.627407451850963, "grad_norm": 1.3004753589630127, "learning_rate": 2.2539922309879427e-06, "loss": 0.3608, "step": 10457 }, { "epoch": 0.627467450650987, "grad_norm": 1.445676326751709, "learning_rate": 2.253356648900747e-06, "loss": 0.368, "step": 10458 }, { "epoch": 0.627527449451011, "grad_norm": 1.311833143234253, "learning_rate": 2.252721113893589e-06, "loss": 0.3575, "step": 10459 }, { "epoch": 0.627587448251035, "grad_norm": 1.3117395639419556, "learning_rate": 2.2520856259904736e-06, "loss": 0.4223, "step": 10460 }, { "epoch": 0.627647447051059, "grad_norm": 1.2943670749664307, "learning_rate": 2.251450185215398e-06, "loss": 0.402, "step": 10461 }, { "epoch": 0.627707445851083, "grad_norm": 1.2263717651367188, "learning_rate": 2.2508147915923603e-06, "loss": 0.4116, "step": 10462 }, { "epoch": 0.627767444651107, "grad_norm": 1.3416543006896973, "learning_rate": 2.2501794451453567e-06, "loss": 0.3571, "step": 10463 }, { "epoch": 0.627827443451131, "grad_norm": 1.2830619812011719, "learning_rate": 2.2495441458983807e-06, "loss": 0.3527, "step": 10464 }, { "epoch": 0.627887442251155, "grad_norm": 1.2068310976028442, "learning_rate": 2.248908893875427e-06, "loss": 0.3908, "step": 10465 }, { "epoch": 0.627947441051179, "grad_norm": 1.208456039428711, "learning_rate": 2.2482736891004826e-06, "loss": 0.3846, "step": 10466 }, { "epoch": 0.628007439851203, "grad_norm": 1.3007115125656128, "learning_rate": 2.2476385315975402e-06, "loss": 0.4116, "step": 10467 }, { "epoch": 0.6280674386512269, "grad_norm": 1.3680603504180908, "learning_rate": 2.2470034213905824e-06, "loss": 0.3769, "step": 10468 }, { "epoch": 0.628127437451251, "grad_norm": 1.2251275777816772, "learning_rate": 2.2463683585035975e-06, "loss": 0.3891, "step": 10469 }, { "epoch": 0.6281874362512749, "grad_norm": 1.3022323846817017, "learning_rate": 2.245733342960569e-06, "loss": 0.4025, "step": 10470 }, { "epoch": 0.628247435051299, "grad_norm": 1.3432230949401855, "learning_rate": 2.2450983747854767e-06, "loss": 0.3529, "step": 10471 }, { "epoch": 0.6283074338513229, "grad_norm": 1.202453851699829, "learning_rate": 2.2444634540023023e-06, "loss": 0.4164, "step": 10472 }, { "epoch": 0.628367432651347, "grad_norm": 1.3606021404266357, "learning_rate": 2.2438285806350222e-06, "loss": 0.3532, "step": 10473 }, { "epoch": 0.6284274314513709, "grad_norm": 1.152575969696045, "learning_rate": 2.2431937547076147e-06, "loss": 0.3859, "step": 10474 }, { "epoch": 0.628487430251395, "grad_norm": 1.4452811479568481, "learning_rate": 2.242558976244052e-06, "loss": 0.4152, "step": 10475 }, { "epoch": 0.628547429051419, "grad_norm": 1.2908536195755005, "learning_rate": 2.241924245268308e-06, "loss": 0.3849, "step": 10476 }, { "epoch": 0.628607427851443, "grad_norm": 1.3726025819778442, "learning_rate": 2.241289561804354e-06, "loss": 0.3904, "step": 10477 }, { "epoch": 0.628667426651467, "grad_norm": 1.313145399093628, "learning_rate": 2.240654925876158e-06, "loss": 0.3509, "step": 10478 }, { "epoch": 0.628727425451491, "grad_norm": 1.2686604261398315, "learning_rate": 2.240020337507689e-06, "loss": 0.3689, "step": 10479 }, { "epoch": 0.628787424251515, "grad_norm": 1.2099511623382568, "learning_rate": 2.2393857967229103e-06, "loss": 0.4308, "step": 10480 }, { "epoch": 0.628847423051539, "grad_norm": 1.2812937498092651, "learning_rate": 2.238751303545786e-06, "loss": 0.3917, "step": 10481 }, { "epoch": 0.628907421851563, "grad_norm": 1.2704557180404663, "learning_rate": 2.2381168580002805e-06, "loss": 0.3508, "step": 10482 }, { "epoch": 0.628967420651587, "grad_norm": 1.2031358480453491, "learning_rate": 2.2374824601103503e-06, "loss": 0.3798, "step": 10483 }, { "epoch": 0.629027419451611, "grad_norm": 1.160112738609314, "learning_rate": 2.236848109899957e-06, "loss": 0.3685, "step": 10484 }, { "epoch": 0.6290874182516349, "grad_norm": 1.316475510597229, "learning_rate": 2.236213807393055e-06, "loss": 0.3734, "step": 10485 }, { "epoch": 0.629147417051659, "grad_norm": 1.2863447666168213, "learning_rate": 2.235579552613601e-06, "loss": 0.4065, "step": 10486 }, { "epoch": 0.6292074158516829, "grad_norm": 1.2470468282699585, "learning_rate": 2.234945345585545e-06, "loss": 0.3828, "step": 10487 }, { "epoch": 0.629267414651707, "grad_norm": 1.247012734413147, "learning_rate": 2.23431118633284e-06, "loss": 0.3748, "step": 10488 }, { "epoch": 0.6293274134517309, "grad_norm": 1.26155686378479, "learning_rate": 2.2336770748794357e-06, "loss": 0.3962, "step": 10489 }, { "epoch": 0.629387412251755, "grad_norm": 1.2232651710510254, "learning_rate": 2.2330430112492783e-06, "loss": 0.3966, "step": 10490 }, { "epoch": 0.6294474110517789, "grad_norm": 1.2059049606323242, "learning_rate": 2.232408995466315e-06, "loss": 0.3713, "step": 10491 }, { "epoch": 0.629507409851803, "grad_norm": 1.2417303323745728, "learning_rate": 2.231775027554488e-06, "loss": 0.408, "step": 10492 }, { "epoch": 0.6295674086518269, "grad_norm": 1.2660194635391235, "learning_rate": 2.2311411075377396e-06, "loss": 0.3751, "step": 10493 }, { "epoch": 0.629627407451851, "grad_norm": 1.3110352754592896, "learning_rate": 2.2305072354400126e-06, "loss": 0.3773, "step": 10494 }, { "epoch": 0.6296874062518749, "grad_norm": 1.3188737630844116, "learning_rate": 2.2298734112852416e-06, "loss": 0.3678, "step": 10495 }, { "epoch": 0.629747405051899, "grad_norm": 1.114975929260254, "learning_rate": 2.229239635097367e-06, "loss": 0.3299, "step": 10496 }, { "epoch": 0.6298074038519229, "grad_norm": 1.3554883003234863, "learning_rate": 2.22860590690032e-06, "loss": 0.3795, "step": 10497 }, { "epoch": 0.629867402651947, "grad_norm": 1.3264505863189697, "learning_rate": 2.2279722267180357e-06, "loss": 0.3655, "step": 10498 }, { "epoch": 0.629927401451971, "grad_norm": 1.2473410367965698, "learning_rate": 2.227338594574446e-06, "loss": 0.3688, "step": 10499 }, { "epoch": 0.629987400251995, "grad_norm": 1.3780791759490967, "learning_rate": 2.2267050104934784e-06, "loss": 0.4085, "step": 10500 }, { "epoch": 0.630047399052019, "grad_norm": 1.3600839376449585, "learning_rate": 2.2260714744990627e-06, "loss": 0.3666, "step": 10501 }, { "epoch": 0.630107397852043, "grad_norm": 1.2414727210998535, "learning_rate": 2.2254379866151224e-06, "loss": 0.4095, "step": 10502 }, { "epoch": 0.630167396652067, "grad_norm": 1.324020504951477, "learning_rate": 2.224804546865583e-06, "loss": 0.3555, "step": 10503 }, { "epoch": 0.6302273954520909, "grad_norm": 1.2992050647735596, "learning_rate": 2.2241711552743657e-06, "loss": 0.4288, "step": 10504 }, { "epoch": 0.630287394252115, "grad_norm": 1.4770219326019287, "learning_rate": 2.223537811865391e-06, "loss": 0.4364, "step": 10505 }, { "epoch": 0.6303473930521389, "grad_norm": 1.4590489864349365, "learning_rate": 2.2229045166625786e-06, "loss": 0.4193, "step": 10506 }, { "epoch": 0.630407391852163, "grad_norm": 1.2577952146530151, "learning_rate": 2.2222712696898433e-06, "loss": 0.4175, "step": 10507 }, { "epoch": 0.6304673906521869, "grad_norm": 1.1991758346557617, "learning_rate": 2.221638070971101e-06, "loss": 0.388, "step": 10508 }, { "epoch": 0.630527389452211, "grad_norm": 1.1900405883789062, "learning_rate": 2.2210049205302647e-06, "loss": 0.3835, "step": 10509 }, { "epoch": 0.6305873882522349, "grad_norm": 1.1818119287490845, "learning_rate": 2.2203718183912445e-06, "loss": 0.352, "step": 10510 }, { "epoch": 0.630647387052259, "grad_norm": 1.3707720041275024, "learning_rate": 2.2197387645779523e-06, "loss": 0.3505, "step": 10511 }, { "epoch": 0.6307073858522829, "grad_norm": 1.3299739360809326, "learning_rate": 2.2191057591142926e-06, "loss": 0.3622, "step": 10512 }, { "epoch": 0.630767384652307, "grad_norm": 1.2985621690750122, "learning_rate": 2.218472802024174e-06, "loss": 0.3918, "step": 10513 }, { "epoch": 0.6308273834523309, "grad_norm": 1.2605681419372559, "learning_rate": 2.2178398933314977e-06, "loss": 0.3463, "step": 10514 }, { "epoch": 0.630887382252355, "grad_norm": 1.1910980939865112, "learning_rate": 2.2172070330601695e-06, "loss": 0.3676, "step": 10515 }, { "epoch": 0.6309473810523789, "grad_norm": 1.3121055364608765, "learning_rate": 2.216574221234085e-06, "loss": 0.4159, "step": 10516 }, { "epoch": 0.631007379852403, "grad_norm": 1.396746277809143, "learning_rate": 2.2159414578771447e-06, "loss": 0.3966, "step": 10517 }, { "epoch": 0.6310673786524269, "grad_norm": 1.3702890872955322, "learning_rate": 2.215308743013247e-06, "loss": 0.3708, "step": 10518 }, { "epoch": 0.631127377452451, "grad_norm": 1.3385331630706787, "learning_rate": 2.214676076666284e-06, "loss": 0.335, "step": 10519 }, { "epoch": 0.6311873762524749, "grad_norm": 1.2732082605361938, "learning_rate": 2.2140434588601503e-06, "loss": 0.4143, "step": 10520 }, { "epoch": 0.631247375052499, "grad_norm": 1.4380494356155396, "learning_rate": 2.213410889618736e-06, "loss": 0.4398, "step": 10521 }, { "epoch": 0.631307373852523, "grad_norm": 1.144508719444275, "learning_rate": 2.21277836896593e-06, "loss": 0.3541, "step": 10522 }, { "epoch": 0.6313673726525469, "grad_norm": 1.3491613864898682, "learning_rate": 2.2121458969256215e-06, "loss": 0.4396, "step": 10523 }, { "epoch": 0.631427371452571, "grad_norm": 1.3361446857452393, "learning_rate": 2.2115134735216948e-06, "loss": 0.3844, "step": 10524 }, { "epoch": 0.6314873702525949, "grad_norm": 1.2723102569580078, "learning_rate": 2.210881098778034e-06, "loss": 0.3549, "step": 10525 }, { "epoch": 0.631547369052619, "grad_norm": 1.215520977973938, "learning_rate": 2.21024877271852e-06, "loss": 0.3766, "step": 10526 }, { "epoch": 0.6316073678526429, "grad_norm": 1.18378746509552, "learning_rate": 2.209616495367035e-06, "loss": 0.386, "step": 10527 }, { "epoch": 0.631667366652667, "grad_norm": 1.2515286207199097, "learning_rate": 2.2089842667474543e-06, "loss": 0.4169, "step": 10528 }, { "epoch": 0.6317273654526909, "grad_norm": 1.4393508434295654, "learning_rate": 2.2083520868836573e-06, "loss": 0.3941, "step": 10529 }, { "epoch": 0.631787364252715, "grad_norm": 1.2135785818099976, "learning_rate": 2.207719955799517e-06, "loss": 0.3378, "step": 10530 }, { "epoch": 0.6318473630527389, "grad_norm": 1.4395091533660889, "learning_rate": 2.2070878735189063e-06, "loss": 0.4323, "step": 10531 }, { "epoch": 0.631907361852763, "grad_norm": 1.2522993087768555, "learning_rate": 2.2064558400656966e-06, "loss": 0.3565, "step": 10532 }, { "epoch": 0.6319673606527869, "grad_norm": 1.236921787261963, "learning_rate": 2.2058238554637556e-06, "loss": 0.362, "step": 10533 }, { "epoch": 0.632027359452811, "grad_norm": 1.3335938453674316, "learning_rate": 2.205191919736952e-06, "loss": 0.3982, "step": 10534 }, { "epoch": 0.6320873582528349, "grad_norm": 1.2003086805343628, "learning_rate": 2.2045600329091504e-06, "loss": 0.4047, "step": 10535 }, { "epoch": 0.632147357052859, "grad_norm": 1.3218432664871216, "learning_rate": 2.2039281950042137e-06, "loss": 0.3798, "step": 10536 }, { "epoch": 0.6322073558528829, "grad_norm": 1.3727589845657349, "learning_rate": 2.2032964060460054e-06, "loss": 0.4014, "step": 10537 }, { "epoch": 0.632267354652907, "grad_norm": 1.3205565214157104, "learning_rate": 2.202664666058383e-06, "loss": 0.4042, "step": 10538 }, { "epoch": 0.6323273534529309, "grad_norm": 1.2291224002838135, "learning_rate": 2.202032975065205e-06, "loss": 0.3703, "step": 10539 }, { "epoch": 0.632387352252955, "grad_norm": 1.197991132736206, "learning_rate": 2.20140133309033e-06, "loss": 0.3715, "step": 10540 }, { "epoch": 0.6324473510529789, "grad_norm": 1.3339145183563232, "learning_rate": 2.2007697401576084e-06, "loss": 0.4154, "step": 10541 }, { "epoch": 0.6325073498530029, "grad_norm": 1.2790179252624512, "learning_rate": 2.200138196290896e-06, "loss": 0.3692, "step": 10542 }, { "epoch": 0.6325673486530269, "grad_norm": 1.1320679187774658, "learning_rate": 2.19950670151404e-06, "loss": 0.3623, "step": 10543 }, { "epoch": 0.6326273474530509, "grad_norm": 1.235764503479004, "learning_rate": 2.198875255850893e-06, "loss": 0.3804, "step": 10544 }, { "epoch": 0.632687346253075, "grad_norm": 1.3491801023483276, "learning_rate": 2.1982438593252974e-06, "loss": 0.3769, "step": 10545 }, { "epoch": 0.6327473450530989, "grad_norm": 1.2776951789855957, "learning_rate": 2.1976125119611013e-06, "loss": 0.4213, "step": 10546 }, { "epoch": 0.632807343853123, "grad_norm": 1.404697060585022, "learning_rate": 2.1969812137821475e-06, "loss": 0.3945, "step": 10547 }, { "epoch": 0.6328673426531469, "grad_norm": 1.1939671039581299, "learning_rate": 2.1963499648122767e-06, "loss": 0.3414, "step": 10548 }, { "epoch": 0.632927341453171, "grad_norm": 1.3699960708618164, "learning_rate": 2.195718765075328e-06, "loss": 0.4295, "step": 10549 }, { "epoch": 0.6329873402531949, "grad_norm": 1.4470762014389038, "learning_rate": 2.195087614595139e-06, "loss": 0.4158, "step": 10550 }, { "epoch": 0.633047339053219, "grad_norm": 1.1666059494018555, "learning_rate": 2.1944565133955458e-06, "loss": 0.3463, "step": 10551 }, { "epoch": 0.6331073378532429, "grad_norm": 1.3397579193115234, "learning_rate": 2.1938254615003826e-06, "loss": 0.3833, "step": 10552 }, { "epoch": 0.633167336653267, "grad_norm": 1.281360149383545, "learning_rate": 2.19319445893348e-06, "loss": 0.4145, "step": 10553 }, { "epoch": 0.6332273354532909, "grad_norm": 1.3682844638824463, "learning_rate": 2.1925635057186694e-06, "loss": 0.4134, "step": 10554 }, { "epoch": 0.633287334253315, "grad_norm": 1.3842723369598389, "learning_rate": 2.191932601879778e-06, "loss": 0.4695, "step": 10555 }, { "epoch": 0.6333473330533389, "grad_norm": 1.2506659030914307, "learning_rate": 2.191301747440634e-06, "loss": 0.3284, "step": 10556 }, { "epoch": 0.633407331853363, "grad_norm": 1.3754304647445679, "learning_rate": 2.1906709424250592e-06, "loss": 0.4325, "step": 10557 }, { "epoch": 0.6334673306533869, "grad_norm": 1.4201643466949463, "learning_rate": 2.190040186856877e-06, "loss": 0.4285, "step": 10558 }, { "epoch": 0.633527329453411, "grad_norm": 1.2168859243392944, "learning_rate": 2.1894094807599108e-06, "loss": 0.3857, "step": 10559 }, { "epoch": 0.6335873282534349, "grad_norm": 1.1926873922348022, "learning_rate": 2.1887788241579754e-06, "loss": 0.3631, "step": 10560 }, { "epoch": 0.6336473270534589, "grad_norm": 1.4103381633758545, "learning_rate": 2.1881482170748923e-06, "loss": 0.3945, "step": 10561 }, { "epoch": 0.6337073258534829, "grad_norm": 1.290669322013855, "learning_rate": 2.187517659534472e-06, "loss": 0.4131, "step": 10562 }, { "epoch": 0.6337673246535069, "grad_norm": 1.2406035661697388, "learning_rate": 2.1868871515605307e-06, "loss": 0.3836, "step": 10563 }, { "epoch": 0.6338273234535309, "grad_norm": 1.372706413269043, "learning_rate": 2.18625669317688e-06, "loss": 0.4163, "step": 10564 }, { "epoch": 0.6338873222535549, "grad_norm": 1.3737930059432983, "learning_rate": 2.185626284407328e-06, "loss": 0.3589, "step": 10565 }, { "epoch": 0.633947321053579, "grad_norm": 1.3369230031967163, "learning_rate": 2.184995925275683e-06, "loss": 0.3428, "step": 10566 }, { "epoch": 0.6340073198536029, "grad_norm": 1.3351815938949585, "learning_rate": 2.1843656158057508e-06, "loss": 0.3927, "step": 10567 }, { "epoch": 0.634067318653627, "grad_norm": 1.311851143836975, "learning_rate": 2.1837353560213358e-06, "loss": 0.4088, "step": 10568 }, { "epoch": 0.6341273174536509, "grad_norm": 1.4080843925476074, "learning_rate": 2.1831051459462385e-06, "loss": 0.3964, "step": 10569 }, { "epoch": 0.634187316253675, "grad_norm": 1.4444117546081543, "learning_rate": 2.1824749856042602e-06, "loss": 0.366, "step": 10570 }, { "epoch": 0.6342473150536989, "grad_norm": 1.3847737312316895, "learning_rate": 2.1818448750192004e-06, "loss": 0.3942, "step": 10571 }, { "epoch": 0.634307313853723, "grad_norm": 1.3803046941757202, "learning_rate": 2.181214814214853e-06, "loss": 0.3679, "step": 10572 }, { "epoch": 0.6343673126537469, "grad_norm": 1.3034793138504028, "learning_rate": 2.1805848032150138e-06, "loss": 0.4033, "step": 10573 }, { "epoch": 0.634427311453771, "grad_norm": 1.324356198310852, "learning_rate": 2.1799548420434746e-06, "loss": 0.3662, "step": 10574 }, { "epoch": 0.6344873102537949, "grad_norm": 1.2040866613388062, "learning_rate": 2.1793249307240277e-06, "loss": 0.386, "step": 10575 }, { "epoch": 0.634547309053819, "grad_norm": 1.403220534324646, "learning_rate": 2.1786950692804613e-06, "loss": 0.4062, "step": 10576 }, { "epoch": 0.6346073078538429, "grad_norm": 1.3671561479568481, "learning_rate": 2.178065257736562e-06, "loss": 0.3844, "step": 10577 }, { "epoch": 0.634667306653867, "grad_norm": 1.29082190990448, "learning_rate": 2.177435496116116e-06, "loss": 0.3996, "step": 10578 }, { "epoch": 0.6347273054538909, "grad_norm": 1.5573954582214355, "learning_rate": 2.1768057844429045e-06, "loss": 0.3894, "step": 10579 }, { "epoch": 0.6347873042539149, "grad_norm": 1.2245140075683594, "learning_rate": 2.1761761227407104e-06, "loss": 0.4124, "step": 10580 }, { "epoch": 0.6348473030539389, "grad_norm": 1.269964575767517, "learning_rate": 2.175546511033314e-06, "loss": 0.4223, "step": 10581 }, { "epoch": 0.6349073018539629, "grad_norm": 1.426295518875122, "learning_rate": 2.1749169493444905e-06, "loss": 0.4088, "step": 10582 }, { "epoch": 0.6349673006539869, "grad_norm": 1.3510117530822754, "learning_rate": 2.1742874376980176e-06, "loss": 0.3461, "step": 10583 }, { "epoch": 0.6350272994540109, "grad_norm": 1.360669732093811, "learning_rate": 2.173657976117667e-06, "loss": 0.3811, "step": 10584 }, { "epoch": 0.6350872982540349, "grad_norm": 1.386742353439331, "learning_rate": 2.173028564627213e-06, "loss": 0.3494, "step": 10585 }, { "epoch": 0.6351472970540589, "grad_norm": 1.4968280792236328, "learning_rate": 2.172399203250424e-06, "loss": 0.4182, "step": 10586 }, { "epoch": 0.6352072958540829, "grad_norm": 1.2685070037841797, "learning_rate": 2.171769892011067e-06, "loss": 0.346, "step": 10587 }, { "epoch": 0.6352672946541069, "grad_norm": 1.352362036705017, "learning_rate": 2.1711406309329124e-06, "loss": 0.4204, "step": 10588 }, { "epoch": 0.635327293454131, "grad_norm": 1.169846773147583, "learning_rate": 2.17051142003972e-06, "loss": 0.3586, "step": 10589 }, { "epoch": 0.6353872922541549, "grad_norm": 1.3169044256210327, "learning_rate": 2.169882259355256e-06, "loss": 0.3251, "step": 10590 }, { "epoch": 0.635447291054179, "grad_norm": 1.1329256296157837, "learning_rate": 2.1692531489032772e-06, "loss": 0.3292, "step": 10591 }, { "epoch": 0.6355072898542029, "grad_norm": 1.1622966527938843, "learning_rate": 2.168624088707545e-06, "loss": 0.3249, "step": 10592 }, { "epoch": 0.635567288654227, "grad_norm": 1.353358268737793, "learning_rate": 2.1679950787918157e-06, "loss": 0.3979, "step": 10593 }, { "epoch": 0.6356272874542509, "grad_norm": 1.3529757261276245, "learning_rate": 2.1673661191798427e-06, "loss": 0.3441, "step": 10594 }, { "epoch": 0.635687286254275, "grad_norm": 1.31923508644104, "learning_rate": 2.1667372098953804e-06, "loss": 0.3746, "step": 10595 }, { "epoch": 0.6357472850542989, "grad_norm": 1.306377649307251, "learning_rate": 2.166108350962179e-06, "loss": 0.363, "step": 10596 }, { "epoch": 0.635807283854323, "grad_norm": 1.3691771030426025, "learning_rate": 2.165479542403989e-06, "loss": 0.3919, "step": 10597 }, { "epoch": 0.6358672826543469, "grad_norm": 1.2245333194732666, "learning_rate": 2.1648507842445554e-06, "loss": 0.3793, "step": 10598 }, { "epoch": 0.6359272814543709, "grad_norm": 1.4054780006408691, "learning_rate": 2.164222076507625e-06, "loss": 0.4199, "step": 10599 }, { "epoch": 0.6359872802543949, "grad_norm": 1.3277513980865479, "learning_rate": 2.163593419216942e-06, "loss": 0.431, "step": 10600 }, { "epoch": 0.6360472790544189, "grad_norm": 1.3811540603637695, "learning_rate": 2.1629648123962455e-06, "loss": 0.3745, "step": 10601 }, { "epoch": 0.6361072778544429, "grad_norm": 1.2420607805252075, "learning_rate": 2.162336256069278e-06, "loss": 0.4022, "step": 10602 }, { "epoch": 0.6361672766544669, "grad_norm": 1.4308146238327026, "learning_rate": 2.1617077502597744e-06, "loss": 0.4119, "step": 10603 }, { "epoch": 0.6362272754544909, "grad_norm": 1.3027002811431885, "learning_rate": 2.161079294991471e-06, "loss": 0.3723, "step": 10604 }, { "epoch": 0.6362872742545149, "grad_norm": 1.3328241109848022, "learning_rate": 2.1604508902881054e-06, "loss": 0.4003, "step": 10605 }, { "epoch": 0.6363472730545389, "grad_norm": 1.1085419654846191, "learning_rate": 2.1598225361734042e-06, "loss": 0.3509, "step": 10606 }, { "epoch": 0.6364072718545629, "grad_norm": 1.2713522911071777, "learning_rate": 2.159194232671102e-06, "loss": 0.372, "step": 10607 }, { "epoch": 0.6364672706545869, "grad_norm": 1.354716420173645, "learning_rate": 2.1585659798049226e-06, "loss": 0.3411, "step": 10608 }, { "epoch": 0.6365272694546109, "grad_norm": 1.301721215248108, "learning_rate": 2.1579377775985967e-06, "loss": 0.4187, "step": 10609 }, { "epoch": 0.6365872682546349, "grad_norm": 1.2981526851654053, "learning_rate": 2.157309626075845e-06, "loss": 0.3929, "step": 10610 }, { "epoch": 0.6366472670546589, "grad_norm": 1.253040075302124, "learning_rate": 2.1566815252603917e-06, "loss": 0.3992, "step": 10611 }, { "epoch": 0.636707265854683, "grad_norm": 1.3846490383148193, "learning_rate": 2.1560534751759583e-06, "loss": 0.3843, "step": 10612 }, { "epoch": 0.6367672646547069, "grad_norm": 1.2144829034805298, "learning_rate": 2.155425475846261e-06, "loss": 0.3925, "step": 10613 }, { "epoch": 0.636827263454731, "grad_norm": 1.4377411603927612, "learning_rate": 2.1547975272950183e-06, "loss": 0.3866, "step": 10614 }, { "epoch": 0.6368872622547549, "grad_norm": 1.3608369827270508, "learning_rate": 2.154169629545943e-06, "loss": 0.3773, "step": 10615 }, { "epoch": 0.636947261054779, "grad_norm": 1.2480911016464233, "learning_rate": 2.15354178262275e-06, "loss": 0.3703, "step": 10616 }, { "epoch": 0.6370072598548029, "grad_norm": 1.3920316696166992, "learning_rate": 2.1529139865491504e-06, "loss": 0.386, "step": 10617 }, { "epoch": 0.6370672586548269, "grad_norm": 1.2290774583816528, "learning_rate": 2.1522862413488506e-06, "loss": 0.4316, "step": 10618 }, { "epoch": 0.6371272574548509, "grad_norm": 1.3029115200042725, "learning_rate": 2.1516585470455604e-06, "loss": 0.3721, "step": 10619 }, { "epoch": 0.6371872562548749, "grad_norm": 1.2702269554138184, "learning_rate": 2.1510309036629822e-06, "loss": 0.4047, "step": 10620 }, { "epoch": 0.6372472550548989, "grad_norm": 1.2476303577423096, "learning_rate": 2.150403311224822e-06, "loss": 0.3869, "step": 10621 }, { "epoch": 0.6373072538549229, "grad_norm": 1.2430797815322876, "learning_rate": 2.1497757697547803e-06, "loss": 0.3615, "step": 10622 }, { "epoch": 0.6373672526549469, "grad_norm": 1.2154130935668945, "learning_rate": 2.149148279276556e-06, "loss": 0.3931, "step": 10623 }, { "epoch": 0.6374272514549709, "grad_norm": 1.3281807899475098, "learning_rate": 2.148520839813847e-06, "loss": 0.4202, "step": 10624 }, { "epoch": 0.6374872502549949, "grad_norm": 1.1979362964630127, "learning_rate": 2.1478934513903482e-06, "loss": 0.3524, "step": 10625 }, { "epoch": 0.6375472490550189, "grad_norm": 1.3005766868591309, "learning_rate": 2.1472661140297544e-06, "loss": 0.359, "step": 10626 }, { "epoch": 0.6376072478550429, "grad_norm": 1.3659212589263916, "learning_rate": 2.1466388277557552e-06, "loss": 0.3861, "step": 10627 }, { "epoch": 0.6376672466550669, "grad_norm": 1.2771514654159546, "learning_rate": 2.1460115925920417e-06, "loss": 0.4008, "step": 10628 }, { "epoch": 0.6377272454550909, "grad_norm": 1.2654038667678833, "learning_rate": 2.145384408562302e-06, "loss": 0.3918, "step": 10629 }, { "epoch": 0.6377872442551149, "grad_norm": 1.2422964572906494, "learning_rate": 2.1447572756902216e-06, "loss": 0.3407, "step": 10630 }, { "epoch": 0.6378472430551388, "grad_norm": 1.294189453125, "learning_rate": 2.144130193999485e-06, "loss": 0.418, "step": 10631 }, { "epoch": 0.6379072418551629, "grad_norm": 1.233272910118103, "learning_rate": 2.1435031635137725e-06, "loss": 0.4009, "step": 10632 }, { "epoch": 0.637967240655187, "grad_norm": 1.4422658681869507, "learning_rate": 2.1428761842567648e-06, "loss": 0.4064, "step": 10633 }, { "epoch": 0.6380272394552109, "grad_norm": 1.3578969240188599, "learning_rate": 2.1422492562521426e-06, "loss": 0.3765, "step": 10634 }, { "epoch": 0.638087238255235, "grad_norm": 1.3779665231704712, "learning_rate": 2.141622379523578e-06, "loss": 0.436, "step": 10635 }, { "epoch": 0.6381472370552589, "grad_norm": 1.263506531715393, "learning_rate": 2.1409955540947496e-06, "loss": 0.373, "step": 10636 }, { "epoch": 0.6382072358552829, "grad_norm": 1.3984906673431396, "learning_rate": 2.1403687799893254e-06, "loss": 0.4075, "step": 10637 }, { "epoch": 0.6382672346553069, "grad_norm": 1.3106013536453247, "learning_rate": 2.13974205723098e-06, "loss": 0.3833, "step": 10638 }, { "epoch": 0.6383272334553309, "grad_norm": 1.1604905128479004, "learning_rate": 2.1391153858433778e-06, "loss": 0.3675, "step": 10639 }, { "epoch": 0.6383872322553549, "grad_norm": 1.303244709968567, "learning_rate": 2.1384887658501875e-06, "loss": 0.4275, "step": 10640 }, { "epoch": 0.6384472310553789, "grad_norm": 1.4894206523895264, "learning_rate": 2.137862197275075e-06, "loss": 0.372, "step": 10641 }, { "epoch": 0.6385072298554029, "grad_norm": 1.30782949924469, "learning_rate": 2.1372356801417e-06, "loss": 0.3896, "step": 10642 }, { "epoch": 0.6385672286554269, "grad_norm": 1.4090449810028076, "learning_rate": 2.136609214473726e-06, "loss": 0.3901, "step": 10643 }, { "epoch": 0.6386272274554509, "grad_norm": 1.3224742412567139, "learning_rate": 2.1359828002948092e-06, "loss": 0.3768, "step": 10644 }, { "epoch": 0.6386872262554749, "grad_norm": 1.4893265962600708, "learning_rate": 2.1353564376286075e-06, "loss": 0.4081, "step": 10645 }, { "epoch": 0.6387472250554989, "grad_norm": 1.4031829833984375, "learning_rate": 2.134730126498777e-06, "loss": 0.4192, "step": 10646 }, { "epoch": 0.6388072238555229, "grad_norm": 1.247851848602295, "learning_rate": 2.1341038669289684e-06, "loss": 0.378, "step": 10647 }, { "epoch": 0.6388672226555469, "grad_norm": 1.3056186437606812, "learning_rate": 2.1334776589428345e-06, "loss": 0.3587, "step": 10648 }, { "epoch": 0.6389272214555709, "grad_norm": 1.2276084423065186, "learning_rate": 2.1328515025640227e-06, "loss": 0.3747, "step": 10649 }, { "epoch": 0.6389872202555948, "grad_norm": 1.2742762565612793, "learning_rate": 2.132225397816182e-06, "loss": 0.4179, "step": 10650 }, { "epoch": 0.6390472190556189, "grad_norm": 1.4635891914367676, "learning_rate": 2.1315993447229555e-06, "loss": 0.3789, "step": 10651 }, { "epoch": 0.6391072178556428, "grad_norm": 1.3805969953536987, "learning_rate": 2.1309733433079867e-06, "loss": 0.3963, "step": 10652 }, { "epoch": 0.6391672166556669, "grad_norm": 1.3228398561477661, "learning_rate": 2.13034739359492e-06, "loss": 0.4125, "step": 10653 }, { "epoch": 0.6392272154556908, "grad_norm": 1.2550244331359863, "learning_rate": 2.1297214956073894e-06, "loss": 0.3649, "step": 10654 }, { "epoch": 0.6392872142557149, "grad_norm": 1.395364761352539, "learning_rate": 2.129095649369036e-06, "loss": 0.4063, "step": 10655 }, { "epoch": 0.6393472130557389, "grad_norm": 1.2004634141921997, "learning_rate": 2.128469854903494e-06, "loss": 0.3727, "step": 10656 }, { "epoch": 0.6394072118557629, "grad_norm": 1.2096511125564575, "learning_rate": 2.1278441122343974e-06, "loss": 0.3862, "step": 10657 }, { "epoch": 0.6394672106557869, "grad_norm": 1.2919501066207886, "learning_rate": 2.127218421385377e-06, "loss": 0.3966, "step": 10658 }, { "epoch": 0.6395272094558109, "grad_norm": 1.2673074007034302, "learning_rate": 2.126592782380062e-06, "loss": 0.3812, "step": 10659 }, { "epoch": 0.6395872082558349, "grad_norm": 1.3547217845916748, "learning_rate": 2.125967195242081e-06, "loss": 0.4391, "step": 10660 }, { "epoch": 0.6396472070558589, "grad_norm": 1.3501267433166504, "learning_rate": 2.1253416599950586e-06, "loss": 0.4045, "step": 10661 }, { "epoch": 0.6397072058558829, "grad_norm": 1.2729417085647583, "learning_rate": 2.1247161766626186e-06, "loss": 0.3748, "step": 10662 }, { "epoch": 0.6397672046559069, "grad_norm": 1.3490103483200073, "learning_rate": 2.124090745268383e-06, "loss": 0.3051, "step": 10663 }, { "epoch": 0.6398272034559309, "grad_norm": 1.305942177772522, "learning_rate": 2.123465365835971e-06, "loss": 0.3994, "step": 10664 }, { "epoch": 0.6398872022559549, "grad_norm": 1.2239062786102295, "learning_rate": 2.122840038389002e-06, "loss": 0.3753, "step": 10665 }, { "epoch": 0.6399472010559789, "grad_norm": 1.2566561698913574, "learning_rate": 2.1222147629510886e-06, "loss": 0.4068, "step": 10666 }, { "epoch": 0.6400071998560029, "grad_norm": 1.2537728548049927, "learning_rate": 2.1215895395458484e-06, "loss": 0.3659, "step": 10667 }, { "epoch": 0.6400671986560269, "grad_norm": 1.1580915451049805, "learning_rate": 2.120964368196889e-06, "loss": 0.359, "step": 10668 }, { "epoch": 0.6401271974560508, "grad_norm": 1.285893201828003, "learning_rate": 2.1203392489278233e-06, "loss": 0.3671, "step": 10669 }, { "epoch": 0.6401871962560749, "grad_norm": 1.272079348564148, "learning_rate": 2.1197141817622592e-06, "loss": 0.3716, "step": 10670 }, { "epoch": 0.6402471950560988, "grad_norm": 1.3512979745864868, "learning_rate": 2.119089166723801e-06, "loss": 0.3749, "step": 10671 }, { "epoch": 0.6403071938561229, "grad_norm": 1.2398059368133545, "learning_rate": 2.118464203836055e-06, "loss": 0.3628, "step": 10672 }, { "epoch": 0.6403671926561468, "grad_norm": 1.2867521047592163, "learning_rate": 2.1178392931226207e-06, "loss": 0.3972, "step": 10673 }, { "epoch": 0.6404271914561709, "grad_norm": 1.4009679555892944, "learning_rate": 2.1172144346070985e-06, "loss": 0.4018, "step": 10674 }, { "epoch": 0.6404871902561948, "grad_norm": 1.2509403228759766, "learning_rate": 2.1165896283130886e-06, "loss": 0.3784, "step": 10675 }, { "epoch": 0.6405471890562189, "grad_norm": 1.3856415748596191, "learning_rate": 2.115964874264185e-06, "loss": 0.3942, "step": 10676 }, { "epoch": 0.6406071878562428, "grad_norm": 1.3744111061096191, "learning_rate": 2.115340172483983e-06, "loss": 0.4049, "step": 10677 }, { "epoch": 0.6406671866562669, "grad_norm": 1.1483219861984253, "learning_rate": 2.1147155229960735e-06, "loss": 0.3356, "step": 10678 }, { "epoch": 0.6407271854562909, "grad_norm": 1.3341339826583862, "learning_rate": 2.1140909258240477e-06, "loss": 0.4112, "step": 10679 }, { "epoch": 0.6407871842563149, "grad_norm": 1.2330461740493774, "learning_rate": 2.1134663809914933e-06, "loss": 0.4072, "step": 10680 }, { "epoch": 0.6408471830563389, "grad_norm": 1.2828786373138428, "learning_rate": 2.1128418885219953e-06, "loss": 0.3834, "step": 10681 }, { "epoch": 0.6409071818563629, "grad_norm": 1.3047574758529663, "learning_rate": 2.1122174484391417e-06, "loss": 0.3904, "step": 10682 }, { "epoch": 0.6409671806563869, "grad_norm": 1.2950303554534912, "learning_rate": 2.1115930607665102e-06, "loss": 0.3818, "step": 10683 }, { "epoch": 0.6410271794564109, "grad_norm": 1.3868900537490845, "learning_rate": 2.1109687255276856e-06, "loss": 0.3615, "step": 10684 }, { "epoch": 0.6410871782564349, "grad_norm": 1.395589828491211, "learning_rate": 2.1103444427462415e-06, "loss": 0.4325, "step": 10685 }, { "epoch": 0.6411471770564589, "grad_norm": 1.284706711769104, "learning_rate": 2.1097202124457574e-06, "loss": 0.3918, "step": 10686 }, { "epoch": 0.6412071758564829, "grad_norm": 1.309627890586853, "learning_rate": 2.1090960346498078e-06, "loss": 0.3591, "step": 10687 }, { "epoch": 0.6412671746565068, "grad_norm": 1.2741427421569824, "learning_rate": 2.1084719093819626e-06, "loss": 0.3808, "step": 10688 }, { "epoch": 0.6413271734565309, "grad_norm": 1.355820894241333, "learning_rate": 2.1078478366657944e-06, "loss": 0.4013, "step": 10689 }, { "epoch": 0.6413871722565548, "grad_norm": 1.3410662412643433, "learning_rate": 2.1072238165248707e-06, "loss": 0.3998, "step": 10690 }, { "epoch": 0.6414471710565789, "grad_norm": 1.197617530822754, "learning_rate": 2.106599848982758e-06, "loss": 0.3868, "step": 10691 }, { "epoch": 0.6415071698566028, "grad_norm": 1.3830962181091309, "learning_rate": 2.10597593406302e-06, "loss": 0.4314, "step": 10692 }, { "epoch": 0.6415671686566269, "grad_norm": 1.2449419498443604, "learning_rate": 2.10535207178922e-06, "loss": 0.3516, "step": 10693 }, { "epoch": 0.6416271674566508, "grad_norm": 1.1903131008148193, "learning_rate": 2.1047282621849193e-06, "loss": 0.3619, "step": 10694 }, { "epoch": 0.6416871662566749, "grad_norm": 1.329844355583191, "learning_rate": 2.1041045052736747e-06, "loss": 0.3851, "step": 10695 }, { "epoch": 0.6417471650566988, "grad_norm": 1.2693451642990112, "learning_rate": 2.103480801079043e-06, "loss": 0.3796, "step": 10696 }, { "epoch": 0.6418071638567229, "grad_norm": 1.3230924606323242, "learning_rate": 2.102857149624579e-06, "loss": 0.4197, "step": 10697 }, { "epoch": 0.6418671626567468, "grad_norm": 1.2664811611175537, "learning_rate": 2.102233550933834e-06, "loss": 0.356, "step": 10698 }, { "epoch": 0.6419271614567709, "grad_norm": 1.2619329690933228, "learning_rate": 2.101610005030362e-06, "loss": 0.3371, "step": 10699 }, { "epoch": 0.6419871602567948, "grad_norm": 1.3940558433532715, "learning_rate": 2.1009865119377063e-06, "loss": 0.3834, "step": 10700 }, { "epoch": 0.6420471590568189, "grad_norm": 1.3319653272628784, "learning_rate": 2.1003630716794185e-06, "loss": 0.3948, "step": 10701 }, { "epoch": 0.6421071578568429, "grad_norm": 1.2780824899673462, "learning_rate": 2.0997396842790393e-06, "loss": 0.3502, "step": 10702 }, { "epoch": 0.6421671566568669, "grad_norm": 1.2885662317276, "learning_rate": 2.0991163497601132e-06, "loss": 0.3972, "step": 10703 }, { "epoch": 0.6422271554568909, "grad_norm": 1.3067996501922607, "learning_rate": 2.0984930681461807e-06, "loss": 0.3743, "step": 10704 }, { "epoch": 0.6422871542569148, "grad_norm": 1.369059443473816, "learning_rate": 2.097869839460779e-06, "loss": 0.3627, "step": 10705 }, { "epoch": 0.6423471530569389, "grad_norm": 1.3736552000045776, "learning_rate": 2.097246663727446e-06, "loss": 0.3681, "step": 10706 }, { "epoch": 0.6424071518569628, "grad_norm": 1.2214206457138062, "learning_rate": 2.0966235409697145e-06, "loss": 0.3776, "step": 10707 }, { "epoch": 0.6424671506569869, "grad_norm": 1.2725435495376587, "learning_rate": 2.0960004712111196e-06, "loss": 0.3491, "step": 10708 }, { "epoch": 0.6425271494570108, "grad_norm": 1.2182079553604126, "learning_rate": 2.095377454475189e-06, "loss": 0.3861, "step": 10709 }, { "epoch": 0.6425871482570349, "grad_norm": 1.2792490720748901, "learning_rate": 2.094754490785452e-06, "loss": 0.3952, "step": 10710 }, { "epoch": 0.6426471470570588, "grad_norm": 1.421584963798523, "learning_rate": 2.094131580165438e-06, "loss": 0.3946, "step": 10711 }, { "epoch": 0.6427071458570829, "grad_norm": 1.384661316871643, "learning_rate": 2.0935087226386663e-06, "loss": 0.4202, "step": 10712 }, { "epoch": 0.6427671446571068, "grad_norm": 1.3309921026229858, "learning_rate": 2.092885918228665e-06, "loss": 0.3497, "step": 10713 }, { "epoch": 0.6428271434571309, "grad_norm": 1.1446003913879395, "learning_rate": 2.092263166958949e-06, "loss": 0.3588, "step": 10714 }, { "epoch": 0.6428871422571548, "grad_norm": 1.3872219324111938, "learning_rate": 2.091640468853041e-06, "loss": 0.41, "step": 10715 }, { "epoch": 0.6429471410571789, "grad_norm": 1.4299236536026, "learning_rate": 2.091017823934456e-06, "loss": 0.3652, "step": 10716 }, { "epoch": 0.6430071398572028, "grad_norm": 1.2145971059799194, "learning_rate": 2.0903952322267084e-06, "loss": 0.3618, "step": 10717 }, { "epoch": 0.6430671386572269, "grad_norm": 1.2505855560302734, "learning_rate": 2.0897726937533115e-06, "loss": 0.3811, "step": 10718 }, { "epoch": 0.6431271374572508, "grad_norm": 1.323587417602539, "learning_rate": 2.089150208537774e-06, "loss": 0.3925, "step": 10719 }, { "epoch": 0.6431871362572749, "grad_norm": 1.1608035564422607, "learning_rate": 2.088527776603606e-06, "loss": 0.3548, "step": 10720 }, { "epoch": 0.6432471350572988, "grad_norm": 1.3365545272827148, "learning_rate": 2.0879053979743123e-06, "loss": 0.401, "step": 10721 }, { "epoch": 0.6433071338573229, "grad_norm": 1.2028002738952637, "learning_rate": 2.0872830726733983e-06, "loss": 0.3785, "step": 10722 }, { "epoch": 0.6433671326573469, "grad_norm": 1.2571049928665161, "learning_rate": 2.086660800724367e-06, "loss": 0.3629, "step": 10723 }, { "epoch": 0.6434271314573708, "grad_norm": 1.382347822189331, "learning_rate": 2.0860385821507177e-06, "loss": 0.3696, "step": 10724 }, { "epoch": 0.6434871302573949, "grad_norm": 1.4812982082366943, "learning_rate": 2.0854164169759495e-06, "loss": 0.4213, "step": 10725 }, { "epoch": 0.6435471290574188, "grad_norm": 1.2985661029815674, "learning_rate": 2.084794305223558e-06, "loss": 0.3837, "step": 10726 }, { "epoch": 0.6436071278574429, "grad_norm": 1.4605262279510498, "learning_rate": 2.084172246917037e-06, "loss": 0.4293, "step": 10727 }, { "epoch": 0.6436671266574668, "grad_norm": 1.4180943965911865, "learning_rate": 2.0835502420798824e-06, "loss": 0.4352, "step": 10728 }, { "epoch": 0.6437271254574909, "grad_norm": 1.2342000007629395, "learning_rate": 2.0829282907355794e-06, "loss": 0.4043, "step": 10729 }, { "epoch": 0.6437871242575148, "grad_norm": 1.39474618434906, "learning_rate": 2.0823063929076202e-06, "loss": 0.3879, "step": 10730 }, { "epoch": 0.6438471230575389, "grad_norm": 1.21323561668396, "learning_rate": 2.081684548619488e-06, "loss": 0.3506, "step": 10731 }, { "epoch": 0.6439071218575628, "grad_norm": 1.3931249380111694, "learning_rate": 2.0810627578946707e-06, "loss": 0.4593, "step": 10732 }, { "epoch": 0.6439671206575869, "grad_norm": 1.3408149480819702, "learning_rate": 2.0804410207566462e-06, "loss": 0.3581, "step": 10733 }, { "epoch": 0.6440271194576108, "grad_norm": 1.252903699874878, "learning_rate": 2.0798193372288977e-06, "loss": 0.4083, "step": 10734 }, { "epoch": 0.6440871182576349, "grad_norm": 1.3007315397262573, "learning_rate": 2.0791977073349026e-06, "loss": 0.3912, "step": 10735 }, { "epoch": 0.6441471170576588, "grad_norm": 1.2453373670578003, "learning_rate": 2.078576131098137e-06, "loss": 0.3538, "step": 10736 }, { "epoch": 0.6442071158576829, "grad_norm": 1.321689248085022, "learning_rate": 2.077954608542075e-06, "loss": 0.3936, "step": 10737 }, { "epoch": 0.6442671146577068, "grad_norm": 1.1995570659637451, "learning_rate": 2.0773331396901883e-06, "loss": 0.373, "step": 10738 }, { "epoch": 0.6443271134577309, "grad_norm": 1.2828816175460815, "learning_rate": 2.076711724565947e-06, "loss": 0.3852, "step": 10739 }, { "epoch": 0.6443871122577548, "grad_norm": 1.2859668731689453, "learning_rate": 2.0760903631928205e-06, "loss": 0.3911, "step": 10740 }, { "epoch": 0.6444471110577789, "grad_norm": 1.1758393049240112, "learning_rate": 2.0754690555942728e-06, "loss": 0.3319, "step": 10741 }, { "epoch": 0.6445071098578028, "grad_norm": 1.1934176683425903, "learning_rate": 2.074847801793769e-06, "loss": 0.3806, "step": 10742 }, { "epoch": 0.6445671086578268, "grad_norm": 1.4095126390457153, "learning_rate": 2.074226601814771e-06, "loss": 0.3578, "step": 10743 }, { "epoch": 0.6446271074578508, "grad_norm": 1.328747272491455, "learning_rate": 2.0736054556807366e-06, "loss": 0.4002, "step": 10744 }, { "epoch": 0.6446871062578748, "grad_norm": 1.4652382135391235, "learning_rate": 2.0729843634151283e-06, "loss": 0.4137, "step": 10745 }, { "epoch": 0.6447471050578989, "grad_norm": 1.2556182146072388, "learning_rate": 2.0723633250413967e-06, "loss": 0.3429, "step": 10746 }, { "epoch": 0.6448071038579228, "grad_norm": 1.1666090488433838, "learning_rate": 2.071742340583e-06, "loss": 0.4044, "step": 10747 }, { "epoch": 0.6448671026579469, "grad_norm": 1.2312918901443481, "learning_rate": 2.0711214100633863e-06, "loss": 0.3866, "step": 10748 }, { "epoch": 0.6449271014579708, "grad_norm": 1.372987151145935, "learning_rate": 2.070500533506008e-06, "loss": 0.3953, "step": 10749 }, { "epoch": 0.6449871002579949, "grad_norm": 1.4221677780151367, "learning_rate": 2.069879710934312e-06, "loss": 0.4264, "step": 10750 }, { "epoch": 0.6450470990580188, "grad_norm": 1.3307380676269531, "learning_rate": 2.0692589423717426e-06, "loss": 0.458, "step": 10751 }, { "epoch": 0.6451070978580429, "grad_norm": 1.2959853410720825, "learning_rate": 2.0686382278417456e-06, "loss": 0.3988, "step": 10752 }, { "epoch": 0.6451670966580668, "grad_norm": 1.2286673784255981, "learning_rate": 2.068017567367761e-06, "loss": 0.3699, "step": 10753 }, { "epoch": 0.6452270954580909, "grad_norm": 1.4426544904708862, "learning_rate": 2.0673969609732293e-06, "loss": 0.4407, "step": 10754 }, { "epoch": 0.6452870942581148, "grad_norm": 1.2416985034942627, "learning_rate": 2.0667764086815865e-06, "loss": 0.3765, "step": 10755 }, { "epoch": 0.6453470930581389, "grad_norm": 1.2116005420684814, "learning_rate": 2.0661559105162684e-06, "loss": 0.4019, "step": 10756 }, { "epoch": 0.6454070918581628, "grad_norm": 1.3069920539855957, "learning_rate": 2.0655354665007117e-06, "loss": 0.3672, "step": 10757 }, { "epoch": 0.6454670906581869, "grad_norm": 1.2089072465896606, "learning_rate": 2.064915076658342e-06, "loss": 0.3584, "step": 10758 }, { "epoch": 0.6455270894582108, "grad_norm": 1.4658386707305908, "learning_rate": 2.0642947410125943e-06, "loss": 0.4038, "step": 10759 }, { "epoch": 0.6455870882582349, "grad_norm": 1.3801233768463135, "learning_rate": 2.063674459586891e-06, "loss": 0.396, "step": 10760 }, { "epoch": 0.6456470870582588, "grad_norm": 1.3272335529327393, "learning_rate": 2.0630542324046613e-06, "loss": 0.3461, "step": 10761 }, { "epoch": 0.6457070858582828, "grad_norm": 1.2907092571258545, "learning_rate": 2.0624340594893245e-06, "loss": 0.391, "step": 10762 }, { "epoch": 0.6457670846583068, "grad_norm": 1.282659649848938, "learning_rate": 2.061813940864305e-06, "loss": 0.4, "step": 10763 }, { "epoch": 0.6458270834583308, "grad_norm": 1.262487530708313, "learning_rate": 2.0611938765530207e-06, "loss": 0.3833, "step": 10764 }, { "epoch": 0.6458870822583548, "grad_norm": 1.2630691528320312, "learning_rate": 2.060573866578888e-06, "loss": 0.3827, "step": 10765 }, { "epoch": 0.6459470810583788, "grad_norm": 1.3493558168411255, "learning_rate": 2.059953910965323e-06, "loss": 0.3594, "step": 10766 }, { "epoch": 0.6460070798584028, "grad_norm": 1.1679171323776245, "learning_rate": 2.0593340097357374e-06, "loss": 0.3469, "step": 10767 }, { "epoch": 0.6460670786584268, "grad_norm": 1.300149917602539, "learning_rate": 2.0587141629135426e-06, "loss": 0.3773, "step": 10768 }, { "epoch": 0.6461270774584509, "grad_norm": 1.2446098327636719, "learning_rate": 2.058094370522148e-06, "loss": 0.3718, "step": 10769 }, { "epoch": 0.6461870762584748, "grad_norm": 1.2385659217834473, "learning_rate": 2.0574746325849585e-06, "loss": 0.4058, "step": 10770 }, { "epoch": 0.6462470750584989, "grad_norm": 1.2841346263885498, "learning_rate": 2.0568549491253813e-06, "loss": 0.3673, "step": 10771 }, { "epoch": 0.6463070738585228, "grad_norm": 1.4636436700820923, "learning_rate": 2.056235320166817e-06, "loss": 0.3971, "step": 10772 }, { "epoch": 0.6463670726585469, "grad_norm": 1.2457401752471924, "learning_rate": 2.0556157457326675e-06, "loss": 0.375, "step": 10773 }, { "epoch": 0.6464270714585708, "grad_norm": 1.1790924072265625, "learning_rate": 2.05499622584633e-06, "loss": 0.3516, "step": 10774 }, { "epoch": 0.6464870702585949, "grad_norm": 1.1989738941192627, "learning_rate": 2.0543767605312004e-06, "loss": 0.331, "step": 10775 }, { "epoch": 0.6465470690586188, "grad_norm": 1.412000298500061, "learning_rate": 2.0537573498106773e-06, "loss": 0.4023, "step": 10776 }, { "epoch": 0.6466070678586429, "grad_norm": 1.2658209800720215, "learning_rate": 2.0531379937081474e-06, "loss": 0.3779, "step": 10777 }, { "epoch": 0.6466670666586668, "grad_norm": 1.2017021179199219, "learning_rate": 2.0525186922470057e-06, "loss": 0.3941, "step": 10778 }, { "epoch": 0.6467270654586909, "grad_norm": 1.2594578266143799, "learning_rate": 2.0518994454506366e-06, "loss": 0.3976, "step": 10779 }, { "epoch": 0.6467870642587148, "grad_norm": 1.3284568786621094, "learning_rate": 2.051280253342428e-06, "loss": 0.3549, "step": 10780 }, { "epoch": 0.6468470630587388, "grad_norm": 1.3132297992706299, "learning_rate": 2.0506611159457654e-06, "loss": 0.3387, "step": 10781 }, { "epoch": 0.6469070618587628, "grad_norm": 1.3322482109069824, "learning_rate": 2.0500420332840277e-06, "loss": 0.3367, "step": 10782 }, { "epoch": 0.6469670606587868, "grad_norm": 1.4190186262130737, "learning_rate": 2.049423005380597e-06, "loss": 0.4314, "step": 10783 }, { "epoch": 0.6470270594588108, "grad_norm": 1.2719026803970337, "learning_rate": 2.048804032258851e-06, "loss": 0.3906, "step": 10784 }, { "epoch": 0.6470870582588348, "grad_norm": 1.356459140777588, "learning_rate": 2.0481851139421643e-06, "loss": 0.4504, "step": 10785 }, { "epoch": 0.6471470570588588, "grad_norm": 1.3724472522735596, "learning_rate": 2.0475662504539125e-06, "loss": 0.4003, "step": 10786 }, { "epoch": 0.6472070558588828, "grad_norm": 1.2473338842391968, "learning_rate": 2.0469474418174646e-06, "loss": 0.3546, "step": 10787 }, { "epoch": 0.6472670546589068, "grad_norm": 1.2532789707183838, "learning_rate": 2.046328688056193e-06, "loss": 0.3419, "step": 10788 }, { "epoch": 0.6473270534589308, "grad_norm": 1.2184427976608276, "learning_rate": 2.045709989193463e-06, "loss": 0.3681, "step": 10789 }, { "epoch": 0.6473870522589549, "grad_norm": 1.2135827541351318, "learning_rate": 2.045091345252642e-06, "loss": 0.3158, "step": 10790 }, { "epoch": 0.6474470510589788, "grad_norm": 1.4104859828948975, "learning_rate": 2.044472756257091e-06, "loss": 0.4442, "step": 10791 }, { "epoch": 0.6475070498590029, "grad_norm": 1.2640255689620972, "learning_rate": 2.0438542222301734e-06, "loss": 0.3711, "step": 10792 }, { "epoch": 0.6475670486590268, "grad_norm": 1.264126181602478, "learning_rate": 2.0432357431952482e-06, "loss": 0.3912, "step": 10793 }, { "epoch": 0.6476270474590509, "grad_norm": 1.2749998569488525, "learning_rate": 2.042617319175672e-06, "loss": 0.4052, "step": 10794 }, { "epoch": 0.6476870462590748, "grad_norm": 1.4608745574951172, "learning_rate": 2.0419989501947997e-06, "loss": 0.3889, "step": 10795 }, { "epoch": 0.6477470450590989, "grad_norm": 1.3117921352386475, "learning_rate": 2.0413806362759834e-06, "loss": 0.3443, "step": 10796 }, { "epoch": 0.6478070438591228, "grad_norm": 1.1712408065795898, "learning_rate": 2.0407623774425745e-06, "loss": 0.3726, "step": 10797 }, { "epoch": 0.6478670426591469, "grad_norm": 1.377000331878662, "learning_rate": 2.040144173717925e-06, "loss": 0.3688, "step": 10798 }, { "epoch": 0.6479270414591708, "grad_norm": 1.3684157133102417, "learning_rate": 2.0395260251253765e-06, "loss": 0.4131, "step": 10799 }, { "epoch": 0.6479870402591948, "grad_norm": 1.2731568813323975, "learning_rate": 2.038907931688277e-06, "loss": 0.4099, "step": 10800 }, { "epoch": 0.6480470390592188, "grad_norm": 1.2341563701629639, "learning_rate": 2.0382898934299676e-06, "loss": 0.4088, "step": 10801 }, { "epoch": 0.6481070378592428, "grad_norm": 1.263900876045227, "learning_rate": 2.037671910373792e-06, "loss": 0.391, "step": 10802 }, { "epoch": 0.6481670366592668, "grad_norm": 1.286760687828064, "learning_rate": 2.037053982543083e-06, "loss": 0.353, "step": 10803 }, { "epoch": 0.6482270354592908, "grad_norm": 1.1428911685943604, "learning_rate": 2.03643610996118e-06, "loss": 0.3548, "step": 10804 }, { "epoch": 0.6482870342593148, "grad_norm": 1.3573458194732666, "learning_rate": 2.035818292651419e-06, "loss": 0.3631, "step": 10805 }, { "epoch": 0.6483470330593388, "grad_norm": 1.3617339134216309, "learning_rate": 2.0352005306371297e-06, "loss": 0.3936, "step": 10806 }, { "epoch": 0.6484070318593628, "grad_norm": 1.3226556777954102, "learning_rate": 2.0345828239416427e-06, "loss": 0.3816, "step": 10807 }, { "epoch": 0.6484670306593868, "grad_norm": 1.4019445180892944, "learning_rate": 2.0339651725882854e-06, "loss": 0.3977, "step": 10808 }, { "epoch": 0.6485270294594108, "grad_norm": 1.3563511371612549, "learning_rate": 2.0333475766003844e-06, "loss": 0.3837, "step": 10809 }, { "epoch": 0.6485870282594348, "grad_norm": 1.2625184059143066, "learning_rate": 2.0327300360012643e-06, "loss": 0.3151, "step": 10810 }, { "epoch": 0.6486470270594588, "grad_norm": 1.4603028297424316, "learning_rate": 2.0321125508142443e-06, "loss": 0.381, "step": 10811 }, { "epoch": 0.6487070258594828, "grad_norm": 1.1876856088638306, "learning_rate": 2.031495121062647e-06, "loss": 0.3428, "step": 10812 }, { "epoch": 0.6487670246595069, "grad_norm": 1.4121044874191284, "learning_rate": 2.030877746769788e-06, "loss": 0.3792, "step": 10813 }, { "epoch": 0.6488270234595308, "grad_norm": 1.292050838470459, "learning_rate": 2.0302604279589823e-06, "loss": 0.4099, "step": 10814 }, { "epoch": 0.6488870222595549, "grad_norm": 1.313799262046814, "learning_rate": 2.0296431646535455e-06, "loss": 0.3847, "step": 10815 }, { "epoch": 0.6489470210595788, "grad_norm": 1.4136093854904175, "learning_rate": 2.029025956876786e-06, "loss": 0.3887, "step": 10816 }, { "epoch": 0.6490070198596029, "grad_norm": 1.2352638244628906, "learning_rate": 2.028408804652016e-06, "loss": 0.3594, "step": 10817 }, { "epoch": 0.6490670186596268, "grad_norm": 1.1931660175323486, "learning_rate": 2.0277917080025405e-06, "loss": 0.3782, "step": 10818 }, { "epoch": 0.6491270174596508, "grad_norm": 1.1474281549453735, "learning_rate": 2.0271746669516653e-06, "loss": 0.3956, "step": 10819 }, { "epoch": 0.6491870162596748, "grad_norm": 1.3798668384552002, "learning_rate": 2.026557681522691e-06, "loss": 0.3596, "step": 10820 }, { "epoch": 0.6492470150596988, "grad_norm": 1.486830472946167, "learning_rate": 2.025940751738921e-06, "loss": 0.3768, "step": 10821 }, { "epoch": 0.6493070138597228, "grad_norm": 1.4151742458343506, "learning_rate": 2.0253238776236534e-06, "loss": 0.4218, "step": 10822 }, { "epoch": 0.6493670126597468, "grad_norm": 1.201846957206726, "learning_rate": 2.0247070592001837e-06, "loss": 0.3635, "step": 10823 }, { "epoch": 0.6494270114597708, "grad_norm": 1.555564045906067, "learning_rate": 2.0240902964918074e-06, "loss": 0.3859, "step": 10824 }, { "epoch": 0.6494870102597948, "grad_norm": 1.4531142711639404, "learning_rate": 2.0234735895218168e-06, "loss": 0.4077, "step": 10825 }, { "epoch": 0.6495470090598188, "grad_norm": 1.2399989366531372, "learning_rate": 2.0228569383135e-06, "loss": 0.3343, "step": 10826 }, { "epoch": 0.6496070078598428, "grad_norm": 1.3358941078186035, "learning_rate": 2.0222403428901485e-06, "loss": 0.4315, "step": 10827 }, { "epoch": 0.6496670066598668, "grad_norm": 1.2762855291366577, "learning_rate": 2.021623803275045e-06, "loss": 0.3765, "step": 10828 }, { "epoch": 0.6497270054598908, "grad_norm": 1.241624116897583, "learning_rate": 2.021007319491478e-06, "loss": 0.4111, "step": 10829 }, { "epoch": 0.6497870042599148, "grad_norm": 1.3872851133346558, "learning_rate": 2.0203908915627232e-06, "loss": 0.3992, "step": 10830 }, { "epoch": 0.6498470030599388, "grad_norm": 1.2885239124298096, "learning_rate": 2.0197745195120646e-06, "loss": 0.4107, "step": 10831 }, { "epoch": 0.6499070018599628, "grad_norm": 1.2856042385101318, "learning_rate": 2.019158203362777e-06, "loss": 0.4115, "step": 10832 }, { "epoch": 0.6499670006599868, "grad_norm": 1.3809242248535156, "learning_rate": 2.0185419431381397e-06, "loss": 0.435, "step": 10833 }, { "epoch": 0.6500269994600107, "grad_norm": 1.2498202323913574, "learning_rate": 2.017925738861423e-06, "loss": 0.3837, "step": 10834 }, { "epoch": 0.6500869982600348, "grad_norm": 1.1968910694122314, "learning_rate": 2.017309590555897e-06, "loss": 0.3463, "step": 10835 }, { "epoch": 0.6501469970600589, "grad_norm": 1.2160007953643799, "learning_rate": 2.0166934982448353e-06, "loss": 0.3715, "step": 10836 }, { "epoch": 0.6502069958600828, "grad_norm": 1.2198330163955688, "learning_rate": 2.0160774619515013e-06, "loss": 0.3785, "step": 10837 }, { "epoch": 0.6502669946601068, "grad_norm": 1.2984497547149658, "learning_rate": 2.0154614816991596e-06, "loss": 0.3649, "step": 10838 }, { "epoch": 0.6503269934601308, "grad_norm": 1.3259891271591187, "learning_rate": 2.0148455575110757e-06, "loss": 0.4099, "step": 10839 }, { "epoch": 0.6503869922601548, "grad_norm": 1.3171451091766357, "learning_rate": 2.014229689410509e-06, "loss": 0.3867, "step": 10840 }, { "epoch": 0.6504469910601788, "grad_norm": 1.2626522779464722, "learning_rate": 2.013613877420717e-06, "loss": 0.3938, "step": 10841 }, { "epoch": 0.6505069898602028, "grad_norm": 1.2804958820343018, "learning_rate": 2.0129981215649565e-06, "loss": 0.4493, "step": 10842 }, { "epoch": 0.6505669886602268, "grad_norm": 1.424817681312561, "learning_rate": 2.012382421866483e-06, "loss": 0.3912, "step": 10843 }, { "epoch": 0.6506269874602508, "grad_norm": 1.2136621475219727, "learning_rate": 2.011766778348547e-06, "loss": 0.3787, "step": 10844 }, { "epoch": 0.6506869862602748, "grad_norm": 1.2864419221878052, "learning_rate": 2.0111511910343993e-06, "loss": 0.4024, "step": 10845 }, { "epoch": 0.6507469850602988, "grad_norm": 1.231139063835144, "learning_rate": 2.010535659947289e-06, "loss": 0.3573, "step": 10846 }, { "epoch": 0.6508069838603228, "grad_norm": 1.2444820404052734, "learning_rate": 2.0099201851104597e-06, "loss": 0.3676, "step": 10847 }, { "epoch": 0.6508669826603468, "grad_norm": 1.2585769891738892, "learning_rate": 2.0093047665471584e-06, "loss": 0.3923, "step": 10848 }, { "epoch": 0.6509269814603708, "grad_norm": 1.4400579929351807, "learning_rate": 2.008689404280622e-06, "loss": 0.3785, "step": 10849 }, { "epoch": 0.6509869802603948, "grad_norm": 1.440271019935608, "learning_rate": 2.008074098334093e-06, "loss": 0.3554, "step": 10850 }, { "epoch": 0.6510469790604188, "grad_norm": 1.7146981954574585, "learning_rate": 2.0074588487308087e-06, "loss": 0.3708, "step": 10851 }, { "epoch": 0.6511069778604428, "grad_norm": 1.3604286909103394, "learning_rate": 2.0068436554940037e-06, "loss": 0.3393, "step": 10852 }, { "epoch": 0.6511669766604667, "grad_norm": 1.2830731868743896, "learning_rate": 2.006228518646911e-06, "loss": 0.3745, "step": 10853 }, { "epoch": 0.6512269754604908, "grad_norm": 1.2842968702316284, "learning_rate": 2.005613438212761e-06, "loss": 0.3481, "step": 10854 }, { "epoch": 0.6512869742605147, "grad_norm": 1.3142861127853394, "learning_rate": 2.0049984142147832e-06, "loss": 0.3731, "step": 10855 }, { "epoch": 0.6513469730605388, "grad_norm": 1.2751624584197998, "learning_rate": 2.0043834466762047e-06, "loss": 0.3805, "step": 10856 }, { "epoch": 0.6514069718605627, "grad_norm": 1.338315486907959, "learning_rate": 2.003768535620248e-06, "loss": 0.4156, "step": 10857 }, { "epoch": 0.6514669706605868, "grad_norm": 1.4019651412963867, "learning_rate": 2.0031536810701383e-06, "loss": 0.4329, "step": 10858 }, { "epoch": 0.6515269694606108, "grad_norm": 1.3951956033706665, "learning_rate": 2.0025388830490928e-06, "loss": 0.3881, "step": 10859 }, { "epoch": 0.6515869682606348, "grad_norm": 1.4257782697677612, "learning_rate": 2.0019241415803345e-06, "loss": 0.4329, "step": 10860 }, { "epoch": 0.6516469670606588, "grad_norm": 1.4027183055877686, "learning_rate": 2.0013094566870735e-06, "loss": 0.4168, "step": 10861 }, { "epoch": 0.6517069658606828, "grad_norm": 1.413388729095459, "learning_rate": 2.0006948283925257e-06, "loss": 0.4305, "step": 10862 }, { "epoch": 0.6517669646607068, "grad_norm": 1.3725790977478027, "learning_rate": 2.000080256719905e-06, "loss": 0.3779, "step": 10863 }, { "epoch": 0.6518269634607308, "grad_norm": 1.3474987745285034, "learning_rate": 1.9994657416924196e-06, "loss": 0.3708, "step": 10864 }, { "epoch": 0.6518869622607548, "grad_norm": 1.2648950815200806, "learning_rate": 1.998851283333277e-06, "loss": 0.3879, "step": 10865 }, { "epoch": 0.6519469610607788, "grad_norm": 1.2990957498550415, "learning_rate": 1.9982368816656803e-06, "loss": 0.4027, "step": 10866 }, { "epoch": 0.6520069598608028, "grad_norm": 1.3341710567474365, "learning_rate": 1.9976225367128356e-06, "loss": 0.3674, "step": 10867 }, { "epoch": 0.6520669586608268, "grad_norm": 1.3232570886611938, "learning_rate": 1.9970082484979435e-06, "loss": 0.3856, "step": 10868 }, { "epoch": 0.6521269574608508, "grad_norm": 1.251315712928772, "learning_rate": 1.9963940170442006e-06, "loss": 0.3621, "step": 10869 }, { "epoch": 0.6521869562608748, "grad_norm": 1.2907755374908447, "learning_rate": 1.9957798423748065e-06, "loss": 0.3576, "step": 10870 }, { "epoch": 0.6522469550608988, "grad_norm": 1.305936336517334, "learning_rate": 1.995165724512955e-06, "loss": 0.3454, "step": 10871 }, { "epoch": 0.6523069538609227, "grad_norm": 1.3271007537841797, "learning_rate": 1.994551663481838e-06, "loss": 0.3841, "step": 10872 }, { "epoch": 0.6523669526609468, "grad_norm": 1.3571803569793701, "learning_rate": 1.993937659304644e-06, "loss": 0.4533, "step": 10873 }, { "epoch": 0.6524269514609707, "grad_norm": 1.1953084468841553, "learning_rate": 1.9933237120045635e-06, "loss": 0.3555, "step": 10874 }, { "epoch": 0.6524869502609948, "grad_norm": 1.3761284351348877, "learning_rate": 1.992709821604785e-06, "loss": 0.3903, "step": 10875 }, { "epoch": 0.6525469490610187, "grad_norm": 1.4406373500823975, "learning_rate": 1.992095988128486e-06, "loss": 0.4342, "step": 10876 }, { "epoch": 0.6526069478610428, "grad_norm": 1.2502907514572144, "learning_rate": 1.991482211598853e-06, "loss": 0.3713, "step": 10877 }, { "epoch": 0.6526669466610667, "grad_norm": 1.3357762098312378, "learning_rate": 1.9908684920390625e-06, "loss": 0.373, "step": 10878 }, { "epoch": 0.6527269454610908, "grad_norm": 1.3699122667312622, "learning_rate": 1.990254829472295e-06, "loss": 0.3611, "step": 10879 }, { "epoch": 0.6527869442611148, "grad_norm": 1.3976354598999023, "learning_rate": 1.9896412239217244e-06, "loss": 0.4137, "step": 10880 }, { "epoch": 0.6528469430611388, "grad_norm": 1.2874408960342407, "learning_rate": 1.9890276754105226e-06, "loss": 0.3763, "step": 10881 }, { "epoch": 0.6529069418611628, "grad_norm": 1.4038441181182861, "learning_rate": 1.9884141839618624e-06, "loss": 0.388, "step": 10882 }, { "epoch": 0.6529669406611868, "grad_norm": 1.2817503213882446, "learning_rate": 1.987800749598912e-06, "loss": 0.3684, "step": 10883 }, { "epoch": 0.6530269394612108, "grad_norm": 1.386873722076416, "learning_rate": 1.987187372344837e-06, "loss": 0.4345, "step": 10884 }, { "epoch": 0.6530869382612348, "grad_norm": 1.251876950263977, "learning_rate": 1.986574052222802e-06, "loss": 0.3704, "step": 10885 }, { "epoch": 0.6531469370612588, "grad_norm": 1.4129787683486938, "learning_rate": 1.985960789255971e-06, "loss": 0.376, "step": 10886 }, { "epoch": 0.6532069358612828, "grad_norm": 1.2015994787216187, "learning_rate": 1.9853475834675027e-06, "loss": 0.3779, "step": 10887 }, { "epoch": 0.6532669346613068, "grad_norm": 1.2412514686584473, "learning_rate": 1.9847344348805547e-06, "loss": 0.3817, "step": 10888 }, { "epoch": 0.6533269334613307, "grad_norm": 1.3294118642807007, "learning_rate": 1.9841213435182844e-06, "loss": 0.3588, "step": 10889 }, { "epoch": 0.6533869322613548, "grad_norm": 1.3857595920562744, "learning_rate": 1.983508309403845e-06, "loss": 0.414, "step": 10890 }, { "epoch": 0.6534469310613787, "grad_norm": 1.3255349397659302, "learning_rate": 1.982895332560386e-06, "loss": 0.417, "step": 10891 }, { "epoch": 0.6535069298614028, "grad_norm": 1.3765002489089966, "learning_rate": 1.98228241301106e-06, "loss": 0.3846, "step": 10892 }, { "epoch": 0.6535669286614267, "grad_norm": 1.4160128831863403, "learning_rate": 1.981669550779011e-06, "loss": 0.3643, "step": 10893 }, { "epoch": 0.6536269274614508, "grad_norm": 1.3126477003097534, "learning_rate": 1.9810567458873882e-06, "loss": 0.4064, "step": 10894 }, { "epoch": 0.6536869262614747, "grad_norm": 1.4973433017730713, "learning_rate": 1.9804439983593294e-06, "loss": 0.4591, "step": 10895 }, { "epoch": 0.6537469250614988, "grad_norm": 1.281190037727356, "learning_rate": 1.9798313082179773e-06, "loss": 0.4107, "step": 10896 }, { "epoch": 0.6538069238615227, "grad_norm": 1.3483467102050781, "learning_rate": 1.9792186754864723e-06, "loss": 0.3734, "step": 10897 }, { "epoch": 0.6538669226615468, "grad_norm": 1.3848389387130737, "learning_rate": 1.9786061001879495e-06, "loss": 0.4013, "step": 10898 }, { "epoch": 0.6539269214615707, "grad_norm": 1.5067847967147827, "learning_rate": 1.977993582345543e-06, "loss": 0.3907, "step": 10899 }, { "epoch": 0.6539869202615948, "grad_norm": 1.2153289318084717, "learning_rate": 1.977381121982383e-06, "loss": 0.3512, "step": 10900 }, { "epoch": 0.6540469190616187, "grad_norm": 1.342529535293579, "learning_rate": 1.976768719121602e-06, "loss": 0.3913, "step": 10901 }, { "epoch": 0.6541069178616428, "grad_norm": 1.2695162296295166, "learning_rate": 1.9761563737863272e-06, "loss": 0.3687, "step": 10902 }, { "epoch": 0.6541669166616668, "grad_norm": 1.3406546115875244, "learning_rate": 1.975544085999682e-06, "loss": 0.4122, "step": 10903 }, { "epoch": 0.6542269154616908, "grad_norm": 1.3877689838409424, "learning_rate": 1.9749318557847925e-06, "loss": 0.3868, "step": 10904 }, { "epoch": 0.6542869142617148, "grad_norm": 1.3389745950698853, "learning_rate": 1.974319683164778e-06, "loss": 0.4088, "step": 10905 }, { "epoch": 0.6543469130617388, "grad_norm": 1.2189302444458008, "learning_rate": 1.97370756816276e-06, "loss": 0.3479, "step": 10906 }, { "epoch": 0.6544069118617628, "grad_norm": 1.2386475801467896, "learning_rate": 1.9730955108018503e-06, "loss": 0.3898, "step": 10907 }, { "epoch": 0.6544669106617867, "grad_norm": 1.327773094177246, "learning_rate": 1.9724835111051674e-06, "loss": 0.3649, "step": 10908 }, { "epoch": 0.6545269094618108, "grad_norm": 1.3512752056121826, "learning_rate": 1.9718715690958242e-06, "loss": 0.3651, "step": 10909 }, { "epoch": 0.6545869082618347, "grad_norm": 1.2976073026657104, "learning_rate": 1.9712596847969296e-06, "loss": 0.3518, "step": 10910 }, { "epoch": 0.6546469070618588, "grad_norm": 1.3074415922164917, "learning_rate": 1.970647858231592e-06, "loss": 0.4086, "step": 10911 }, { "epoch": 0.6547069058618827, "grad_norm": 1.3033567667007446, "learning_rate": 1.9700360894229152e-06, "loss": 0.3859, "step": 10912 }, { "epoch": 0.6547669046619068, "grad_norm": 1.3736112117767334, "learning_rate": 1.969424378394006e-06, "loss": 0.4127, "step": 10913 }, { "epoch": 0.6548269034619307, "grad_norm": 1.3598917722702026, "learning_rate": 1.9688127251679654e-06, "loss": 0.4084, "step": 10914 }, { "epoch": 0.6548869022619548, "grad_norm": 1.2445142269134521, "learning_rate": 1.9682011297678902e-06, "loss": 0.4006, "step": 10915 }, { "epoch": 0.6549469010619787, "grad_norm": 1.412029504776001, "learning_rate": 1.9675895922168815e-06, "loss": 0.4027, "step": 10916 }, { "epoch": 0.6550068998620028, "grad_norm": 1.3075361251831055, "learning_rate": 1.9669781125380317e-06, "loss": 0.3943, "step": 10917 }, { "epoch": 0.6550668986620267, "grad_norm": 1.4400330781936646, "learning_rate": 1.9663666907544345e-06, "loss": 0.343, "step": 10918 }, { "epoch": 0.6551268974620508, "grad_norm": 1.3246848583221436, "learning_rate": 1.9657553268891786e-06, "loss": 0.3509, "step": 10919 }, { "epoch": 0.6551868962620747, "grad_norm": 1.2602107524871826, "learning_rate": 1.965144020965354e-06, "loss": 0.3834, "step": 10920 }, { "epoch": 0.6552468950620988, "grad_norm": 1.350548505783081, "learning_rate": 1.9645327730060497e-06, "loss": 0.3785, "step": 10921 }, { "epoch": 0.6553068938621227, "grad_norm": 1.35257089138031, "learning_rate": 1.9639215830343442e-06, "loss": 0.39, "step": 10922 }, { "epoch": 0.6553668926621468, "grad_norm": 1.396559238433838, "learning_rate": 1.9633104510733236e-06, "loss": 0.4029, "step": 10923 }, { "epoch": 0.6554268914621707, "grad_norm": 1.3875759840011597, "learning_rate": 1.962699377146065e-06, "loss": 0.4004, "step": 10924 }, { "epoch": 0.6554868902621948, "grad_norm": 1.3746716976165771, "learning_rate": 1.9620883612756493e-06, "loss": 0.3989, "step": 10925 }, { "epoch": 0.6555468890622188, "grad_norm": 1.209123134613037, "learning_rate": 1.961477403485147e-06, "loss": 0.3963, "step": 10926 }, { "epoch": 0.6556068878622427, "grad_norm": 1.4006003141403198, "learning_rate": 1.9608665037976334e-06, "loss": 0.4216, "step": 10927 }, { "epoch": 0.6556668866622668, "grad_norm": 1.2172967195510864, "learning_rate": 1.9602556622361816e-06, "loss": 0.3585, "step": 10928 }, { "epoch": 0.6557268854622907, "grad_norm": 1.2773863077163696, "learning_rate": 1.959644878823858e-06, "loss": 0.3922, "step": 10929 }, { "epoch": 0.6557868842623148, "grad_norm": 1.227798581123352, "learning_rate": 1.9590341535837293e-06, "loss": 0.3904, "step": 10930 }, { "epoch": 0.6558468830623387, "grad_norm": 1.3322477340698242, "learning_rate": 1.958423486538859e-06, "loss": 0.4012, "step": 10931 }, { "epoch": 0.6559068818623628, "grad_norm": 1.3198668956756592, "learning_rate": 1.9578128777123117e-06, "loss": 0.3306, "step": 10932 }, { "epoch": 0.6559668806623867, "grad_norm": 1.5338436365127563, "learning_rate": 1.9572023271271457e-06, "loss": 0.3764, "step": 10933 }, { "epoch": 0.6560268794624108, "grad_norm": 1.191537618637085, "learning_rate": 1.9565918348064167e-06, "loss": 0.3906, "step": 10934 }, { "epoch": 0.6560868782624347, "grad_norm": 1.382070779800415, "learning_rate": 1.9559814007731847e-06, "loss": 0.3716, "step": 10935 }, { "epoch": 0.6561468770624588, "grad_norm": 1.1781960725784302, "learning_rate": 1.9553710250505e-06, "loss": 0.3238, "step": 10936 }, { "epoch": 0.6562068758624827, "grad_norm": 1.2523791790008545, "learning_rate": 1.954760707661414e-06, "loss": 0.4041, "step": 10937 }, { "epoch": 0.6562668746625068, "grad_norm": 1.2902932167053223, "learning_rate": 1.9541504486289767e-06, "loss": 0.3987, "step": 10938 }, { "epoch": 0.6563268734625307, "grad_norm": 1.3763858079910278, "learning_rate": 1.9535402479762326e-06, "loss": 0.3887, "step": 10939 }, { "epoch": 0.6563868722625548, "grad_norm": 1.3773443698883057, "learning_rate": 1.95293010572623e-06, "loss": 0.3938, "step": 10940 }, { "epoch": 0.6564468710625787, "grad_norm": 1.4243803024291992, "learning_rate": 1.952320021902007e-06, "loss": 0.3978, "step": 10941 }, { "epoch": 0.6565068698626028, "grad_norm": 1.2874348163604736, "learning_rate": 1.9517099965266065e-06, "loss": 0.337, "step": 10942 }, { "epoch": 0.6565668686626267, "grad_norm": 1.1626660823822021, "learning_rate": 1.951100029623064e-06, "loss": 0.3511, "step": 10943 }, { "epoch": 0.6566268674626508, "grad_norm": 1.2668308019638062, "learning_rate": 1.950490121214418e-06, "loss": 0.4211, "step": 10944 }, { "epoch": 0.6566868662626747, "grad_norm": 1.438632607460022, "learning_rate": 1.949880271323701e-06, "loss": 0.4196, "step": 10945 }, { "epoch": 0.6567468650626987, "grad_norm": 1.258392572402954, "learning_rate": 1.949270479973942e-06, "loss": 0.3829, "step": 10946 }, { "epoch": 0.6568068638627228, "grad_norm": 1.2429115772247314, "learning_rate": 1.9486607471881735e-06, "loss": 0.3483, "step": 10947 }, { "epoch": 0.6568668626627467, "grad_norm": 1.3015412092208862, "learning_rate": 1.94805107298942e-06, "loss": 0.378, "step": 10948 }, { "epoch": 0.6569268614627708, "grad_norm": 1.1619982719421387, "learning_rate": 1.9474414574007064e-06, "loss": 0.342, "step": 10949 }, { "epoch": 0.6569868602627947, "grad_norm": 1.208958387374878, "learning_rate": 1.9468319004450563e-06, "loss": 0.3684, "step": 10950 }, { "epoch": 0.6570468590628188, "grad_norm": 1.2140448093414307, "learning_rate": 1.946222402145488e-06, "loss": 0.3679, "step": 10951 }, { "epoch": 0.6571068578628427, "grad_norm": 1.3750782012939453, "learning_rate": 1.945612962525023e-06, "loss": 0.3629, "step": 10952 }, { "epoch": 0.6571668566628668, "grad_norm": 1.264861822128296, "learning_rate": 1.9450035816066723e-06, "loss": 0.3651, "step": 10953 }, { "epoch": 0.6572268554628907, "grad_norm": 1.4453816413879395, "learning_rate": 1.944394259413453e-06, "loss": 0.443, "step": 10954 }, { "epoch": 0.6572868542629148, "grad_norm": 1.3831597566604614, "learning_rate": 1.943784995968374e-06, "loss": 0.3715, "step": 10955 }, { "epoch": 0.6573468530629387, "grad_norm": 1.309208869934082, "learning_rate": 1.9431757912944463e-06, "loss": 0.372, "step": 10956 }, { "epoch": 0.6574068518629628, "grad_norm": 1.4626412391662598, "learning_rate": 1.942566645414677e-06, "loss": 0.4437, "step": 10957 }, { "epoch": 0.6574668506629867, "grad_norm": 1.2201578617095947, "learning_rate": 1.9419575583520678e-06, "loss": 0.3726, "step": 10958 }, { "epoch": 0.6575268494630108, "grad_norm": 1.3125038146972656, "learning_rate": 1.941348530129625e-06, "loss": 0.3941, "step": 10959 }, { "epoch": 0.6575868482630347, "grad_norm": 1.1934038400650024, "learning_rate": 1.9407395607703467e-06, "loss": 0.3601, "step": 10960 }, { "epoch": 0.6576468470630588, "grad_norm": 1.359400987625122, "learning_rate": 1.94013065029723e-06, "loss": 0.4083, "step": 10961 }, { "epoch": 0.6577068458630827, "grad_norm": 1.1855964660644531, "learning_rate": 1.9395217987332734e-06, "loss": 0.4051, "step": 10962 }, { "epoch": 0.6577668446631068, "grad_norm": 1.3238924741744995, "learning_rate": 1.9389130061014688e-06, "loss": 0.3988, "step": 10963 }, { "epoch": 0.6578268434631307, "grad_norm": 1.5514267683029175, "learning_rate": 1.9383042724248077e-06, "loss": 0.4214, "step": 10964 }, { "epoch": 0.6578868422631547, "grad_norm": 1.439696192741394, "learning_rate": 1.9376955977262775e-06, "loss": 0.3823, "step": 10965 }, { "epoch": 0.6579468410631787, "grad_norm": 1.4318493604660034, "learning_rate": 1.9370869820288686e-06, "loss": 0.4022, "step": 10966 }, { "epoch": 0.6580068398632027, "grad_norm": 1.346068263053894, "learning_rate": 1.9364784253555637e-06, "loss": 0.438, "step": 10967 }, { "epoch": 0.6580668386632267, "grad_norm": 1.3180506229400635, "learning_rate": 1.9358699277293435e-06, "loss": 0.3605, "step": 10968 }, { "epoch": 0.6581268374632507, "grad_norm": 1.3926719427108765, "learning_rate": 1.9352614891731914e-06, "loss": 0.3999, "step": 10969 }, { "epoch": 0.6581868362632748, "grad_norm": 1.1425647735595703, "learning_rate": 1.934653109710083e-06, "loss": 0.3364, "step": 10970 }, { "epoch": 0.6582468350632987, "grad_norm": 1.2839137315750122, "learning_rate": 1.9340447893629974e-06, "loss": 0.37, "step": 10971 }, { "epoch": 0.6583068338633228, "grad_norm": 1.391686201095581, "learning_rate": 1.9334365281549028e-06, "loss": 0.3981, "step": 10972 }, { "epoch": 0.6583668326633467, "grad_norm": 1.3294483423233032, "learning_rate": 1.9328283261087734e-06, "loss": 0.3959, "step": 10973 }, { "epoch": 0.6584268314633708, "grad_norm": 1.1569238901138306, "learning_rate": 1.932220183247579e-06, "loss": 0.3273, "step": 10974 }, { "epoch": 0.6584868302633947, "grad_norm": 1.4312759637832642, "learning_rate": 1.931612099594286e-06, "loss": 0.4048, "step": 10975 }, { "epoch": 0.6585468290634188, "grad_norm": 1.3180369138717651, "learning_rate": 1.931004075171859e-06, "loss": 0.4221, "step": 10976 }, { "epoch": 0.6586068278634427, "grad_norm": 1.2691588401794434, "learning_rate": 1.930396110003258e-06, "loss": 0.4037, "step": 10977 }, { "epoch": 0.6586668266634668, "grad_norm": 1.272603988647461, "learning_rate": 1.929788204111446e-06, "loss": 0.4007, "step": 10978 }, { "epoch": 0.6587268254634907, "grad_norm": 1.4014387130737305, "learning_rate": 1.929180357519379e-06, "loss": 0.3905, "step": 10979 }, { "epoch": 0.6587868242635148, "grad_norm": 1.2750437259674072, "learning_rate": 1.9285725702500126e-06, "loss": 0.4101, "step": 10980 }, { "epoch": 0.6588468230635387, "grad_norm": 1.4023075103759766, "learning_rate": 1.9279648423263022e-06, "loss": 0.4285, "step": 10981 }, { "epoch": 0.6589068218635628, "grad_norm": 1.4194908142089844, "learning_rate": 1.9273571737711976e-06, "loss": 0.4506, "step": 10982 }, { "epoch": 0.6589668206635867, "grad_norm": 1.2720030546188354, "learning_rate": 1.926749564607648e-06, "loss": 0.3662, "step": 10983 }, { "epoch": 0.6590268194636107, "grad_norm": 1.3833749294281006, "learning_rate": 1.9261420148585973e-06, "loss": 0.4186, "step": 10984 }, { "epoch": 0.6590868182636347, "grad_norm": 1.375199794769287, "learning_rate": 1.9255345245469926e-06, "loss": 0.397, "step": 10985 }, { "epoch": 0.6591468170636587, "grad_norm": 1.3119255304336548, "learning_rate": 1.924927093695777e-06, "loss": 0.3646, "step": 10986 }, { "epoch": 0.6592068158636827, "grad_norm": 1.3273175954818726, "learning_rate": 1.924319722327889e-06, "loss": 0.3744, "step": 10987 }, { "epoch": 0.6592668146637067, "grad_norm": 1.26382577419281, "learning_rate": 1.9237124104662665e-06, "loss": 0.376, "step": 10988 }, { "epoch": 0.6593268134637307, "grad_norm": 1.6095832586288452, "learning_rate": 1.923105158133843e-06, "loss": 0.4086, "step": 10989 }, { "epoch": 0.6593868122637547, "grad_norm": 1.306678295135498, "learning_rate": 1.9224979653535557e-06, "loss": 0.3921, "step": 10990 }, { "epoch": 0.6594468110637787, "grad_norm": 1.3254982233047485, "learning_rate": 1.921890832148332e-06, "loss": 0.3695, "step": 10991 }, { "epoch": 0.6595068098638027, "grad_norm": 1.277222752571106, "learning_rate": 1.921283758541101e-06, "loss": 0.4221, "step": 10992 }, { "epoch": 0.6595668086638268, "grad_norm": 1.3494354486465454, "learning_rate": 1.920676744554791e-06, "loss": 0.4687, "step": 10993 }, { "epoch": 0.6596268074638507, "grad_norm": 1.1385269165039062, "learning_rate": 1.9200697902123253e-06, "loss": 0.398, "step": 10994 }, { "epoch": 0.6596868062638748, "grad_norm": 1.2508960962295532, "learning_rate": 1.9194628955366248e-06, "loss": 0.3215, "step": 10995 }, { "epoch": 0.6597468050638987, "grad_norm": 1.28115975856781, "learning_rate": 1.918856060550609e-06, "loss": 0.3579, "step": 10996 }, { "epoch": 0.6598068038639228, "grad_norm": 1.3164421319961548, "learning_rate": 1.918249285277196e-06, "loss": 0.415, "step": 10997 }, { "epoch": 0.6598668026639467, "grad_norm": 1.3081704378128052, "learning_rate": 1.9176425697393037e-06, "loss": 0.4165, "step": 10998 }, { "epoch": 0.6599268014639708, "grad_norm": 1.230560541152954, "learning_rate": 1.9170359139598393e-06, "loss": 0.3183, "step": 10999 }, { "epoch": 0.6599868002639947, "grad_norm": 1.35199773311615, "learning_rate": 1.9164293179617186e-06, "loss": 0.4125, "step": 11000 }, { "epoch": 0.6600467990640188, "grad_norm": 1.5320727825164795, "learning_rate": 1.915822781767846e-06, "loss": 0.4263, "step": 11001 }, { "epoch": 0.6601067978640427, "grad_norm": 1.2322245836257935, "learning_rate": 1.9152163054011303e-06, "loss": 0.3419, "step": 11002 }, { "epoch": 0.6601667966640667, "grad_norm": 1.3866052627563477, "learning_rate": 1.9146098888844753e-06, "loss": 0.3694, "step": 11003 }, { "epoch": 0.6602267954640907, "grad_norm": 1.2334709167480469, "learning_rate": 1.91400353224078e-06, "loss": 0.4291, "step": 11004 }, { "epoch": 0.6602867942641147, "grad_norm": 1.4585684537887573, "learning_rate": 1.9133972354929473e-06, "loss": 0.4264, "step": 11005 }, { "epoch": 0.6603467930641387, "grad_norm": 1.3123319149017334, "learning_rate": 1.912790998663872e-06, "loss": 0.4067, "step": 11006 }, { "epoch": 0.6604067918641627, "grad_norm": 1.2207823991775513, "learning_rate": 1.9121848217764497e-06, "loss": 0.3586, "step": 11007 }, { "epoch": 0.6604667906641867, "grad_norm": 1.2910699844360352, "learning_rate": 1.911578704853571e-06, "loss": 0.3659, "step": 11008 }, { "epoch": 0.6605267894642107, "grad_norm": 1.268742322921753, "learning_rate": 1.9109726479181295e-06, "loss": 0.3839, "step": 11009 }, { "epoch": 0.6605867882642347, "grad_norm": 1.3485052585601807, "learning_rate": 1.910366650993011e-06, "loss": 0.3437, "step": 11010 }, { "epoch": 0.6606467870642587, "grad_norm": 1.1194865703582764, "learning_rate": 1.909760714101101e-06, "loss": 0.341, "step": 11011 }, { "epoch": 0.6607067858642827, "grad_norm": 1.2133103609085083, "learning_rate": 1.9091548372652846e-06, "loss": 0.3757, "step": 11012 }, { "epoch": 0.6607667846643067, "grad_norm": 1.2914683818817139, "learning_rate": 1.908549020508442e-06, "loss": 0.3817, "step": 11013 }, { "epoch": 0.6608267834643307, "grad_norm": 1.416888952255249, "learning_rate": 1.9079432638534512e-06, "loss": 0.4086, "step": 11014 }, { "epoch": 0.6608867822643547, "grad_norm": 1.2404413223266602, "learning_rate": 1.907337567323191e-06, "loss": 0.3731, "step": 11015 }, { "epoch": 0.6609467810643788, "grad_norm": 1.3036514520645142, "learning_rate": 1.906731930940534e-06, "loss": 0.4012, "step": 11016 }, { "epoch": 0.6610067798644027, "grad_norm": 1.2939085960388184, "learning_rate": 1.9061263547283549e-06, "loss": 0.3868, "step": 11017 }, { "epoch": 0.6610667786644268, "grad_norm": 1.3954914808273315, "learning_rate": 1.90552083870952e-06, "loss": 0.4193, "step": 11018 }, { "epoch": 0.6611267774644507, "grad_norm": 1.330032467842102, "learning_rate": 1.9049153829068977e-06, "loss": 0.4077, "step": 11019 }, { "epoch": 0.6611867762644748, "grad_norm": 1.146969199180603, "learning_rate": 1.904309987343356e-06, "loss": 0.3577, "step": 11020 }, { "epoch": 0.6612467750644987, "grad_norm": 1.1465567350387573, "learning_rate": 1.903704652041756e-06, "loss": 0.3545, "step": 11021 }, { "epoch": 0.6613067738645227, "grad_norm": 1.2960320711135864, "learning_rate": 1.9030993770249587e-06, "loss": 0.3824, "step": 11022 }, { "epoch": 0.6613667726645467, "grad_norm": 1.5387507677078247, "learning_rate": 1.902494162315821e-06, "loss": 0.4173, "step": 11023 }, { "epoch": 0.6614267714645707, "grad_norm": 1.332362174987793, "learning_rate": 1.9018890079372021e-06, "loss": 0.3715, "step": 11024 }, { "epoch": 0.6614867702645947, "grad_norm": 1.1186679601669312, "learning_rate": 1.9012839139119545e-06, "loss": 0.2993, "step": 11025 }, { "epoch": 0.6615467690646187, "grad_norm": 1.2975879907608032, "learning_rate": 1.9006788802629278e-06, "loss": 0.3757, "step": 11026 }, { "epoch": 0.6616067678646427, "grad_norm": 1.2909842729568481, "learning_rate": 1.9000739070129754e-06, "loss": 0.3767, "step": 11027 }, { "epoch": 0.6616667666646667, "grad_norm": 1.4337351322174072, "learning_rate": 1.8994689941849418e-06, "loss": 0.3795, "step": 11028 }, { "epoch": 0.6617267654646907, "grad_norm": 1.3012454509735107, "learning_rate": 1.8988641418016722e-06, "loss": 0.3843, "step": 11029 }, { "epoch": 0.6617867642647147, "grad_norm": 1.2877076864242554, "learning_rate": 1.898259349886008e-06, "loss": 0.3589, "step": 11030 }, { "epoch": 0.6618467630647387, "grad_norm": 1.37698495388031, "learning_rate": 1.8976546184607908e-06, "loss": 0.4257, "step": 11031 }, { "epoch": 0.6619067618647627, "grad_norm": 1.3436353206634521, "learning_rate": 1.8970499475488596e-06, "loss": 0.3756, "step": 11032 }, { "epoch": 0.6619667606647867, "grad_norm": 1.23500394821167, "learning_rate": 1.896445337173049e-06, "loss": 0.3701, "step": 11033 }, { "epoch": 0.6620267594648107, "grad_norm": 1.240964412689209, "learning_rate": 1.8958407873561923e-06, "loss": 0.3968, "step": 11034 }, { "epoch": 0.6620867582648347, "grad_norm": 1.3371020555496216, "learning_rate": 1.8952362981211188e-06, "loss": 0.3709, "step": 11035 }, { "epoch": 0.6621467570648587, "grad_norm": 1.3691996335983276, "learning_rate": 1.894631869490661e-06, "loss": 0.3672, "step": 11036 }, { "epoch": 0.6622067558648828, "grad_norm": 1.4101550579071045, "learning_rate": 1.8940275014876427e-06, "loss": 0.4289, "step": 11037 }, { "epoch": 0.6622667546649067, "grad_norm": 1.320032000541687, "learning_rate": 1.893423194134888e-06, "loss": 0.4278, "step": 11038 }, { "epoch": 0.6623267534649308, "grad_norm": 1.2284444570541382, "learning_rate": 1.8928189474552208e-06, "loss": 0.3676, "step": 11039 }, { "epoch": 0.6623867522649547, "grad_norm": 1.3448585271835327, "learning_rate": 1.8922147614714597e-06, "loss": 0.3829, "step": 11040 }, { "epoch": 0.6624467510649787, "grad_norm": 1.1786537170410156, "learning_rate": 1.8916106362064222e-06, "loss": 0.3661, "step": 11041 }, { "epoch": 0.6625067498650027, "grad_norm": 1.3902044296264648, "learning_rate": 1.8910065716829218e-06, "loss": 0.3976, "step": 11042 }, { "epoch": 0.6625667486650267, "grad_norm": 1.2404381036758423, "learning_rate": 1.8904025679237723e-06, "loss": 0.3694, "step": 11043 }, { "epoch": 0.6626267474650507, "grad_norm": 1.339329481124878, "learning_rate": 1.8897986249517874e-06, "loss": 0.3777, "step": 11044 }, { "epoch": 0.6626867462650747, "grad_norm": 1.3017323017120361, "learning_rate": 1.8891947427897702e-06, "loss": 0.3939, "step": 11045 }, { "epoch": 0.6627467450650987, "grad_norm": 1.3023841381072998, "learning_rate": 1.8885909214605293e-06, "loss": 0.3885, "step": 11046 }, { "epoch": 0.6628067438651227, "grad_norm": 1.2554898262023926, "learning_rate": 1.8879871609868666e-06, "loss": 0.4037, "step": 11047 }, { "epoch": 0.6628667426651467, "grad_norm": 1.2737455368041992, "learning_rate": 1.8873834613915875e-06, "loss": 0.3612, "step": 11048 }, { "epoch": 0.6629267414651707, "grad_norm": 1.5208693742752075, "learning_rate": 1.8867798226974848e-06, "loss": 0.3627, "step": 11049 }, { "epoch": 0.6629867402651947, "grad_norm": 1.3189951181411743, "learning_rate": 1.886176244927359e-06, "loss": 0.4326, "step": 11050 }, { "epoch": 0.6630467390652187, "grad_norm": 1.2295318841934204, "learning_rate": 1.885572728104005e-06, "loss": 0.3523, "step": 11051 }, { "epoch": 0.6631067378652427, "grad_norm": 1.2549419403076172, "learning_rate": 1.8849692722502134e-06, "loss": 0.378, "step": 11052 }, { "epoch": 0.6631667366652667, "grad_norm": 1.2619798183441162, "learning_rate": 1.8843658773887749e-06, "loss": 0.3569, "step": 11053 }, { "epoch": 0.6632267354652907, "grad_norm": 1.3628836870193481, "learning_rate": 1.8837625435424747e-06, "loss": 0.4242, "step": 11054 }, { "epoch": 0.6632867342653147, "grad_norm": 1.3722851276397705, "learning_rate": 1.883159270734101e-06, "loss": 0.3523, "step": 11055 }, { "epoch": 0.6633467330653386, "grad_norm": 1.3808677196502686, "learning_rate": 1.8825560589864355e-06, "loss": 0.3917, "step": 11056 }, { "epoch": 0.6634067318653627, "grad_norm": 1.2430447340011597, "learning_rate": 1.8819529083222573e-06, "loss": 0.3812, "step": 11057 }, { "epoch": 0.6634667306653866, "grad_norm": 1.2459566593170166, "learning_rate": 1.8813498187643473e-06, "loss": 0.4019, "step": 11058 }, { "epoch": 0.6635267294654107, "grad_norm": 1.2116044759750366, "learning_rate": 1.8807467903354794e-06, "loss": 0.3979, "step": 11059 }, { "epoch": 0.6635867282654347, "grad_norm": 1.349941611289978, "learning_rate": 1.8801438230584275e-06, "loss": 0.3731, "step": 11060 }, { "epoch": 0.6636467270654587, "grad_norm": 1.1510772705078125, "learning_rate": 1.879540916955964e-06, "loss": 0.3444, "step": 11061 }, { "epoch": 0.6637067258654827, "grad_norm": 1.2372891902923584, "learning_rate": 1.8789380720508564e-06, "loss": 0.3849, "step": 11062 }, { "epoch": 0.6637667246655067, "grad_norm": 1.2112350463867188, "learning_rate": 1.878335288365875e-06, "loss": 0.3365, "step": 11063 }, { "epoch": 0.6638267234655307, "grad_norm": 1.3484160900115967, "learning_rate": 1.8777325659237783e-06, "loss": 0.3797, "step": 11064 }, { "epoch": 0.6638867222655547, "grad_norm": 1.2931668758392334, "learning_rate": 1.8771299047473334e-06, "loss": 0.3617, "step": 11065 }, { "epoch": 0.6639467210655787, "grad_norm": 1.331334114074707, "learning_rate": 1.8765273048592968e-06, "loss": 0.404, "step": 11066 }, { "epoch": 0.6640067198656027, "grad_norm": 1.4329924583435059, "learning_rate": 1.8759247662824283e-06, "loss": 0.3816, "step": 11067 }, { "epoch": 0.6640667186656267, "grad_norm": 1.366409420967102, "learning_rate": 1.8753222890394825e-06, "loss": 0.4127, "step": 11068 }, { "epoch": 0.6641267174656507, "grad_norm": 1.2821463346481323, "learning_rate": 1.8747198731532105e-06, "loss": 0.3755, "step": 11069 }, { "epoch": 0.6641867162656747, "grad_norm": 1.247629165649414, "learning_rate": 1.8741175186463651e-06, "loss": 0.3778, "step": 11070 }, { "epoch": 0.6642467150656987, "grad_norm": 1.4003705978393555, "learning_rate": 1.8735152255416939e-06, "loss": 0.372, "step": 11071 }, { "epoch": 0.6643067138657227, "grad_norm": 1.3294175863265991, "learning_rate": 1.8729129938619408e-06, "loss": 0.4133, "step": 11072 }, { "epoch": 0.6643667126657467, "grad_norm": 1.277206540107727, "learning_rate": 1.8723108236298527e-06, "loss": 0.3798, "step": 11073 }, { "epoch": 0.6644267114657707, "grad_norm": 1.219580888748169, "learning_rate": 1.8717087148681688e-06, "loss": 0.3397, "step": 11074 }, { "epoch": 0.6644867102657946, "grad_norm": 1.4326673746109009, "learning_rate": 1.871106667599628e-06, "loss": 0.3985, "step": 11075 }, { "epoch": 0.6645467090658187, "grad_norm": 1.477984070777893, "learning_rate": 1.8705046818469659e-06, "loss": 0.4486, "step": 11076 }, { "epoch": 0.6646067078658426, "grad_norm": 1.2760354280471802, "learning_rate": 1.8699027576329198e-06, "loss": 0.3952, "step": 11077 }, { "epoch": 0.6646667066658667, "grad_norm": 1.2404204607009888, "learning_rate": 1.8693008949802184e-06, "loss": 0.3688, "step": 11078 }, { "epoch": 0.6647267054658906, "grad_norm": 1.1906421184539795, "learning_rate": 1.8686990939115934e-06, "loss": 0.3604, "step": 11079 }, { "epoch": 0.6647867042659147, "grad_norm": 1.301544189453125, "learning_rate": 1.868097354449772e-06, "loss": 0.3827, "step": 11080 }, { "epoch": 0.6648467030659386, "grad_norm": 1.3242143392562866, "learning_rate": 1.867495676617477e-06, "loss": 0.407, "step": 11081 }, { "epoch": 0.6649067018659627, "grad_norm": 1.4246209859848022, "learning_rate": 1.8668940604374344e-06, "loss": 0.3913, "step": 11082 }, { "epoch": 0.6649667006659867, "grad_norm": 1.2031632661819458, "learning_rate": 1.8662925059323625e-06, "loss": 0.3432, "step": 11083 }, { "epoch": 0.6650266994660107, "grad_norm": 1.3031302690505981, "learning_rate": 1.865691013124978e-06, "loss": 0.3801, "step": 11084 }, { "epoch": 0.6650866982660347, "grad_norm": 1.4136629104614258, "learning_rate": 1.8650895820379994e-06, "loss": 0.3749, "step": 11085 }, { "epoch": 0.6651466970660587, "grad_norm": 1.3870487213134766, "learning_rate": 1.8644882126941385e-06, "loss": 0.3871, "step": 11086 }, { "epoch": 0.6652066958660827, "grad_norm": 1.2353479862213135, "learning_rate": 1.863886905116107e-06, "loss": 0.3943, "step": 11087 }, { "epoch": 0.6652666946661067, "grad_norm": 1.236252784729004, "learning_rate": 1.8632856593266112e-06, "loss": 0.3645, "step": 11088 }, { "epoch": 0.6653266934661307, "grad_norm": 1.292536735534668, "learning_rate": 1.8626844753483604e-06, "loss": 0.3838, "step": 11089 }, { "epoch": 0.6653866922661547, "grad_norm": 1.2701528072357178, "learning_rate": 1.8620833532040579e-06, "loss": 0.361, "step": 11090 }, { "epoch": 0.6654466910661787, "grad_norm": 1.3153001070022583, "learning_rate": 1.8614822929164033e-06, "loss": 0.3564, "step": 11091 }, { "epoch": 0.6655066898662026, "grad_norm": 1.3488333225250244, "learning_rate": 1.8608812945080986e-06, "loss": 0.4081, "step": 11092 }, { "epoch": 0.6655666886662267, "grad_norm": 1.5156331062316895, "learning_rate": 1.8602803580018382e-06, "loss": 0.3992, "step": 11093 }, { "epoch": 0.6656266874662506, "grad_norm": 1.2804787158966064, "learning_rate": 1.859679483420321e-06, "loss": 0.3501, "step": 11094 }, { "epoch": 0.6656866862662747, "grad_norm": 1.4185670614242554, "learning_rate": 1.8590786707862334e-06, "loss": 0.404, "step": 11095 }, { "epoch": 0.6657466850662986, "grad_norm": 1.2492709159851074, "learning_rate": 1.8584779201222688e-06, "loss": 0.3877, "step": 11096 }, { "epoch": 0.6658066838663227, "grad_norm": 1.41505765914917, "learning_rate": 1.857877231451116e-06, "loss": 0.409, "step": 11097 }, { "epoch": 0.6658666826663466, "grad_norm": 1.2313625812530518, "learning_rate": 1.8572766047954582e-06, "loss": 0.4028, "step": 11098 }, { "epoch": 0.6659266814663707, "grad_norm": 1.3106107711791992, "learning_rate": 1.8566760401779788e-06, "loss": 0.3771, "step": 11099 }, { "epoch": 0.6659866802663946, "grad_norm": 1.495252251625061, "learning_rate": 1.8560755376213573e-06, "loss": 0.4162, "step": 11100 }, { "epoch": 0.6660466790664187, "grad_norm": 1.489258050918579, "learning_rate": 1.855475097148274e-06, "loss": 0.4561, "step": 11101 }, { "epoch": 0.6661066778664426, "grad_norm": 1.3013542890548706, "learning_rate": 1.854874718781404e-06, "loss": 0.3693, "step": 11102 }, { "epoch": 0.6661666766664667, "grad_norm": 1.4115508794784546, "learning_rate": 1.8542744025434194e-06, "loss": 0.4324, "step": 11103 }, { "epoch": 0.6662266754664907, "grad_norm": 1.1458916664123535, "learning_rate": 1.8536741484569944e-06, "loss": 0.3851, "step": 11104 }, { "epoch": 0.6662866742665147, "grad_norm": 1.1727062463760376, "learning_rate": 1.8530739565447954e-06, "loss": 0.3659, "step": 11105 }, { "epoch": 0.6663466730665387, "grad_norm": 1.3108593225479126, "learning_rate": 1.8524738268294902e-06, "loss": 0.3702, "step": 11106 }, { "epoch": 0.6664066718665627, "grad_norm": 1.2602463960647583, "learning_rate": 1.8518737593337414e-06, "loss": 0.4082, "step": 11107 }, { "epoch": 0.6664666706665867, "grad_norm": 1.207592248916626, "learning_rate": 1.8512737540802113e-06, "loss": 0.3956, "step": 11108 }, { "epoch": 0.6665266694666107, "grad_norm": 1.257359266281128, "learning_rate": 1.8506738110915629e-06, "loss": 0.3867, "step": 11109 }, { "epoch": 0.6665866682666347, "grad_norm": 1.2459667921066284, "learning_rate": 1.8500739303904474e-06, "loss": 0.354, "step": 11110 }, { "epoch": 0.6666466670666586, "grad_norm": 1.2924830913543701, "learning_rate": 1.8494741119995241e-06, "loss": 0.3957, "step": 11111 }, { "epoch": 0.6667066658666827, "grad_norm": 1.3538378477096558, "learning_rate": 1.8488743559414428e-06, "loss": 0.3633, "step": 11112 }, { "epoch": 0.6667666646667066, "grad_norm": 1.3078190088272095, "learning_rate": 1.8482746622388554e-06, "loss": 0.408, "step": 11113 }, { "epoch": 0.6668266634667307, "grad_norm": 1.3434808254241943, "learning_rate": 1.8476750309144087e-06, "loss": 0.3777, "step": 11114 }, { "epoch": 0.6668866622667546, "grad_norm": 1.4287434816360474, "learning_rate": 1.847075461990747e-06, "loss": 0.4176, "step": 11115 }, { "epoch": 0.6669466610667787, "grad_norm": 1.3552981615066528, "learning_rate": 1.8464759554905152e-06, "loss": 0.4117, "step": 11116 }, { "epoch": 0.6670066598668026, "grad_norm": 1.25930655002594, "learning_rate": 1.8458765114363537e-06, "loss": 0.361, "step": 11117 }, { "epoch": 0.6670666586668267, "grad_norm": 1.2827167510986328, "learning_rate": 1.8452771298509004e-06, "loss": 0.3969, "step": 11118 }, { "epoch": 0.6671266574668506, "grad_norm": 1.5029462575912476, "learning_rate": 1.8446778107567893e-06, "loss": 0.4559, "step": 11119 }, { "epoch": 0.6671866562668747, "grad_norm": 1.2443897724151611, "learning_rate": 1.8440785541766574e-06, "loss": 0.404, "step": 11120 }, { "epoch": 0.6672466550668986, "grad_norm": 1.2566397190093994, "learning_rate": 1.8434793601331338e-06, "loss": 0.3917, "step": 11121 }, { "epoch": 0.6673066538669227, "grad_norm": 1.348410964012146, "learning_rate": 1.8428802286488457e-06, "loss": 0.3969, "step": 11122 }, { "epoch": 0.6673666526669466, "grad_norm": 1.1951390504837036, "learning_rate": 1.842281159746424e-06, "loss": 0.3985, "step": 11123 }, { "epoch": 0.6674266514669707, "grad_norm": 1.3514277935028076, "learning_rate": 1.8416821534484885e-06, "loss": 0.4124, "step": 11124 }, { "epoch": 0.6674866502669946, "grad_norm": 1.5020185708999634, "learning_rate": 1.8410832097776643e-06, "loss": 0.4339, "step": 11125 }, { "epoch": 0.6675466490670187, "grad_norm": 1.371446967124939, "learning_rate": 1.8404843287565687e-06, "loss": 0.361, "step": 11126 }, { "epoch": 0.6676066478670427, "grad_norm": 1.3975306749343872, "learning_rate": 1.8398855104078183e-06, "loss": 0.3813, "step": 11127 }, { "epoch": 0.6676666466670667, "grad_norm": 1.2618961334228516, "learning_rate": 1.8392867547540301e-06, "loss": 0.358, "step": 11128 }, { "epoch": 0.6677266454670907, "grad_norm": 1.3078116178512573, "learning_rate": 1.8386880618178155e-06, "loss": 0.3782, "step": 11129 }, { "epoch": 0.6677866442671146, "grad_norm": 1.3009394407272339, "learning_rate": 1.8380894316217833e-06, "loss": 0.3614, "step": 11130 }, { "epoch": 0.6678466430671387, "grad_norm": 1.1556787490844727, "learning_rate": 1.8374908641885403e-06, "loss": 0.3964, "step": 11131 }, { "epoch": 0.6679066418671626, "grad_norm": 1.370859146118164, "learning_rate": 1.8368923595406947e-06, "loss": 0.3378, "step": 11132 }, { "epoch": 0.6679666406671867, "grad_norm": 1.2802643775939941, "learning_rate": 1.8362939177008469e-06, "loss": 0.3992, "step": 11133 }, { "epoch": 0.6680266394672106, "grad_norm": 1.1858512163162231, "learning_rate": 1.8356955386915969e-06, "loss": 0.3982, "step": 11134 }, { "epoch": 0.6680866382672347, "grad_norm": 1.2266825437545776, "learning_rate": 1.8350972225355456e-06, "loss": 0.3874, "step": 11135 }, { "epoch": 0.6681466370672586, "grad_norm": 1.2384836673736572, "learning_rate": 1.8344989692552868e-06, "loss": 0.399, "step": 11136 }, { "epoch": 0.6682066358672827, "grad_norm": 1.331286072731018, "learning_rate": 1.833900778873412e-06, "loss": 0.3885, "step": 11137 }, { "epoch": 0.6682666346673066, "grad_norm": 1.4462429285049438, "learning_rate": 1.8333026514125161e-06, "loss": 0.4451, "step": 11138 }, { "epoch": 0.6683266334673307, "grad_norm": 1.3094806671142578, "learning_rate": 1.8327045868951835e-06, "loss": 0.3644, "step": 11139 }, { "epoch": 0.6683866322673546, "grad_norm": 1.4103062152862549, "learning_rate": 1.832106585344006e-06, "loss": 0.4238, "step": 11140 }, { "epoch": 0.6684466310673787, "grad_norm": 1.2747317552566528, "learning_rate": 1.8315086467815609e-06, "loss": 0.3609, "step": 11141 }, { "epoch": 0.6685066298674026, "grad_norm": 1.2449342012405396, "learning_rate": 1.8309107712304323e-06, "loss": 0.3681, "step": 11142 }, { "epoch": 0.6685666286674267, "grad_norm": 1.163560152053833, "learning_rate": 1.8303129587132014e-06, "loss": 0.3429, "step": 11143 }, { "epoch": 0.6686266274674506, "grad_norm": 1.4299269914627075, "learning_rate": 1.8297152092524423e-06, "loss": 0.3823, "step": 11144 }, { "epoch": 0.6686866262674747, "grad_norm": 1.2641462087631226, "learning_rate": 1.8291175228707302e-06, "loss": 0.3923, "step": 11145 }, { "epoch": 0.6687466250674986, "grad_norm": 1.3491286039352417, "learning_rate": 1.8285198995906359e-06, "loss": 0.3817, "step": 11146 }, { "epoch": 0.6688066238675227, "grad_norm": 1.3258094787597656, "learning_rate": 1.8279223394347303e-06, "loss": 0.3298, "step": 11147 }, { "epoch": 0.6688666226675466, "grad_norm": 1.5215463638305664, "learning_rate": 1.8273248424255805e-06, "loss": 0.4294, "step": 11148 }, { "epoch": 0.6689266214675706, "grad_norm": 1.2654014825820923, "learning_rate": 1.8267274085857492e-06, "loss": 0.3838, "step": 11149 }, { "epoch": 0.6689866202675947, "grad_norm": 1.1068663597106934, "learning_rate": 1.8261300379378016e-06, "loss": 0.335, "step": 11150 }, { "epoch": 0.6690466190676186, "grad_norm": 1.1691854000091553, "learning_rate": 1.8255327305042964e-06, "loss": 0.4101, "step": 11151 }, { "epoch": 0.6691066178676427, "grad_norm": 1.4697210788726807, "learning_rate": 1.8249354863077909e-06, "loss": 0.3785, "step": 11152 }, { "epoch": 0.6691666166676666, "grad_norm": 1.1661274433135986, "learning_rate": 1.8243383053708395e-06, "loss": 0.44, "step": 11153 }, { "epoch": 0.6692266154676907, "grad_norm": 1.3042343854904175, "learning_rate": 1.823741187715996e-06, "loss": 0.4071, "step": 11154 }, { "epoch": 0.6692866142677146, "grad_norm": 1.310585618019104, "learning_rate": 1.8231441333658135e-06, "loss": 0.3753, "step": 11155 }, { "epoch": 0.6693466130677387, "grad_norm": 1.2734949588775635, "learning_rate": 1.8225471423428343e-06, "loss": 0.3189, "step": 11156 }, { "epoch": 0.6694066118677626, "grad_norm": 1.3260915279388428, "learning_rate": 1.8219502146696083e-06, "loss": 0.4091, "step": 11157 }, { "epoch": 0.6694666106677867, "grad_norm": 1.297430157661438, "learning_rate": 1.8213533503686766e-06, "loss": 0.3855, "step": 11158 }, { "epoch": 0.6695266094678106, "grad_norm": 1.0891362428665161, "learning_rate": 1.8207565494625827e-06, "loss": 0.3429, "step": 11159 }, { "epoch": 0.6695866082678347, "grad_norm": 1.259692907333374, "learning_rate": 1.8201598119738625e-06, "loss": 0.356, "step": 11160 }, { "epoch": 0.6696466070678586, "grad_norm": 1.1635651588439941, "learning_rate": 1.8195631379250522e-06, "loss": 0.3772, "step": 11161 }, { "epoch": 0.6697066058678827, "grad_norm": 1.4535839557647705, "learning_rate": 1.8189665273386869e-06, "loss": 0.4694, "step": 11162 }, { "epoch": 0.6697666046679066, "grad_norm": 1.2352635860443115, "learning_rate": 1.8183699802372973e-06, "loss": 0.4307, "step": 11163 }, { "epoch": 0.6698266034679307, "grad_norm": 1.3300375938415527, "learning_rate": 1.8177734966434121e-06, "loss": 0.4033, "step": 11164 }, { "epoch": 0.6698866022679546, "grad_norm": 1.2801122665405273, "learning_rate": 1.8171770765795565e-06, "loss": 0.41, "step": 11165 }, { "epoch": 0.6699466010679787, "grad_norm": 1.4007129669189453, "learning_rate": 1.8165807200682567e-06, "loss": 0.3592, "step": 11166 }, { "epoch": 0.6700065998680026, "grad_norm": 1.118653416633606, "learning_rate": 1.8159844271320338e-06, "loss": 0.3316, "step": 11167 }, { "epoch": 0.6700665986680266, "grad_norm": 1.3265467882156372, "learning_rate": 1.8153881977934055e-06, "loss": 0.3751, "step": 11168 }, { "epoch": 0.6701265974680506, "grad_norm": 1.293755292892456, "learning_rate": 1.814792032074891e-06, "loss": 0.3656, "step": 11169 }, { "epoch": 0.6701865962680746, "grad_norm": 1.279704213142395, "learning_rate": 1.8141959299990026e-06, "loss": 0.3355, "step": 11170 }, { "epoch": 0.6702465950680986, "grad_norm": 1.2413984537124634, "learning_rate": 1.8135998915882555e-06, "loss": 0.3632, "step": 11171 }, { "epoch": 0.6703065938681226, "grad_norm": 1.3466962575912476, "learning_rate": 1.8130039168651553e-06, "loss": 0.4085, "step": 11172 }, { "epoch": 0.6703665926681467, "grad_norm": 1.2973830699920654, "learning_rate": 1.812408005852211e-06, "loss": 0.3876, "step": 11173 }, { "epoch": 0.6704265914681706, "grad_norm": 1.2827211618423462, "learning_rate": 1.8118121585719287e-06, "loss": 0.3788, "step": 11174 }, { "epoch": 0.6704865902681947, "grad_norm": 1.3546191453933716, "learning_rate": 1.8112163750468103e-06, "loss": 0.3618, "step": 11175 }, { "epoch": 0.6705465890682186, "grad_norm": 1.3426132202148438, "learning_rate": 1.810620655299355e-06, "loss": 0.3835, "step": 11176 }, { "epoch": 0.6706065878682427, "grad_norm": 1.3217766284942627, "learning_rate": 1.8100249993520593e-06, "loss": 0.3807, "step": 11177 }, { "epoch": 0.6706665866682666, "grad_norm": 1.3738386631011963, "learning_rate": 1.8094294072274216e-06, "loss": 0.3673, "step": 11178 }, { "epoch": 0.6707265854682907, "grad_norm": 1.4468753337860107, "learning_rate": 1.8088338789479328e-06, "loss": 0.3859, "step": 11179 }, { "epoch": 0.6707865842683146, "grad_norm": 1.163832187652588, "learning_rate": 1.808238414536082e-06, "loss": 0.3521, "step": 11180 }, { "epoch": 0.6708465830683387, "grad_norm": 1.3097879886627197, "learning_rate": 1.80764301401436e-06, "loss": 0.3466, "step": 11181 }, { "epoch": 0.6709065818683626, "grad_norm": 1.3097933530807495, "learning_rate": 1.807047677405251e-06, "loss": 0.3508, "step": 11182 }, { "epoch": 0.6709665806683867, "grad_norm": 1.3844573497772217, "learning_rate": 1.8064524047312367e-06, "loss": 0.4008, "step": 11183 }, { "epoch": 0.6710265794684106, "grad_norm": 1.2920414209365845, "learning_rate": 1.8058571960148012e-06, "loss": 0.3908, "step": 11184 }, { "epoch": 0.6710865782684347, "grad_norm": 1.2484169006347656, "learning_rate": 1.8052620512784189e-06, "loss": 0.4061, "step": 11185 }, { "epoch": 0.6711465770684586, "grad_norm": 1.3419653177261353, "learning_rate": 1.8046669705445707e-06, "loss": 0.4059, "step": 11186 }, { "epoch": 0.6712065758684826, "grad_norm": 1.3901910781860352, "learning_rate": 1.8040719538357247e-06, "loss": 0.3787, "step": 11187 }, { "epoch": 0.6712665746685066, "grad_norm": 1.2864505052566528, "learning_rate": 1.8034770011743555e-06, "loss": 0.4006, "step": 11188 }, { "epoch": 0.6713265734685306, "grad_norm": 1.3731831312179565, "learning_rate": 1.802882112582929e-06, "loss": 0.368, "step": 11189 }, { "epoch": 0.6713865722685546, "grad_norm": 1.3394086360931396, "learning_rate": 1.8022872880839146e-06, "loss": 0.3423, "step": 11190 }, { "epoch": 0.6714465710685786, "grad_norm": 1.2662068605422974, "learning_rate": 1.8016925276997747e-06, "loss": 0.4133, "step": 11191 }, { "epoch": 0.6715065698686026, "grad_norm": 1.3018856048583984, "learning_rate": 1.801097831452969e-06, "loss": 0.3941, "step": 11192 }, { "epoch": 0.6715665686686266, "grad_norm": 1.2000439167022705, "learning_rate": 1.8005031993659595e-06, "loss": 0.4107, "step": 11193 }, { "epoch": 0.6716265674686507, "grad_norm": 1.2599668502807617, "learning_rate": 1.7999086314612012e-06, "loss": 0.3943, "step": 11194 }, { "epoch": 0.6716865662686746, "grad_norm": 1.2383685111999512, "learning_rate": 1.7993141277611467e-06, "loss": 0.3897, "step": 11195 }, { "epoch": 0.6717465650686987, "grad_norm": 1.3229994773864746, "learning_rate": 1.798719688288251e-06, "loss": 0.3671, "step": 11196 }, { "epoch": 0.6718065638687226, "grad_norm": 1.371970772743225, "learning_rate": 1.7981253130649615e-06, "loss": 0.38, "step": 11197 }, { "epoch": 0.6718665626687467, "grad_norm": 1.4235910177230835, "learning_rate": 1.7975310021137256e-06, "loss": 0.3542, "step": 11198 }, { "epoch": 0.6719265614687706, "grad_norm": 1.212952733039856, "learning_rate": 1.7969367554569858e-06, "loss": 0.4103, "step": 11199 }, { "epoch": 0.6719865602687947, "grad_norm": 1.3838024139404297, "learning_rate": 1.7963425731171866e-06, "loss": 0.3852, "step": 11200 }, { "epoch": 0.6720465590688186, "grad_norm": 1.4156769514083862, "learning_rate": 1.795748455116767e-06, "loss": 0.3849, "step": 11201 }, { "epoch": 0.6721065578688427, "grad_norm": 1.2979198694229126, "learning_rate": 1.7951544014781618e-06, "loss": 0.3927, "step": 11202 }, { "epoch": 0.6721665566688666, "grad_norm": 1.214045524597168, "learning_rate": 1.7945604122238093e-06, "loss": 0.3555, "step": 11203 }, { "epoch": 0.6722265554688907, "grad_norm": 1.255634069442749, "learning_rate": 1.7939664873761387e-06, "loss": 0.3447, "step": 11204 }, { "epoch": 0.6722865542689146, "grad_norm": 1.3026164770126343, "learning_rate": 1.7933726269575825e-06, "loss": 0.4515, "step": 11205 }, { "epoch": 0.6723465530689386, "grad_norm": 1.350549578666687, "learning_rate": 1.792778830990567e-06, "loss": 0.4131, "step": 11206 }, { "epoch": 0.6724065518689626, "grad_norm": 1.2325537204742432, "learning_rate": 1.7921850994975154e-06, "loss": 0.3956, "step": 11207 }, { "epoch": 0.6724665506689866, "grad_norm": 1.2016626596450806, "learning_rate": 1.7915914325008533e-06, "loss": 0.3665, "step": 11208 }, { "epoch": 0.6725265494690106, "grad_norm": 1.4166725873947144, "learning_rate": 1.7909978300229993e-06, "loss": 0.4022, "step": 11209 }, { "epoch": 0.6725865482690346, "grad_norm": 1.3728735446929932, "learning_rate": 1.7904042920863706e-06, "loss": 0.3976, "step": 11210 }, { "epoch": 0.6726465470690586, "grad_norm": 1.3044793605804443, "learning_rate": 1.789810818713382e-06, "loss": 0.3787, "step": 11211 }, { "epoch": 0.6727065458690826, "grad_norm": 1.4511170387268066, "learning_rate": 1.7892174099264473e-06, "loss": 0.3612, "step": 11212 }, { "epoch": 0.6727665446691066, "grad_norm": 1.3996975421905518, "learning_rate": 1.788624065747979e-06, "loss": 0.3731, "step": 11213 }, { "epoch": 0.6728265434691306, "grad_norm": 1.5263820886611938, "learning_rate": 1.7880307862003802e-06, "loss": 0.4228, "step": 11214 }, { "epoch": 0.6728865422691546, "grad_norm": 1.2582954168319702, "learning_rate": 1.7874375713060602e-06, "loss": 0.3749, "step": 11215 }, { "epoch": 0.6729465410691786, "grad_norm": 1.2080987691879272, "learning_rate": 1.78684442108742e-06, "loss": 0.379, "step": 11216 }, { "epoch": 0.6730065398692027, "grad_norm": 1.2698760032653809, "learning_rate": 1.786251335566863e-06, "loss": 0.3757, "step": 11217 }, { "epoch": 0.6730665386692266, "grad_norm": 1.3364595174789429, "learning_rate": 1.7856583147667824e-06, "loss": 0.4112, "step": 11218 }, { "epoch": 0.6731265374692507, "grad_norm": 1.2197585105895996, "learning_rate": 1.785065358709577e-06, "loss": 0.375, "step": 11219 }, { "epoch": 0.6731865362692746, "grad_norm": 1.4493558406829834, "learning_rate": 1.784472467417641e-06, "loss": 0.3855, "step": 11220 }, { "epoch": 0.6732465350692987, "grad_norm": 1.459872841835022, "learning_rate": 1.7838796409133643e-06, "loss": 0.3953, "step": 11221 }, { "epoch": 0.6733065338693226, "grad_norm": 1.2763175964355469, "learning_rate": 1.7832868792191343e-06, "loss": 0.3842, "step": 11222 }, { "epoch": 0.6733665326693467, "grad_norm": 1.4630199670791626, "learning_rate": 1.7826941823573365e-06, "loss": 0.3718, "step": 11223 }, { "epoch": 0.6734265314693706, "grad_norm": 1.2106021642684937, "learning_rate": 1.7821015503503571e-06, "loss": 0.4025, "step": 11224 }, { "epoch": 0.6734865302693946, "grad_norm": 1.2261497974395752, "learning_rate": 1.7815089832205749e-06, "loss": 0.3865, "step": 11225 }, { "epoch": 0.6735465290694186, "grad_norm": 1.2902226448059082, "learning_rate": 1.7809164809903679e-06, "loss": 0.3321, "step": 11226 }, { "epoch": 0.6736065278694426, "grad_norm": 1.3797674179077148, "learning_rate": 1.7803240436821148e-06, "loss": 0.3709, "step": 11227 }, { "epoch": 0.6736665266694666, "grad_norm": 1.2881172895431519, "learning_rate": 1.7797316713181874e-06, "loss": 0.3819, "step": 11228 }, { "epoch": 0.6737265254694906, "grad_norm": 1.2924383878707886, "learning_rate": 1.7791393639209573e-06, "loss": 0.3732, "step": 11229 }, { "epoch": 0.6737865242695146, "grad_norm": 1.1996667385101318, "learning_rate": 1.7785471215127922e-06, "loss": 0.4182, "step": 11230 }, { "epoch": 0.6738465230695386, "grad_norm": 1.4007519483566284, "learning_rate": 1.7779549441160594e-06, "loss": 0.4198, "step": 11231 }, { "epoch": 0.6739065218695626, "grad_norm": 1.3162317276000977, "learning_rate": 1.777362831753125e-06, "loss": 0.3726, "step": 11232 }, { "epoch": 0.6739665206695866, "grad_norm": 1.3956644535064697, "learning_rate": 1.7767707844463457e-06, "loss": 0.3995, "step": 11233 }, { "epoch": 0.6740265194696106, "grad_norm": 1.3702493906021118, "learning_rate": 1.7761788022180839e-06, "loss": 0.3633, "step": 11234 }, { "epoch": 0.6740865182696346, "grad_norm": 1.2536873817443848, "learning_rate": 1.7755868850906939e-06, "loss": 0.3948, "step": 11235 }, { "epoch": 0.6741465170696586, "grad_norm": 1.2843255996704102, "learning_rate": 1.7749950330865325e-06, "loss": 0.3772, "step": 11236 }, { "epoch": 0.6742065158696826, "grad_norm": 1.316590666770935, "learning_rate": 1.7744032462279493e-06, "loss": 0.3761, "step": 11237 }, { "epoch": 0.6742665146697066, "grad_norm": 1.3825095891952515, "learning_rate": 1.7738115245372922e-06, "loss": 0.4144, "step": 11238 }, { "epoch": 0.6743265134697306, "grad_norm": 1.3260964155197144, "learning_rate": 1.7732198680369106e-06, "loss": 0.3985, "step": 11239 }, { "epoch": 0.6743865122697547, "grad_norm": 1.2635282278060913, "learning_rate": 1.7726282767491475e-06, "loss": 0.3661, "step": 11240 }, { "epoch": 0.6744465110697786, "grad_norm": 1.2317129373550415, "learning_rate": 1.772036750696345e-06, "loss": 0.3867, "step": 11241 }, { "epoch": 0.6745065098698027, "grad_norm": 1.4885376691818237, "learning_rate": 1.7714452899008402e-06, "loss": 0.4607, "step": 11242 }, { "epoch": 0.6745665086698266, "grad_norm": 1.2629269361495972, "learning_rate": 1.7708538943849723e-06, "loss": 0.3529, "step": 11243 }, { "epoch": 0.6746265074698506, "grad_norm": 1.4950644969940186, "learning_rate": 1.7702625641710756e-06, "loss": 0.3963, "step": 11244 }, { "epoch": 0.6746865062698746, "grad_norm": 1.1592410802841187, "learning_rate": 1.7696712992814794e-06, "loss": 0.3332, "step": 11245 }, { "epoch": 0.6747465050698986, "grad_norm": 1.3148185014724731, "learning_rate": 1.7690800997385166e-06, "loss": 0.3762, "step": 11246 }, { "epoch": 0.6748065038699226, "grad_norm": 1.4120570421218872, "learning_rate": 1.768488965564512e-06, "loss": 0.4365, "step": 11247 }, { "epoch": 0.6748665026699466, "grad_norm": 1.2023380994796753, "learning_rate": 1.7678978967817894e-06, "loss": 0.3723, "step": 11248 }, { "epoch": 0.6749265014699706, "grad_norm": 1.2802571058273315, "learning_rate": 1.767306893412673e-06, "loss": 0.3488, "step": 11249 }, { "epoch": 0.6749865002699946, "grad_norm": 1.274385929107666, "learning_rate": 1.7667159554794799e-06, "loss": 0.3777, "step": 11250 }, { "epoch": 0.6750464990700186, "grad_norm": 1.3149641752243042, "learning_rate": 1.7661250830045297e-06, "loss": 0.3934, "step": 11251 }, { "epoch": 0.6751064978700426, "grad_norm": 1.3959558010101318, "learning_rate": 1.765534276010135e-06, "loss": 0.3639, "step": 11252 }, { "epoch": 0.6751664966700666, "grad_norm": 1.44214928150177, "learning_rate": 1.7649435345186076e-06, "loss": 0.4324, "step": 11253 }, { "epoch": 0.6752264954700906, "grad_norm": 1.258217453956604, "learning_rate": 1.7643528585522592e-06, "loss": 0.3761, "step": 11254 }, { "epoch": 0.6752864942701146, "grad_norm": 1.4537395238876343, "learning_rate": 1.763762248133396e-06, "loss": 0.4272, "step": 11255 }, { "epoch": 0.6753464930701386, "grad_norm": 1.466059923171997, "learning_rate": 1.7631717032843223e-06, "loss": 0.3853, "step": 11256 }, { "epoch": 0.6754064918701626, "grad_norm": 1.224228024482727, "learning_rate": 1.7625812240273392e-06, "loss": 0.319, "step": 11257 }, { "epoch": 0.6754664906701866, "grad_norm": 1.1746007204055786, "learning_rate": 1.7619908103847483e-06, "loss": 0.3487, "step": 11258 }, { "epoch": 0.6755264894702105, "grad_norm": 1.3975627422332764, "learning_rate": 1.7614004623788469e-06, "loss": 0.4072, "step": 11259 }, { "epoch": 0.6755864882702346, "grad_norm": 1.4300665855407715, "learning_rate": 1.7608101800319274e-06, "loss": 0.3684, "step": 11260 }, { "epoch": 0.6756464870702587, "grad_norm": 1.3020493984222412, "learning_rate": 1.7602199633662852e-06, "loss": 0.4043, "step": 11261 }, { "epoch": 0.6757064858702826, "grad_norm": 1.277403712272644, "learning_rate": 1.7596298124042069e-06, "loss": 0.3548, "step": 11262 }, { "epoch": 0.6757664846703066, "grad_norm": 1.4952588081359863, "learning_rate": 1.759039727167984e-06, "loss": 0.4349, "step": 11263 }, { "epoch": 0.6758264834703306, "grad_norm": 1.3474301099777222, "learning_rate": 1.7584497076798964e-06, "loss": 0.4167, "step": 11264 }, { "epoch": 0.6758864822703546, "grad_norm": 1.2379958629608154, "learning_rate": 1.7578597539622287e-06, "loss": 0.3345, "step": 11265 }, { "epoch": 0.6759464810703786, "grad_norm": 1.3012046813964844, "learning_rate": 1.757269866037263e-06, "loss": 0.3941, "step": 11266 }, { "epoch": 0.6760064798704026, "grad_norm": 1.3312597274780273, "learning_rate": 1.7566800439272738e-06, "loss": 0.4167, "step": 11267 }, { "epoch": 0.6760664786704266, "grad_norm": 1.3177390098571777, "learning_rate": 1.7560902876545371e-06, "loss": 0.3661, "step": 11268 }, { "epoch": 0.6761264774704506, "grad_norm": 1.2676517963409424, "learning_rate": 1.7555005972413234e-06, "loss": 0.3552, "step": 11269 }, { "epoch": 0.6761864762704746, "grad_norm": 1.2322628498077393, "learning_rate": 1.7549109727099056e-06, "loss": 0.3533, "step": 11270 }, { "epoch": 0.6762464750704986, "grad_norm": 1.2840330600738525, "learning_rate": 1.75432141408255e-06, "loss": 0.3686, "step": 11271 }, { "epoch": 0.6763064738705226, "grad_norm": 1.3079842329025269, "learning_rate": 1.75373192138152e-06, "loss": 0.3946, "step": 11272 }, { "epoch": 0.6763664726705466, "grad_norm": 1.3859012126922607, "learning_rate": 1.7531424946290808e-06, "loss": 0.4162, "step": 11273 }, { "epoch": 0.6764264714705706, "grad_norm": 1.3564448356628418, "learning_rate": 1.7525531338474902e-06, "loss": 0.3975, "step": 11274 }, { "epoch": 0.6764864702705946, "grad_norm": 1.3085983991622925, "learning_rate": 1.7519638390590068e-06, "loss": 0.3785, "step": 11275 }, { "epoch": 0.6765464690706186, "grad_norm": 1.391921043395996, "learning_rate": 1.7513746102858843e-06, "loss": 0.3624, "step": 11276 }, { "epoch": 0.6766064678706426, "grad_norm": 1.3949127197265625, "learning_rate": 1.7507854475503756e-06, "loss": 0.3942, "step": 11277 }, { "epoch": 0.6766664666706665, "grad_norm": 1.1907427310943604, "learning_rate": 1.750196350874734e-06, "loss": 0.3749, "step": 11278 }, { "epoch": 0.6767264654706906, "grad_norm": 1.3726428747177124, "learning_rate": 1.749607320281201e-06, "loss": 0.3787, "step": 11279 }, { "epoch": 0.6767864642707145, "grad_norm": 1.3332945108413696, "learning_rate": 1.7490183557920263e-06, "loss": 0.3857, "step": 11280 }, { "epoch": 0.6768464630707386, "grad_norm": 1.3060680627822876, "learning_rate": 1.7484294574294498e-06, "loss": 0.422, "step": 11281 }, { "epoch": 0.6769064618707625, "grad_norm": 1.3836562633514404, "learning_rate": 1.747840625215715e-06, "loss": 0.4093, "step": 11282 }, { "epoch": 0.6769664606707866, "grad_norm": 1.2656556367874146, "learning_rate": 1.7472518591730545e-06, "loss": 0.3509, "step": 11283 }, { "epoch": 0.6770264594708106, "grad_norm": 1.2956241369247437, "learning_rate": 1.7466631593237055e-06, "loss": 0.3225, "step": 11284 }, { "epoch": 0.6770864582708346, "grad_norm": 1.2492884397506714, "learning_rate": 1.746074525689902e-06, "loss": 0.3565, "step": 11285 }, { "epoch": 0.6771464570708586, "grad_norm": 1.3673455715179443, "learning_rate": 1.7454859582938732e-06, "loss": 0.406, "step": 11286 }, { "epoch": 0.6772064558708826, "grad_norm": 1.3493759632110596, "learning_rate": 1.7448974571578459e-06, "loss": 0.3937, "step": 11287 }, { "epoch": 0.6772664546709066, "grad_norm": 1.4605181217193604, "learning_rate": 1.7443090223040443e-06, "loss": 0.3972, "step": 11288 }, { "epoch": 0.6773264534709306, "grad_norm": 1.3363193273544312, "learning_rate": 1.7437206537546932e-06, "loss": 0.3779, "step": 11289 }, { "epoch": 0.6773864522709546, "grad_norm": 1.3238446712493896, "learning_rate": 1.7431323515320117e-06, "loss": 0.4265, "step": 11290 }, { "epoch": 0.6774464510709786, "grad_norm": 1.2379076480865479, "learning_rate": 1.742544115658215e-06, "loss": 0.3638, "step": 11291 }, { "epoch": 0.6775064498710026, "grad_norm": 1.3380604982376099, "learning_rate": 1.7419559461555223e-06, "loss": 0.3835, "step": 11292 }, { "epoch": 0.6775664486710266, "grad_norm": 1.3211973905563354, "learning_rate": 1.7413678430461433e-06, "loss": 0.382, "step": 11293 }, { "epoch": 0.6776264474710506, "grad_norm": 1.2548646926879883, "learning_rate": 1.7407798063522873e-06, "loss": 0.4124, "step": 11294 }, { "epoch": 0.6776864462710745, "grad_norm": 1.3005661964416504, "learning_rate": 1.7401918360961648e-06, "loss": 0.3597, "step": 11295 }, { "epoch": 0.6777464450710986, "grad_norm": 1.2105472087860107, "learning_rate": 1.7396039322999773e-06, "loss": 0.3694, "step": 11296 }, { "epoch": 0.6778064438711225, "grad_norm": 1.3703030347824097, "learning_rate": 1.7390160949859304e-06, "loss": 0.4132, "step": 11297 }, { "epoch": 0.6778664426711466, "grad_norm": 1.406692624092102, "learning_rate": 1.7384283241762222e-06, "loss": 0.4017, "step": 11298 }, { "epoch": 0.6779264414711705, "grad_norm": 1.2812331914901733, "learning_rate": 1.7378406198930507e-06, "loss": 0.3692, "step": 11299 }, { "epoch": 0.6779864402711946, "grad_norm": 1.1841214895248413, "learning_rate": 1.7372529821586092e-06, "loss": 0.3975, "step": 11300 }, { "epoch": 0.6780464390712185, "grad_norm": 1.3208979368209839, "learning_rate": 1.736665410995093e-06, "loss": 0.426, "step": 11301 }, { "epoch": 0.6781064378712426, "grad_norm": 1.2767865657806396, "learning_rate": 1.7360779064246908e-06, "loss": 0.3952, "step": 11302 }, { "epoch": 0.6781664366712665, "grad_norm": 1.3774479627609253, "learning_rate": 1.7354904684695877e-06, "loss": 0.3822, "step": 11303 }, { "epoch": 0.6782264354712906, "grad_norm": 1.3321337699890137, "learning_rate": 1.7349030971519721e-06, "loss": 0.3951, "step": 11304 }, { "epoch": 0.6782864342713145, "grad_norm": 1.4646186828613281, "learning_rate": 1.734315792494025e-06, "loss": 0.3511, "step": 11305 }, { "epoch": 0.6783464330713386, "grad_norm": 1.4174960851669312, "learning_rate": 1.7337285545179243e-06, "loss": 0.369, "step": 11306 }, { "epoch": 0.6784064318713626, "grad_norm": 1.3319844007492065, "learning_rate": 1.733141383245851e-06, "loss": 0.4307, "step": 11307 }, { "epoch": 0.6784664306713866, "grad_norm": 1.2429155111312866, "learning_rate": 1.7325542786999756e-06, "loss": 0.3799, "step": 11308 }, { "epoch": 0.6785264294714106, "grad_norm": 1.3366423845291138, "learning_rate": 1.7319672409024758e-06, "loss": 0.3906, "step": 11309 }, { "epoch": 0.6785864282714346, "grad_norm": 1.4520032405853271, "learning_rate": 1.7313802698755154e-06, "loss": 0.4103, "step": 11310 }, { "epoch": 0.6786464270714586, "grad_norm": 1.2214789390563965, "learning_rate": 1.7307933656412658e-06, "loss": 0.365, "step": 11311 }, { "epoch": 0.6787064258714826, "grad_norm": 1.3526976108551025, "learning_rate": 1.7302065282218887e-06, "loss": 0.377, "step": 11312 }, { "epoch": 0.6787664246715066, "grad_norm": 1.2707884311676025, "learning_rate": 1.7296197576395496e-06, "loss": 0.3659, "step": 11313 }, { "epoch": 0.6788264234715305, "grad_norm": 1.3189506530761719, "learning_rate": 1.729033053916406e-06, "loss": 0.4154, "step": 11314 }, { "epoch": 0.6788864222715546, "grad_norm": 1.1888256072998047, "learning_rate": 1.7284464170746145e-06, "loss": 0.341, "step": 11315 }, { "epoch": 0.6789464210715785, "grad_norm": 1.3408966064453125, "learning_rate": 1.7278598471363318e-06, "loss": 0.402, "step": 11316 }, { "epoch": 0.6790064198716026, "grad_norm": 1.2059904336929321, "learning_rate": 1.7272733441237088e-06, "loss": 0.3347, "step": 11317 }, { "epoch": 0.6790664186716265, "grad_norm": 1.2516093254089355, "learning_rate": 1.7266869080588936e-06, "loss": 0.356, "step": 11318 }, { "epoch": 0.6791264174716506, "grad_norm": 1.278587818145752, "learning_rate": 1.7261005389640363e-06, "loss": 0.3413, "step": 11319 }, { "epoch": 0.6791864162716745, "grad_norm": 1.3103197813034058, "learning_rate": 1.72551423686128e-06, "loss": 0.3808, "step": 11320 }, { "epoch": 0.6792464150716986, "grad_norm": 1.2541804313659668, "learning_rate": 1.724928001772766e-06, "loss": 0.345, "step": 11321 }, { "epoch": 0.6793064138717225, "grad_norm": 1.4010509252548218, "learning_rate": 1.724341833720633e-06, "loss": 0.415, "step": 11322 }, { "epoch": 0.6793664126717466, "grad_norm": 1.2246547937393188, "learning_rate": 1.7237557327270205e-06, "loss": 0.3789, "step": 11323 }, { "epoch": 0.6794264114717705, "grad_norm": 1.3454973697662354, "learning_rate": 1.7231696988140612e-06, "loss": 0.413, "step": 11324 }, { "epoch": 0.6794864102717946, "grad_norm": 1.1764336824417114, "learning_rate": 1.7225837320038859e-06, "loss": 0.3558, "step": 11325 }, { "epoch": 0.6795464090718185, "grad_norm": 1.332416296005249, "learning_rate": 1.7219978323186262e-06, "loss": 0.3765, "step": 11326 }, { "epoch": 0.6796064078718426, "grad_norm": 1.3504445552825928, "learning_rate": 1.721411999780407e-06, "loss": 0.3714, "step": 11327 }, { "epoch": 0.6796664066718665, "grad_norm": 1.4480769634246826, "learning_rate": 1.7208262344113556e-06, "loss": 0.4361, "step": 11328 }, { "epoch": 0.6797264054718906, "grad_norm": 1.3293853998184204, "learning_rate": 1.7202405362335892e-06, "loss": 0.3838, "step": 11329 }, { "epoch": 0.6797864042719146, "grad_norm": 1.286402940750122, "learning_rate": 1.7196549052692286e-06, "loss": 0.3453, "step": 11330 }, { "epoch": 0.6798464030719386, "grad_norm": 1.4224234819412231, "learning_rate": 1.7190693415403929e-06, "loss": 0.4088, "step": 11331 }, { "epoch": 0.6799064018719626, "grad_norm": 1.305611252784729, "learning_rate": 1.7184838450691938e-06, "loss": 0.4136, "step": 11332 }, { "epoch": 0.6799664006719865, "grad_norm": 1.4434022903442383, "learning_rate": 1.7178984158777436e-06, "loss": 0.4068, "step": 11333 }, { "epoch": 0.6800263994720106, "grad_norm": 1.3139852285385132, "learning_rate": 1.7173130539881494e-06, "loss": 0.3628, "step": 11334 }, { "epoch": 0.6800863982720345, "grad_norm": 1.161393404006958, "learning_rate": 1.7167277594225212e-06, "loss": 0.332, "step": 11335 }, { "epoch": 0.6801463970720586, "grad_norm": 1.5721707344055176, "learning_rate": 1.7161425322029602e-06, "loss": 0.4367, "step": 11336 }, { "epoch": 0.6802063958720825, "grad_norm": 1.2372891902923584, "learning_rate": 1.7155573723515675e-06, "loss": 0.369, "step": 11337 }, { "epoch": 0.6802663946721066, "grad_norm": 1.377427339553833, "learning_rate": 1.7149722798904445e-06, "loss": 0.39, "step": 11338 }, { "epoch": 0.6803263934721305, "grad_norm": 1.2710950374603271, "learning_rate": 1.7143872548416841e-06, "loss": 0.3645, "step": 11339 }, { "epoch": 0.6803863922721546, "grad_norm": 1.1629984378814697, "learning_rate": 1.713802297227385e-06, "loss": 0.3369, "step": 11340 }, { "epoch": 0.6804463910721785, "grad_norm": 1.3889168500900269, "learning_rate": 1.7132174070696324e-06, "loss": 0.3711, "step": 11341 }, { "epoch": 0.6805063898722026, "grad_norm": 1.338304877281189, "learning_rate": 1.7126325843905177e-06, "loss": 0.3765, "step": 11342 }, { "epoch": 0.6805663886722265, "grad_norm": 1.4454797506332397, "learning_rate": 1.7120478292121284e-06, "loss": 0.4238, "step": 11343 }, { "epoch": 0.6806263874722506, "grad_norm": 1.1901963949203491, "learning_rate": 1.7114631415565469e-06, "loss": 0.3273, "step": 11344 }, { "epoch": 0.6806863862722745, "grad_norm": 1.133010745048523, "learning_rate": 1.7108785214458537e-06, "loss": 0.3722, "step": 11345 }, { "epoch": 0.6807463850722986, "grad_norm": 1.2178245782852173, "learning_rate": 1.7102939689021266e-06, "loss": 0.3876, "step": 11346 }, { "epoch": 0.6808063838723225, "grad_norm": 1.2180594205856323, "learning_rate": 1.7097094839474433e-06, "loss": 0.3446, "step": 11347 }, { "epoch": 0.6808663826723466, "grad_norm": 1.4217852354049683, "learning_rate": 1.7091250666038762e-06, "loss": 0.4074, "step": 11348 }, { "epoch": 0.6809263814723705, "grad_norm": 1.4267128705978394, "learning_rate": 1.708540716893495e-06, "loss": 0.3772, "step": 11349 }, { "epoch": 0.6809863802723946, "grad_norm": 1.265202283859253, "learning_rate": 1.7079564348383707e-06, "loss": 0.4078, "step": 11350 }, { "epoch": 0.6810463790724186, "grad_norm": 1.2715599536895752, "learning_rate": 1.707372220460567e-06, "loss": 0.3668, "step": 11351 }, { "epoch": 0.6811063778724425, "grad_norm": 1.2670230865478516, "learning_rate": 1.7067880737821474e-06, "loss": 0.39, "step": 11352 }, { "epoch": 0.6811663766724666, "grad_norm": 1.3631799221038818, "learning_rate": 1.706203994825171e-06, "loss": 0.3595, "step": 11353 }, { "epoch": 0.6812263754724905, "grad_norm": 1.3163517713546753, "learning_rate": 1.705619983611697e-06, "loss": 0.3933, "step": 11354 }, { "epoch": 0.6812863742725146, "grad_norm": 1.3602389097213745, "learning_rate": 1.705036040163784e-06, "loss": 0.3888, "step": 11355 }, { "epoch": 0.6813463730725385, "grad_norm": 1.4291422367095947, "learning_rate": 1.7044521645034792e-06, "loss": 0.4329, "step": 11356 }, { "epoch": 0.6814063718725626, "grad_norm": 1.2747547626495361, "learning_rate": 1.703868356652837e-06, "loss": 0.358, "step": 11357 }, { "epoch": 0.6814663706725865, "grad_norm": 1.3888503313064575, "learning_rate": 1.7032846166339027e-06, "loss": 0.3848, "step": 11358 }, { "epoch": 0.6815263694726106, "grad_norm": 1.2073694467544556, "learning_rate": 1.7027009444687237e-06, "loss": 0.3814, "step": 11359 }, { "epoch": 0.6815863682726345, "grad_norm": 1.4628688097000122, "learning_rate": 1.7021173401793414e-06, "loss": 0.4081, "step": 11360 }, { "epoch": 0.6816463670726586, "grad_norm": 1.2181354761123657, "learning_rate": 1.7015338037877951e-06, "loss": 0.4045, "step": 11361 }, { "epoch": 0.6817063658726825, "grad_norm": 1.3634878396987915, "learning_rate": 1.700950335316124e-06, "loss": 0.3877, "step": 11362 }, { "epoch": 0.6817663646727066, "grad_norm": 1.3750094175338745, "learning_rate": 1.700366934786363e-06, "loss": 0.4051, "step": 11363 }, { "epoch": 0.6818263634727305, "grad_norm": 1.3123902082443237, "learning_rate": 1.699783602220543e-06, "loss": 0.3423, "step": 11364 }, { "epoch": 0.6818863622727546, "grad_norm": 1.341464877128601, "learning_rate": 1.6992003376406938e-06, "loss": 0.4303, "step": 11365 }, { "epoch": 0.6819463610727785, "grad_norm": 1.2924684286117554, "learning_rate": 1.6986171410688445e-06, "loss": 0.3265, "step": 11366 }, { "epoch": 0.6820063598728026, "grad_norm": 1.2963318824768066, "learning_rate": 1.6980340125270188e-06, "loss": 0.4352, "step": 11367 }, { "epoch": 0.6820663586728265, "grad_norm": 1.3370016813278198, "learning_rate": 1.6974509520372375e-06, "loss": 0.4362, "step": 11368 }, { "epoch": 0.6821263574728506, "grad_norm": 1.3104803562164307, "learning_rate": 1.6968679596215226e-06, "loss": 0.4039, "step": 11369 }, { "epoch": 0.6821863562728745, "grad_norm": 1.3536486625671387, "learning_rate": 1.6962850353018897e-06, "loss": 0.3767, "step": 11370 }, { "epoch": 0.6822463550728985, "grad_norm": 1.2828012704849243, "learning_rate": 1.6957021791003521e-06, "loss": 0.372, "step": 11371 }, { "epoch": 0.6823063538729225, "grad_norm": 1.4397410154342651, "learning_rate": 1.695119391038924e-06, "loss": 0.4229, "step": 11372 }, { "epoch": 0.6823663526729465, "grad_norm": 1.2559489011764526, "learning_rate": 1.6945366711396124e-06, "loss": 0.3946, "step": 11373 }, { "epoch": 0.6824263514729706, "grad_norm": 1.4280813932418823, "learning_rate": 1.6939540194244274e-06, "loss": 0.3611, "step": 11374 }, { "epoch": 0.6824863502729945, "grad_norm": 1.2278965711593628, "learning_rate": 1.6933714359153686e-06, "loss": 0.3708, "step": 11375 }, { "epoch": 0.6825463490730186, "grad_norm": 1.1894172430038452, "learning_rate": 1.6927889206344393e-06, "loss": 0.3702, "step": 11376 }, { "epoch": 0.6826063478730425, "grad_norm": 1.1633095741271973, "learning_rate": 1.69220647360364e-06, "loss": 0.3191, "step": 11377 }, { "epoch": 0.6826663466730666, "grad_norm": 1.4545860290527344, "learning_rate": 1.6916240948449662e-06, "loss": 0.3989, "step": 11378 }, { "epoch": 0.6827263454730905, "grad_norm": 1.2094554901123047, "learning_rate": 1.6910417843804113e-06, "loss": 0.3292, "step": 11379 }, { "epoch": 0.6827863442731146, "grad_norm": 1.320610761642456, "learning_rate": 1.6904595422319654e-06, "loss": 0.3856, "step": 11380 }, { "epoch": 0.6828463430731385, "grad_norm": 1.2112889289855957, "learning_rate": 1.6898773684216197e-06, "loss": 0.3544, "step": 11381 }, { "epoch": 0.6829063418731626, "grad_norm": 1.3862603902816772, "learning_rate": 1.6892952629713588e-06, "loss": 0.4942, "step": 11382 }, { "epoch": 0.6829663406731865, "grad_norm": 1.2593181133270264, "learning_rate": 1.6887132259031648e-06, "loss": 0.3736, "step": 11383 }, { "epoch": 0.6830263394732106, "grad_norm": 1.2878553867340088, "learning_rate": 1.6881312572390217e-06, "loss": 0.3542, "step": 11384 }, { "epoch": 0.6830863382732345, "grad_norm": 1.362483263015747, "learning_rate": 1.6875493570009046e-06, "loss": 0.4249, "step": 11385 }, { "epoch": 0.6831463370732586, "grad_norm": 1.2165533304214478, "learning_rate": 1.6869675252107932e-06, "loss": 0.3691, "step": 11386 }, { "epoch": 0.6832063358732825, "grad_norm": 1.2733975648880005, "learning_rate": 1.686385761890656e-06, "loss": 0.3782, "step": 11387 }, { "epoch": 0.6832663346733066, "grad_norm": 1.4157724380493164, "learning_rate": 1.6858040670624656e-06, "loss": 0.3934, "step": 11388 }, { "epoch": 0.6833263334733305, "grad_norm": 1.2970843315124512, "learning_rate": 1.6852224407481915e-06, "loss": 0.3901, "step": 11389 }, { "epoch": 0.6833863322733545, "grad_norm": 1.4897167682647705, "learning_rate": 1.6846408829697977e-06, "loss": 0.3768, "step": 11390 }, { "epoch": 0.6834463310733785, "grad_norm": 1.3486710786819458, "learning_rate": 1.684059393749247e-06, "loss": 0.3848, "step": 11391 }, { "epoch": 0.6835063298734025, "grad_norm": 1.4975258111953735, "learning_rate": 1.683477973108498e-06, "loss": 0.4776, "step": 11392 }, { "epoch": 0.6835663286734265, "grad_norm": 1.3971444368362427, "learning_rate": 1.6828966210695117e-06, "loss": 0.4027, "step": 11393 }, { "epoch": 0.6836263274734505, "grad_norm": 1.178970217704773, "learning_rate": 1.682315337654241e-06, "loss": 0.4144, "step": 11394 }, { "epoch": 0.6836863262734745, "grad_norm": 1.3403958082199097, "learning_rate": 1.6817341228846374e-06, "loss": 0.3987, "step": 11395 }, { "epoch": 0.6837463250734985, "grad_norm": 1.3529983758926392, "learning_rate": 1.6811529767826533e-06, "loss": 0.4076, "step": 11396 }, { "epoch": 0.6838063238735226, "grad_norm": 1.3130745887756348, "learning_rate": 1.6805718993702348e-06, "loss": 0.3514, "step": 11397 }, { "epoch": 0.6838663226735465, "grad_norm": 1.1523798704147339, "learning_rate": 1.679990890669326e-06, "loss": 0.3432, "step": 11398 }, { "epoch": 0.6839263214735706, "grad_norm": 1.2440913915634155, "learning_rate": 1.6794099507018683e-06, "loss": 0.3832, "step": 11399 }, { "epoch": 0.6839863202735945, "grad_norm": 1.409332036972046, "learning_rate": 1.678829079489802e-06, "loss": 0.3779, "step": 11400 }, { "epoch": 0.6840463190736186, "grad_norm": 1.25444495677948, "learning_rate": 1.678248277055067e-06, "loss": 0.3744, "step": 11401 }, { "epoch": 0.6841063178736425, "grad_norm": 1.2573667764663696, "learning_rate": 1.677667543419592e-06, "loss": 0.3745, "step": 11402 }, { "epoch": 0.6841663166736666, "grad_norm": 1.3586527109146118, "learning_rate": 1.6770868786053125e-06, "loss": 0.3697, "step": 11403 }, { "epoch": 0.6842263154736905, "grad_norm": 1.4000569581985474, "learning_rate": 1.6765062826341552e-06, "loss": 0.3732, "step": 11404 }, { "epoch": 0.6842863142737146, "grad_norm": 1.3834254741668701, "learning_rate": 1.6759257555280504e-06, "loss": 0.3947, "step": 11405 }, { "epoch": 0.6843463130737385, "grad_norm": 1.3335895538330078, "learning_rate": 1.6753452973089167e-06, "loss": 0.4466, "step": 11406 }, { "epoch": 0.6844063118737626, "grad_norm": 1.3220865726470947, "learning_rate": 1.674764907998678e-06, "loss": 0.3926, "step": 11407 }, { "epoch": 0.6844663106737865, "grad_norm": 1.2332202196121216, "learning_rate": 1.6741845876192542e-06, "loss": 0.3331, "step": 11408 }, { "epoch": 0.6845263094738105, "grad_norm": 1.3193031549453735, "learning_rate": 1.6736043361925602e-06, "loss": 0.4016, "step": 11409 }, { "epoch": 0.6845863082738345, "grad_norm": 1.3013300895690918, "learning_rate": 1.6730241537405096e-06, "loss": 0.331, "step": 11410 }, { "epoch": 0.6846463070738585, "grad_norm": 1.222724437713623, "learning_rate": 1.6724440402850115e-06, "loss": 0.3366, "step": 11411 }, { "epoch": 0.6847063058738825, "grad_norm": 1.3348371982574463, "learning_rate": 1.6718639958479772e-06, "loss": 0.3841, "step": 11412 }, { "epoch": 0.6847663046739065, "grad_norm": 1.333331823348999, "learning_rate": 1.6712840204513106e-06, "loss": 0.3536, "step": 11413 }, { "epoch": 0.6848263034739305, "grad_norm": 1.2839664220809937, "learning_rate": 1.6707041141169137e-06, "loss": 0.4055, "step": 11414 }, { "epoch": 0.6848863022739545, "grad_norm": 1.2485545873641968, "learning_rate": 1.6701242768666893e-06, "loss": 0.3803, "step": 11415 }, { "epoch": 0.6849463010739785, "grad_norm": 1.3609071969985962, "learning_rate": 1.6695445087225343e-06, "loss": 0.3569, "step": 11416 }, { "epoch": 0.6850062998740025, "grad_norm": 1.345322608947754, "learning_rate": 1.6689648097063422e-06, "loss": 0.3591, "step": 11417 }, { "epoch": 0.6850662986740266, "grad_norm": 1.1924644708633423, "learning_rate": 1.6683851798400084e-06, "loss": 0.3603, "step": 11418 }, { "epoch": 0.6851262974740505, "grad_norm": 1.3594355583190918, "learning_rate": 1.6678056191454204e-06, "loss": 0.38, "step": 11419 }, { "epoch": 0.6851862962740746, "grad_norm": 1.3081333637237549, "learning_rate": 1.6672261276444694e-06, "loss": 0.3935, "step": 11420 }, { "epoch": 0.6852462950740985, "grad_norm": 1.283033847808838, "learning_rate": 1.6666467053590346e-06, "loss": 0.3714, "step": 11421 }, { "epoch": 0.6853062938741226, "grad_norm": 1.3259949684143066, "learning_rate": 1.666067352311002e-06, "loss": 0.3466, "step": 11422 }, { "epoch": 0.6853662926741465, "grad_norm": 1.327343463897705, "learning_rate": 1.6654880685222493e-06, "loss": 0.4196, "step": 11423 }, { "epoch": 0.6854262914741706, "grad_norm": 1.3193501234054565, "learning_rate": 1.6649088540146554e-06, "loss": 0.343, "step": 11424 }, { "epoch": 0.6854862902741945, "grad_norm": 1.4060765504837036, "learning_rate": 1.6643297088100937e-06, "loss": 0.4461, "step": 11425 }, { "epoch": 0.6855462890742186, "grad_norm": 1.4174838066101074, "learning_rate": 1.663750632930434e-06, "loss": 0.3902, "step": 11426 }, { "epoch": 0.6856062878742425, "grad_norm": 1.4146026372909546, "learning_rate": 1.6631716263975484e-06, "loss": 0.3864, "step": 11427 }, { "epoch": 0.6856662866742665, "grad_norm": 1.190218448638916, "learning_rate": 1.6625926892333017e-06, "loss": 0.3583, "step": 11428 }, { "epoch": 0.6857262854742905, "grad_norm": 1.3715276718139648, "learning_rate": 1.6620138214595569e-06, "loss": 0.3987, "step": 11429 }, { "epoch": 0.6857862842743145, "grad_norm": 1.141577124595642, "learning_rate": 1.6614350230981772e-06, "loss": 0.3382, "step": 11430 }, { "epoch": 0.6858462830743385, "grad_norm": 1.1356780529022217, "learning_rate": 1.660856294171019e-06, "loss": 0.292, "step": 11431 }, { "epoch": 0.6859062818743625, "grad_norm": 1.2791396379470825, "learning_rate": 1.6602776346999423e-06, "loss": 0.3445, "step": 11432 }, { "epoch": 0.6859662806743865, "grad_norm": 1.2849208116531372, "learning_rate": 1.6596990447067947e-06, "loss": 0.4315, "step": 11433 }, { "epoch": 0.6860262794744105, "grad_norm": 1.341090440750122, "learning_rate": 1.6591205242134314e-06, "loss": 0.3645, "step": 11434 }, { "epoch": 0.6860862782744345, "grad_norm": 1.296692132949829, "learning_rate": 1.6585420732416974e-06, "loss": 0.4137, "step": 11435 }, { "epoch": 0.6861462770744585, "grad_norm": 1.312384843826294, "learning_rate": 1.6579636918134408e-06, "loss": 0.3661, "step": 11436 }, { "epoch": 0.6862062758744825, "grad_norm": 1.3742530345916748, "learning_rate": 1.6573853799505033e-06, "loss": 0.3909, "step": 11437 }, { "epoch": 0.6862662746745065, "grad_norm": 1.2539331912994385, "learning_rate": 1.6568071376747237e-06, "loss": 0.3456, "step": 11438 }, { "epoch": 0.6863262734745305, "grad_norm": 1.247627854347229, "learning_rate": 1.6562289650079425e-06, "loss": 0.407, "step": 11439 }, { "epoch": 0.6863862722745545, "grad_norm": 1.2962162494659424, "learning_rate": 1.655650861971993e-06, "loss": 0.3874, "step": 11440 }, { "epoch": 0.6864462710745786, "grad_norm": 1.480943202972412, "learning_rate": 1.655072828588706e-06, "loss": 0.4109, "step": 11441 }, { "epoch": 0.6865062698746025, "grad_norm": 1.2798810005187988, "learning_rate": 1.6544948648799146e-06, "loss": 0.3891, "step": 11442 }, { "epoch": 0.6865662686746266, "grad_norm": 1.204486608505249, "learning_rate": 1.6539169708674438e-06, "loss": 0.3029, "step": 11443 }, { "epoch": 0.6866262674746505, "grad_norm": 1.2737590074539185, "learning_rate": 1.6533391465731184e-06, "loss": 0.4087, "step": 11444 }, { "epoch": 0.6866862662746746, "grad_norm": 1.2619457244873047, "learning_rate": 1.6527613920187584e-06, "loss": 0.3876, "step": 11445 }, { "epoch": 0.6867462650746985, "grad_norm": 1.4054710865020752, "learning_rate": 1.6521837072261863e-06, "loss": 0.3576, "step": 11446 }, { "epoch": 0.6868062638747225, "grad_norm": 1.295760989189148, "learning_rate": 1.6516060922172168e-06, "loss": 0.3681, "step": 11447 }, { "epoch": 0.6868662626747465, "grad_norm": 1.371016502380371, "learning_rate": 1.6510285470136625e-06, "loss": 0.3583, "step": 11448 }, { "epoch": 0.6869262614747705, "grad_norm": 1.3202763795852661, "learning_rate": 1.6504510716373376e-06, "loss": 0.3948, "step": 11449 }, { "epoch": 0.6869862602747945, "grad_norm": 1.2491151094436646, "learning_rate": 1.649873666110048e-06, "loss": 0.3796, "step": 11450 }, { "epoch": 0.6870462590748185, "grad_norm": 1.391696810722351, "learning_rate": 1.6492963304536031e-06, "loss": 0.4035, "step": 11451 }, { "epoch": 0.6871062578748425, "grad_norm": 1.2622382640838623, "learning_rate": 1.6487190646898017e-06, "loss": 0.3651, "step": 11452 }, { "epoch": 0.6871662566748665, "grad_norm": 1.2028746604919434, "learning_rate": 1.6481418688404468e-06, "loss": 0.3836, "step": 11453 }, { "epoch": 0.6872262554748905, "grad_norm": 1.3443288803100586, "learning_rate": 1.647564742927338e-06, "loss": 0.351, "step": 11454 }, { "epoch": 0.6872862542749145, "grad_norm": 1.368546724319458, "learning_rate": 1.6469876869722685e-06, "loss": 0.4025, "step": 11455 }, { "epoch": 0.6873462530749385, "grad_norm": 1.3397161960601807, "learning_rate": 1.6464107009970326e-06, "loss": 0.4247, "step": 11456 }, { "epoch": 0.6874062518749625, "grad_norm": 1.3208616971969604, "learning_rate": 1.6458337850234175e-06, "loss": 0.3661, "step": 11457 }, { "epoch": 0.6874662506749865, "grad_norm": 1.5998777151107788, "learning_rate": 1.6452569390732145e-06, "loss": 0.387, "step": 11458 }, { "epoch": 0.6875262494750105, "grad_norm": 1.4395411014556885, "learning_rate": 1.6446801631682066e-06, "loss": 0.4159, "step": 11459 }, { "epoch": 0.6875862482750345, "grad_norm": 1.3232464790344238, "learning_rate": 1.644103457330175e-06, "loss": 0.4346, "step": 11460 }, { "epoch": 0.6876462470750585, "grad_norm": 1.3017247915267944, "learning_rate": 1.6435268215809016e-06, "loss": 0.3548, "step": 11461 }, { "epoch": 0.6877062458750824, "grad_norm": 1.3119823932647705, "learning_rate": 1.6429502559421618e-06, "loss": 0.3556, "step": 11462 }, { "epoch": 0.6877662446751065, "grad_norm": 1.4410291910171509, "learning_rate": 1.6423737604357305e-06, "loss": 0.4105, "step": 11463 }, { "epoch": 0.6878262434751305, "grad_norm": 1.3515324592590332, "learning_rate": 1.6417973350833778e-06, "loss": 0.3591, "step": 11464 }, { "epoch": 0.6878862422751545, "grad_norm": 1.3580313920974731, "learning_rate": 1.6412209799068733e-06, "loss": 0.3456, "step": 11465 }, { "epoch": 0.6879462410751785, "grad_norm": 1.5365321636199951, "learning_rate": 1.640644694927985e-06, "loss": 0.4189, "step": 11466 }, { "epoch": 0.6880062398752025, "grad_norm": 1.3167229890823364, "learning_rate": 1.6400684801684761e-06, "loss": 0.3707, "step": 11467 }, { "epoch": 0.6880662386752265, "grad_norm": 1.2554048299789429, "learning_rate": 1.6394923356501064e-06, "loss": 0.4133, "step": 11468 }, { "epoch": 0.6881262374752505, "grad_norm": 1.2825372219085693, "learning_rate": 1.6389162613946336e-06, "loss": 0.3725, "step": 11469 }, { "epoch": 0.6881862362752745, "grad_norm": 1.2474839687347412, "learning_rate": 1.6383402574238156e-06, "loss": 0.3874, "step": 11470 }, { "epoch": 0.6882462350752985, "grad_norm": 1.3727705478668213, "learning_rate": 1.6377643237594045e-06, "loss": 0.3623, "step": 11471 }, { "epoch": 0.6883062338753225, "grad_norm": 1.2616503238677979, "learning_rate": 1.6371884604231495e-06, "loss": 0.3991, "step": 11472 }, { "epoch": 0.6883662326753465, "grad_norm": 1.3720418214797974, "learning_rate": 1.6366126674368003e-06, "loss": 0.3805, "step": 11473 }, { "epoch": 0.6884262314753705, "grad_norm": 1.3539438247680664, "learning_rate": 1.6360369448221012e-06, "loss": 0.3944, "step": 11474 }, { "epoch": 0.6884862302753945, "grad_norm": 1.428816556930542, "learning_rate": 1.6354612926007948e-06, "loss": 0.3848, "step": 11475 }, { "epoch": 0.6885462290754185, "grad_norm": 1.2365142107009888, "learning_rate": 1.6348857107946188e-06, "loss": 0.4208, "step": 11476 }, { "epoch": 0.6886062278754425, "grad_norm": 1.3186471462249756, "learning_rate": 1.6343101994253124e-06, "loss": 0.3991, "step": 11477 }, { "epoch": 0.6886662266754665, "grad_norm": 1.270233154296875, "learning_rate": 1.6337347585146123e-06, "loss": 0.404, "step": 11478 }, { "epoch": 0.6887262254754905, "grad_norm": 1.3009358644485474, "learning_rate": 1.6331593880842448e-06, "loss": 0.3581, "step": 11479 }, { "epoch": 0.6887862242755145, "grad_norm": 1.3880760669708252, "learning_rate": 1.6325840881559433e-06, "loss": 0.427, "step": 11480 }, { "epoch": 0.6888462230755384, "grad_norm": 1.3355293273925781, "learning_rate": 1.6320088587514319e-06, "loss": 0.3863, "step": 11481 }, { "epoch": 0.6889062218755625, "grad_norm": 1.3463423252105713, "learning_rate": 1.6314336998924362e-06, "loss": 0.3692, "step": 11482 }, { "epoch": 0.6889662206755864, "grad_norm": 1.3387131690979004, "learning_rate": 1.6308586116006766e-06, "loss": 0.3661, "step": 11483 }, { "epoch": 0.6890262194756105, "grad_norm": 1.3327577114105225, "learning_rate": 1.6302835938978704e-06, "loss": 0.3776, "step": 11484 }, { "epoch": 0.6890862182756344, "grad_norm": 1.2429373264312744, "learning_rate": 1.6297086468057357e-06, "loss": 0.3481, "step": 11485 }, { "epoch": 0.6891462170756585, "grad_norm": 1.2918339967727661, "learning_rate": 1.6291337703459843e-06, "loss": 0.3949, "step": 11486 }, { "epoch": 0.6892062158756825, "grad_norm": 1.2873624563217163, "learning_rate": 1.6285589645403265e-06, "loss": 0.3412, "step": 11487 }, { "epoch": 0.6892662146757065, "grad_norm": 1.2317595481872559, "learning_rate": 1.6279842294104694e-06, "loss": 0.3653, "step": 11488 }, { "epoch": 0.6893262134757305, "grad_norm": 1.3371551036834717, "learning_rate": 1.6274095649781202e-06, "loss": 0.3714, "step": 11489 }, { "epoch": 0.6893862122757545, "grad_norm": 1.356935977935791, "learning_rate": 1.6268349712649802e-06, "loss": 0.389, "step": 11490 }, { "epoch": 0.6894462110757785, "grad_norm": 1.1862763166427612, "learning_rate": 1.626260448292748e-06, "loss": 0.4005, "step": 11491 }, { "epoch": 0.6895062098758025, "grad_norm": 1.281200885772705, "learning_rate": 1.6256859960831233e-06, "loss": 0.3666, "step": 11492 }, { "epoch": 0.6895662086758265, "grad_norm": 1.3910906314849854, "learning_rate": 1.6251116146577993e-06, "loss": 0.3954, "step": 11493 }, { "epoch": 0.6896262074758505, "grad_norm": 1.3215115070343018, "learning_rate": 1.624537304038466e-06, "loss": 0.3657, "step": 11494 }, { "epoch": 0.6896862062758745, "grad_norm": 1.323838472366333, "learning_rate": 1.6239630642468153e-06, "loss": 0.4046, "step": 11495 }, { "epoch": 0.6897462050758985, "grad_norm": 1.1927381753921509, "learning_rate": 1.6233888953045317e-06, "loss": 0.3818, "step": 11496 }, { "epoch": 0.6898062038759225, "grad_norm": 1.490867257118225, "learning_rate": 1.622814797233302e-06, "loss": 0.4406, "step": 11497 }, { "epoch": 0.6898662026759464, "grad_norm": 1.3878535032272339, "learning_rate": 1.6222407700548026e-06, "loss": 0.341, "step": 11498 }, { "epoch": 0.6899262014759705, "grad_norm": 1.3151757717132568, "learning_rate": 1.6216668137907138e-06, "loss": 0.369, "step": 11499 }, { "epoch": 0.6899862002759944, "grad_norm": 1.416524052619934, "learning_rate": 1.6210929284627136e-06, "loss": 0.3953, "step": 11500 }, { "epoch": 0.6900461990760185, "grad_norm": 1.3083652257919312, "learning_rate": 1.620519114092473e-06, "loss": 0.3717, "step": 11501 }, { "epoch": 0.6901061978760424, "grad_norm": 1.1155186891555786, "learning_rate": 1.6199453707016624e-06, "loss": 0.3373, "step": 11502 }, { "epoch": 0.6901661966760665, "grad_norm": 1.4349141120910645, "learning_rate": 1.6193716983119486e-06, "loss": 0.3891, "step": 11503 }, { "epoch": 0.6902261954760904, "grad_norm": 1.335561990737915, "learning_rate": 1.6187980969449988e-06, "loss": 0.3527, "step": 11504 }, { "epoch": 0.6902861942761145, "grad_norm": 1.4624284505844116, "learning_rate": 1.6182245666224741e-06, "loss": 0.3768, "step": 11505 }, { "epoch": 0.6903461930761384, "grad_norm": 1.482283115386963, "learning_rate": 1.6176511073660328e-06, "loss": 0.3995, "step": 11506 }, { "epoch": 0.6904061918761625, "grad_norm": 1.3861697912216187, "learning_rate": 1.6170777191973347e-06, "loss": 0.3565, "step": 11507 }, { "epoch": 0.6904661906761865, "grad_norm": 1.2347441911697388, "learning_rate": 1.6165044021380327e-06, "loss": 0.3667, "step": 11508 }, { "epoch": 0.6905261894762105, "grad_norm": 1.1715691089630127, "learning_rate": 1.6159311562097779e-06, "loss": 0.376, "step": 11509 }, { "epoch": 0.6905861882762345, "grad_norm": 1.2685184478759766, "learning_rate": 1.6153579814342187e-06, "loss": 0.379, "step": 11510 }, { "epoch": 0.6906461870762585, "grad_norm": 1.234850287437439, "learning_rate": 1.6147848778330017e-06, "loss": 0.4098, "step": 11511 }, { "epoch": 0.6907061858762825, "grad_norm": 1.2863036394119263, "learning_rate": 1.6142118454277724e-06, "loss": 0.37, "step": 11512 }, { "epoch": 0.6907661846763065, "grad_norm": 1.297249674797058, "learning_rate": 1.6136388842401698e-06, "loss": 0.3664, "step": 11513 }, { "epoch": 0.6908261834763305, "grad_norm": 1.2310811281204224, "learning_rate": 1.6130659942918324e-06, "loss": 0.38, "step": 11514 }, { "epoch": 0.6908861822763545, "grad_norm": 1.3411980867385864, "learning_rate": 1.6124931756043944e-06, "loss": 0.395, "step": 11515 }, { "epoch": 0.6909461810763785, "grad_norm": 1.2251574993133545, "learning_rate": 1.6119204281994906e-06, "loss": 0.3594, "step": 11516 }, { "epoch": 0.6910061798764024, "grad_norm": 1.449800968170166, "learning_rate": 1.6113477520987504e-06, "loss": 0.3732, "step": 11517 }, { "epoch": 0.6910661786764265, "grad_norm": 1.338474988937378, "learning_rate": 1.6107751473237998e-06, "loss": 0.4059, "step": 11518 }, { "epoch": 0.6911261774764504, "grad_norm": 1.2594630718231201, "learning_rate": 1.6102026138962656e-06, "loss": 0.324, "step": 11519 }, { "epoch": 0.6911861762764745, "grad_norm": 1.4275795221328735, "learning_rate": 1.6096301518377694e-06, "loss": 0.3728, "step": 11520 }, { "epoch": 0.6912461750764984, "grad_norm": 1.355067491531372, "learning_rate": 1.6090577611699292e-06, "loss": 0.422, "step": 11521 }, { "epoch": 0.6913061738765225, "grad_norm": 1.151483416557312, "learning_rate": 1.608485441914361e-06, "loss": 0.3564, "step": 11522 }, { "epoch": 0.6913661726765464, "grad_norm": 1.2046921253204346, "learning_rate": 1.6079131940926803e-06, "loss": 0.3318, "step": 11523 }, { "epoch": 0.6914261714765705, "grad_norm": 1.2638152837753296, "learning_rate": 1.6073410177265002e-06, "loss": 0.3266, "step": 11524 }, { "epoch": 0.6914861702765944, "grad_norm": 1.2220419645309448, "learning_rate": 1.6067689128374247e-06, "loss": 0.3493, "step": 11525 }, { "epoch": 0.6915461690766185, "grad_norm": 1.219735026359558, "learning_rate": 1.606196879447063e-06, "loss": 0.3586, "step": 11526 }, { "epoch": 0.6916061678766424, "grad_norm": 1.4069188833236694, "learning_rate": 1.6056249175770154e-06, "loss": 0.3722, "step": 11527 }, { "epoch": 0.6916661666766665, "grad_norm": 1.3443098068237305, "learning_rate": 1.6050530272488867e-06, "loss": 0.3852, "step": 11528 }, { "epoch": 0.6917261654766904, "grad_norm": 1.1203805208206177, "learning_rate": 1.604481208484269e-06, "loss": 0.3158, "step": 11529 }, { "epoch": 0.6917861642767145, "grad_norm": 1.2081693410873413, "learning_rate": 1.6039094613047603e-06, "loss": 0.3711, "step": 11530 }, { "epoch": 0.6918461630767385, "grad_norm": 1.3425571918487549, "learning_rate": 1.6033377857319541e-06, "loss": 0.4034, "step": 11531 }, { "epoch": 0.6919061618767625, "grad_norm": 1.3855477571487427, "learning_rate": 1.6027661817874386e-06, "loss": 0.4017, "step": 11532 }, { "epoch": 0.6919661606767865, "grad_norm": 1.3788751363754272, "learning_rate": 1.6021946494928004e-06, "loss": 0.3716, "step": 11533 }, { "epoch": 0.6920261594768105, "grad_norm": 1.3634690046310425, "learning_rate": 1.6016231888696228e-06, "loss": 0.4099, "step": 11534 }, { "epoch": 0.6920861582768345, "grad_norm": 1.2914679050445557, "learning_rate": 1.6010517999394893e-06, "loss": 0.3639, "step": 11535 }, { "epoch": 0.6921461570768584, "grad_norm": 1.3831379413604736, "learning_rate": 1.6004804827239782e-06, "loss": 0.3736, "step": 11536 }, { "epoch": 0.6922061558768825, "grad_norm": 1.2854291200637817, "learning_rate": 1.5999092372446634e-06, "loss": 0.4219, "step": 11537 }, { "epoch": 0.6922661546769064, "grad_norm": 1.3974815607070923, "learning_rate": 1.5993380635231214e-06, "loss": 0.3845, "step": 11538 }, { "epoch": 0.6923261534769305, "grad_norm": 1.4173662662506104, "learning_rate": 1.5987669615809214e-06, "loss": 0.3992, "step": 11539 }, { "epoch": 0.6923861522769544, "grad_norm": 1.196582317352295, "learning_rate": 1.59819593143963e-06, "loss": 0.3229, "step": 11540 }, { "epoch": 0.6924461510769785, "grad_norm": 1.406733751296997, "learning_rate": 1.5976249731208147e-06, "loss": 0.4013, "step": 11541 }, { "epoch": 0.6925061498770024, "grad_norm": 1.3940073251724243, "learning_rate": 1.5970540866460359e-06, "loss": 0.3718, "step": 11542 }, { "epoch": 0.6925661486770265, "grad_norm": 1.3369266986846924, "learning_rate": 1.596483272036857e-06, "loss": 0.3709, "step": 11543 }, { "epoch": 0.6926261474770504, "grad_norm": 1.4102548360824585, "learning_rate": 1.5959125293148301e-06, "loss": 0.4239, "step": 11544 }, { "epoch": 0.6926861462770745, "grad_norm": 1.3965343236923218, "learning_rate": 1.595341858501513e-06, "loss": 0.3959, "step": 11545 }, { "epoch": 0.6927461450770984, "grad_norm": 1.2128939628601074, "learning_rate": 1.5947712596184547e-06, "loss": 0.3874, "step": 11546 }, { "epoch": 0.6928061438771225, "grad_norm": 1.4474736452102661, "learning_rate": 1.5942007326872076e-06, "loss": 0.3775, "step": 11547 }, { "epoch": 0.6928661426771464, "grad_norm": 1.248874306678772, "learning_rate": 1.593630277729316e-06, "loss": 0.3306, "step": 11548 }, { "epoch": 0.6929261414771705, "grad_norm": 1.262467622756958, "learning_rate": 1.5930598947663218e-06, "loss": 0.3818, "step": 11549 }, { "epoch": 0.6929861402771944, "grad_norm": 1.3991339206695557, "learning_rate": 1.5924895838197689e-06, "loss": 0.4055, "step": 11550 }, { "epoch": 0.6930461390772185, "grad_norm": 1.316152811050415, "learning_rate": 1.5919193449111938e-06, "loss": 0.4052, "step": 11551 }, { "epoch": 0.6931061378772424, "grad_norm": 1.4270106554031372, "learning_rate": 1.5913491780621303e-06, "loss": 0.4163, "step": 11552 }, { "epoch": 0.6931661366772665, "grad_norm": 1.316135048866272, "learning_rate": 1.590779083294114e-06, "loss": 0.3267, "step": 11553 }, { "epoch": 0.6932261354772905, "grad_norm": 1.1857109069824219, "learning_rate": 1.5902090606286734e-06, "loss": 0.3702, "step": 11554 }, { "epoch": 0.6932861342773144, "grad_norm": 1.2164885997772217, "learning_rate": 1.5896391100873356e-06, "loss": 0.3549, "step": 11555 }, { "epoch": 0.6933461330773385, "grad_norm": 1.2747437953948975, "learning_rate": 1.5890692316916236e-06, "loss": 0.3611, "step": 11556 }, { "epoch": 0.6934061318773624, "grad_norm": 1.3097890615463257, "learning_rate": 1.588499425463062e-06, "loss": 0.3842, "step": 11557 }, { "epoch": 0.6934661306773865, "grad_norm": 1.28116774559021, "learning_rate": 1.587929691423167e-06, "loss": 0.3608, "step": 11558 }, { "epoch": 0.6935261294774104, "grad_norm": 1.2618690729141235, "learning_rate": 1.5873600295934575e-06, "loss": 0.3506, "step": 11559 }, { "epoch": 0.6935861282774345, "grad_norm": 1.3106898069381714, "learning_rate": 1.5867904399954458e-06, "loss": 0.3904, "step": 11560 }, { "epoch": 0.6936461270774584, "grad_norm": 1.3391633033752441, "learning_rate": 1.586220922650641e-06, "loss": 0.389, "step": 11561 }, { "epoch": 0.6937061258774825, "grad_norm": 1.2650381326675415, "learning_rate": 1.5856514775805542e-06, "loss": 0.3689, "step": 11562 }, { "epoch": 0.6937661246775064, "grad_norm": 1.364650845527649, "learning_rate": 1.5850821048066894e-06, "loss": 0.4059, "step": 11563 }, { "epoch": 0.6938261234775305, "grad_norm": 1.2119907140731812, "learning_rate": 1.5845128043505479e-06, "loss": 0.3705, "step": 11564 }, { "epoch": 0.6938861222775544, "grad_norm": 1.2968820333480835, "learning_rate": 1.583943576233632e-06, "loss": 0.4051, "step": 11565 }, { "epoch": 0.6939461210775785, "grad_norm": 1.2971450090408325, "learning_rate": 1.583374420477438e-06, "loss": 0.3675, "step": 11566 }, { "epoch": 0.6940061198776024, "grad_norm": 1.2086427211761475, "learning_rate": 1.5828053371034597e-06, "loss": 0.404, "step": 11567 }, { "epoch": 0.6940661186776265, "grad_norm": 1.1881999969482422, "learning_rate": 1.582236326133188e-06, "loss": 0.3374, "step": 11568 }, { "epoch": 0.6941261174776504, "grad_norm": 1.275755763053894, "learning_rate": 1.5816673875881142e-06, "loss": 0.4174, "step": 11569 }, { "epoch": 0.6941861162776745, "grad_norm": 1.2431408166885376, "learning_rate": 1.5810985214897231e-06, "loss": 0.3955, "step": 11570 }, { "epoch": 0.6942461150776984, "grad_norm": 1.2451567649841309, "learning_rate": 1.5805297278594971e-06, "loss": 0.428, "step": 11571 }, { "epoch": 0.6943061138777225, "grad_norm": 1.2949897050857544, "learning_rate": 1.5799610067189198e-06, "loss": 0.387, "step": 11572 }, { "epoch": 0.6943661126777464, "grad_norm": 1.3465003967285156, "learning_rate": 1.579392358089466e-06, "loss": 0.384, "step": 11573 }, { "epoch": 0.6944261114777704, "grad_norm": 1.237207293510437, "learning_rate": 1.5788237819926155e-06, "loss": 0.3278, "step": 11574 }, { "epoch": 0.6944861102777945, "grad_norm": 1.2367417812347412, "learning_rate": 1.578255278449835e-06, "loss": 0.3602, "step": 11575 }, { "epoch": 0.6945461090778184, "grad_norm": 1.1702214479446411, "learning_rate": 1.577686847482597e-06, "loss": 0.3286, "step": 11576 }, { "epoch": 0.6946061078778425, "grad_norm": 1.2722381353378296, "learning_rate": 1.5771184891123703e-06, "loss": 0.3833, "step": 11577 }, { "epoch": 0.6946661066778664, "grad_norm": 1.317482590675354, "learning_rate": 1.5765502033606177e-06, "loss": 0.3627, "step": 11578 }, { "epoch": 0.6947261054778905, "grad_norm": 1.3251302242279053, "learning_rate": 1.5759819902488004e-06, "loss": 0.3707, "step": 11579 }, { "epoch": 0.6947861042779144, "grad_norm": 1.214937686920166, "learning_rate": 1.5754138497983766e-06, "loss": 0.3778, "step": 11580 }, { "epoch": 0.6948461030779385, "grad_norm": 1.1326249837875366, "learning_rate": 1.5748457820308043e-06, "loss": 0.3666, "step": 11581 }, { "epoch": 0.6949061018779624, "grad_norm": 1.415071964263916, "learning_rate": 1.5742777869675359e-06, "loss": 0.4046, "step": 11582 }, { "epoch": 0.6949661006779865, "grad_norm": 1.154233455657959, "learning_rate": 1.573709864630021e-06, "loss": 0.3899, "step": 11583 }, { "epoch": 0.6950260994780104, "grad_norm": 1.2904815673828125, "learning_rate": 1.5731420150397092e-06, "loss": 0.351, "step": 11584 }, { "epoch": 0.6950860982780345, "grad_norm": 1.2405658960342407, "learning_rate": 1.5725742382180448e-06, "loss": 0.3703, "step": 11585 }, { "epoch": 0.6951460970780584, "grad_norm": 1.4840378761291504, "learning_rate": 1.5720065341864702e-06, "loss": 0.4258, "step": 11586 }, { "epoch": 0.6952060958780825, "grad_norm": 1.2477164268493652, "learning_rate": 1.5714389029664237e-06, "loss": 0.3985, "step": 11587 }, { "epoch": 0.6952660946781064, "grad_norm": 1.2801414728164673, "learning_rate": 1.570871344579343e-06, "loss": 0.3706, "step": 11588 }, { "epoch": 0.6953260934781305, "grad_norm": 1.2105522155761719, "learning_rate": 1.5703038590466655e-06, "loss": 0.3798, "step": 11589 }, { "epoch": 0.6953860922781544, "grad_norm": 1.264459252357483, "learning_rate": 1.569736446389817e-06, "loss": 0.3391, "step": 11590 }, { "epoch": 0.6954460910781785, "grad_norm": 1.3344322443008423, "learning_rate": 1.5691691066302297e-06, "loss": 0.3748, "step": 11591 }, { "epoch": 0.6955060898782024, "grad_norm": 1.323386788368225, "learning_rate": 1.568601839789327e-06, "loss": 0.3669, "step": 11592 }, { "epoch": 0.6955660886782264, "grad_norm": 1.3477121591567993, "learning_rate": 1.568034645888535e-06, "loss": 0.3519, "step": 11593 }, { "epoch": 0.6956260874782504, "grad_norm": 1.553995966911316, "learning_rate": 1.5674675249492721e-06, "loss": 0.4189, "step": 11594 }, { "epoch": 0.6956860862782744, "grad_norm": 1.3546406030654907, "learning_rate": 1.5669004769929551e-06, "loss": 0.3455, "step": 11595 }, { "epoch": 0.6957460850782984, "grad_norm": 1.2539857625961304, "learning_rate": 1.5663335020410012e-06, "loss": 0.4025, "step": 11596 }, { "epoch": 0.6958060838783224, "grad_norm": 1.241248607635498, "learning_rate": 1.565766600114821e-06, "loss": 0.3606, "step": 11597 }, { "epoch": 0.6958660826783465, "grad_norm": 1.3864011764526367, "learning_rate": 1.5651997712358241e-06, "loss": 0.3776, "step": 11598 }, { "epoch": 0.6959260814783704, "grad_norm": 1.1838973760604858, "learning_rate": 1.5646330154254156e-06, "loss": 0.2954, "step": 11599 }, { "epoch": 0.6959860802783945, "grad_norm": 1.1619328260421753, "learning_rate": 1.5640663327050019e-06, "loss": 0.3739, "step": 11600 }, { "epoch": 0.6960460790784184, "grad_norm": 1.3034417629241943, "learning_rate": 1.5634997230959827e-06, "loss": 0.4333, "step": 11601 }, { "epoch": 0.6961060778784425, "grad_norm": 1.2815219163894653, "learning_rate": 1.5629331866197552e-06, "loss": 0.3543, "step": 11602 }, { "epoch": 0.6961660766784664, "grad_norm": 1.3266170024871826, "learning_rate": 1.5623667232977168e-06, "loss": 0.3731, "step": 11603 }, { "epoch": 0.6962260754784905, "grad_norm": 1.32321035861969, "learning_rate": 1.5618003331512586e-06, "loss": 0.4219, "step": 11604 }, { "epoch": 0.6962860742785144, "grad_norm": 1.418489694595337, "learning_rate": 1.5612340162017725e-06, "loss": 0.4663, "step": 11605 }, { "epoch": 0.6963460730785385, "grad_norm": 1.157657265663147, "learning_rate": 1.5606677724706445e-06, "loss": 0.317, "step": 11606 }, { "epoch": 0.6964060718785624, "grad_norm": 1.4223181009292603, "learning_rate": 1.5601016019792578e-06, "loss": 0.3679, "step": 11607 }, { "epoch": 0.6964660706785865, "grad_norm": 1.3076438903808594, "learning_rate": 1.559535504748997e-06, "loss": 0.3795, "step": 11608 }, { "epoch": 0.6965260694786104, "grad_norm": 1.2751930952072144, "learning_rate": 1.5589694808012394e-06, "loss": 0.3526, "step": 11609 }, { "epoch": 0.6965860682786345, "grad_norm": 1.216184377670288, "learning_rate": 1.5584035301573612e-06, "loss": 0.3702, "step": 11610 }, { "epoch": 0.6966460670786584, "grad_norm": 1.4035392999649048, "learning_rate": 1.5578376528387345e-06, "loss": 0.3906, "step": 11611 }, { "epoch": 0.6967060658786824, "grad_norm": 1.349595546722412, "learning_rate": 1.5572718488667324e-06, "loss": 0.3809, "step": 11612 }, { "epoch": 0.6967660646787064, "grad_norm": 1.4768651723861694, "learning_rate": 1.5567061182627215e-06, "loss": 0.3935, "step": 11613 }, { "epoch": 0.6968260634787304, "grad_norm": 1.2849141359329224, "learning_rate": 1.5561404610480659e-06, "loss": 0.3723, "step": 11614 }, { "epoch": 0.6968860622787544, "grad_norm": 1.2900487184524536, "learning_rate": 1.5555748772441295e-06, "loss": 0.3741, "step": 11615 }, { "epoch": 0.6969460610787784, "grad_norm": 1.3305294513702393, "learning_rate": 1.5550093668722716e-06, "loss": 0.3688, "step": 11616 }, { "epoch": 0.6970060598788024, "grad_norm": 1.2160158157348633, "learning_rate": 1.5544439299538475e-06, "loss": 0.3698, "step": 11617 }, { "epoch": 0.6970660586788264, "grad_norm": 1.2970855236053467, "learning_rate": 1.553878566510213e-06, "loss": 0.387, "step": 11618 }, { "epoch": 0.6971260574788504, "grad_norm": 1.3293074369430542, "learning_rate": 1.5533132765627176e-06, "loss": 0.3859, "step": 11619 }, { "epoch": 0.6971860562788744, "grad_norm": 1.2623826265335083, "learning_rate": 1.5527480601327136e-06, "loss": 0.3767, "step": 11620 }, { "epoch": 0.6972460550788985, "grad_norm": 1.3619099855422974, "learning_rate": 1.5521829172415407e-06, "loss": 0.431, "step": 11621 }, { "epoch": 0.6973060538789224, "grad_norm": 1.3300985097885132, "learning_rate": 1.5516178479105445e-06, "loss": 0.3829, "step": 11622 }, { "epoch": 0.6973660526789465, "grad_norm": 1.421128749847412, "learning_rate": 1.551052852161067e-06, "loss": 0.3562, "step": 11623 }, { "epoch": 0.6974260514789704, "grad_norm": 1.2866132259368896, "learning_rate": 1.5504879300144437e-06, "loss": 0.41, "step": 11624 }, { "epoch": 0.6974860502789945, "grad_norm": 1.315739631652832, "learning_rate": 1.5499230814920096e-06, "loss": 0.4136, "step": 11625 }, { "epoch": 0.6975460490790184, "grad_norm": 1.267048954963684, "learning_rate": 1.5493583066150949e-06, "loss": 0.356, "step": 11626 }, { "epoch": 0.6976060478790425, "grad_norm": 1.3459601402282715, "learning_rate": 1.548793605405031e-06, "loss": 0.3999, "step": 11627 }, { "epoch": 0.6976660466790664, "grad_norm": 1.395461082458496, "learning_rate": 1.5482289778831428e-06, "loss": 0.3936, "step": 11628 }, { "epoch": 0.6977260454790905, "grad_norm": 1.3429502248764038, "learning_rate": 1.5476644240707528e-06, "loss": 0.3814, "step": 11629 }, { "epoch": 0.6977860442791144, "grad_norm": 1.2677184343338013, "learning_rate": 1.547099943989184e-06, "loss": 0.3881, "step": 11630 }, { "epoch": 0.6978460430791384, "grad_norm": 1.3793201446533203, "learning_rate": 1.546535537659753e-06, "loss": 0.3839, "step": 11631 }, { "epoch": 0.6979060418791624, "grad_norm": 1.2696900367736816, "learning_rate": 1.5459712051037746e-06, "loss": 0.3415, "step": 11632 }, { "epoch": 0.6979660406791864, "grad_norm": 1.3752288818359375, "learning_rate": 1.54540694634256e-06, "loss": 0.4594, "step": 11633 }, { "epoch": 0.6980260394792104, "grad_norm": 1.3081011772155762, "learning_rate": 1.5448427613974198e-06, "loss": 0.3792, "step": 11634 }, { "epoch": 0.6980860382792344, "grad_norm": 1.3247300386428833, "learning_rate": 1.5442786502896635e-06, "loss": 0.366, "step": 11635 }, { "epoch": 0.6981460370792584, "grad_norm": 1.4762322902679443, "learning_rate": 1.5437146130405897e-06, "loss": 0.3742, "step": 11636 }, { "epoch": 0.6982060358792824, "grad_norm": 1.2483631372451782, "learning_rate": 1.5431506496715037e-06, "loss": 0.3991, "step": 11637 }, { "epoch": 0.6982660346793064, "grad_norm": 1.2212945222854614, "learning_rate": 1.542586760203701e-06, "loss": 0.3538, "step": 11638 }, { "epoch": 0.6983260334793304, "grad_norm": 1.3850830793380737, "learning_rate": 1.5420229446584794e-06, "loss": 0.4073, "step": 11639 }, { "epoch": 0.6983860322793544, "grad_norm": 1.3700768947601318, "learning_rate": 1.5414592030571308e-06, "loss": 0.3545, "step": 11640 }, { "epoch": 0.6984460310793784, "grad_norm": 1.3630800247192383, "learning_rate": 1.5408955354209435e-06, "loss": 0.4043, "step": 11641 }, { "epoch": 0.6985060298794024, "grad_norm": 1.3393278121948242, "learning_rate": 1.5403319417712079e-06, "loss": 0.403, "step": 11642 }, { "epoch": 0.6985660286794264, "grad_norm": 1.4688464403152466, "learning_rate": 1.539768422129206e-06, "loss": 0.3873, "step": 11643 }, { "epoch": 0.6986260274794505, "grad_norm": 1.2331115007400513, "learning_rate": 1.5392049765162207e-06, "loss": 0.3605, "step": 11644 }, { "epoch": 0.6986860262794744, "grad_norm": 1.3425180912017822, "learning_rate": 1.5386416049535284e-06, "loss": 0.3765, "step": 11645 }, { "epoch": 0.6987460250794985, "grad_norm": 1.2854535579681396, "learning_rate": 1.5380783074624083e-06, "loss": 0.345, "step": 11646 }, { "epoch": 0.6988060238795224, "grad_norm": 1.2109570503234863, "learning_rate": 1.5375150840641318e-06, "loss": 0.379, "step": 11647 }, { "epoch": 0.6988660226795465, "grad_norm": 1.2231903076171875, "learning_rate": 1.5369519347799685e-06, "loss": 0.3396, "step": 11648 }, { "epoch": 0.6989260214795704, "grad_norm": 1.2857005596160889, "learning_rate": 1.5363888596311877e-06, "loss": 0.3886, "step": 11649 }, { "epoch": 0.6989860202795944, "grad_norm": 1.3768055438995361, "learning_rate": 1.5358258586390527e-06, "loss": 0.3833, "step": 11650 }, { "epoch": 0.6990460190796184, "grad_norm": 1.304573893547058, "learning_rate": 1.5352629318248276e-06, "loss": 0.3854, "step": 11651 }, { "epoch": 0.6991060178796424, "grad_norm": 1.2950845956802368, "learning_rate": 1.5347000792097701e-06, "loss": 0.3897, "step": 11652 }, { "epoch": 0.6991660166796664, "grad_norm": 1.3734302520751953, "learning_rate": 1.5341373008151352e-06, "loss": 0.4307, "step": 11653 }, { "epoch": 0.6992260154796904, "grad_norm": 1.3534722328186035, "learning_rate": 1.5335745966621794e-06, "loss": 0.3763, "step": 11654 }, { "epoch": 0.6992860142797144, "grad_norm": 1.245377540588379, "learning_rate": 1.533011966772152e-06, "loss": 0.3637, "step": 11655 }, { "epoch": 0.6993460130797384, "grad_norm": 1.2875944375991821, "learning_rate": 1.5324494111663013e-06, "loss": 0.3464, "step": 11656 }, { "epoch": 0.6994060118797624, "grad_norm": 1.3059831857681274, "learning_rate": 1.5318869298658708e-06, "loss": 0.3842, "step": 11657 }, { "epoch": 0.6994660106797864, "grad_norm": 1.2834299802780151, "learning_rate": 1.5313245228921058e-06, "loss": 0.394, "step": 11658 }, { "epoch": 0.6995260094798104, "grad_norm": 1.3316556215286255, "learning_rate": 1.530762190266244e-06, "loss": 0.3728, "step": 11659 }, { "epoch": 0.6995860082798344, "grad_norm": 1.4089149236679077, "learning_rate": 1.530199932009521e-06, "loss": 0.4054, "step": 11660 }, { "epoch": 0.6996460070798584, "grad_norm": 1.2555292844772339, "learning_rate": 1.5296377481431738e-06, "loss": 0.3671, "step": 11661 }, { "epoch": 0.6997060058798824, "grad_norm": 1.2333581447601318, "learning_rate": 1.5290756386884318e-06, "loss": 0.4039, "step": 11662 }, { "epoch": 0.6997660046799064, "grad_norm": 1.2938117980957031, "learning_rate": 1.5285136036665221e-06, "loss": 0.3788, "step": 11663 }, { "epoch": 0.6998260034799304, "grad_norm": 1.3982717990875244, "learning_rate": 1.5279516430986728e-06, "loss": 0.3446, "step": 11664 }, { "epoch": 0.6998860022799545, "grad_norm": 1.3441581726074219, "learning_rate": 1.527389757006104e-06, "loss": 0.3852, "step": 11665 }, { "epoch": 0.6999460010799784, "grad_norm": 1.2122411727905273, "learning_rate": 1.5268279454100397e-06, "loss": 0.3384, "step": 11666 }, { "epoch": 0.7000059998800024, "grad_norm": 1.3174179792404175, "learning_rate": 1.5262662083316912e-06, "loss": 0.351, "step": 11667 }, { "epoch": 0.7000659986800264, "grad_norm": 1.2471206188201904, "learning_rate": 1.5257045457922773e-06, "loss": 0.3935, "step": 11668 }, { "epoch": 0.7001259974800504, "grad_norm": 1.2722011804580688, "learning_rate": 1.5251429578130067e-06, "loss": 0.378, "step": 11669 }, { "epoch": 0.7001859962800744, "grad_norm": 1.4173998832702637, "learning_rate": 1.52458144441509e-06, "loss": 0.3886, "step": 11670 }, { "epoch": 0.7002459950800984, "grad_norm": 1.3689682483673096, "learning_rate": 1.5240200056197323e-06, "loss": 0.3506, "step": 11671 }, { "epoch": 0.7003059938801224, "grad_norm": 1.2511470317840576, "learning_rate": 1.5234586414481352e-06, "loss": 0.4109, "step": 11672 }, { "epoch": 0.7003659926801464, "grad_norm": 1.3326232433319092, "learning_rate": 1.5228973519215014e-06, "loss": 0.3967, "step": 11673 }, { "epoch": 0.7004259914801704, "grad_norm": 1.2576185464859009, "learning_rate": 1.522336137061027e-06, "loss": 0.3568, "step": 11674 }, { "epoch": 0.7004859902801944, "grad_norm": 1.3830574750900269, "learning_rate": 1.521774996887905e-06, "loss": 0.3526, "step": 11675 }, { "epoch": 0.7005459890802184, "grad_norm": 1.2241389751434326, "learning_rate": 1.52121393142333e-06, "loss": 0.4037, "step": 11676 }, { "epoch": 0.7006059878802424, "grad_norm": 1.223172664642334, "learning_rate": 1.5206529406884898e-06, "loss": 0.3545, "step": 11677 }, { "epoch": 0.7006659866802664, "grad_norm": 1.3693238496780396, "learning_rate": 1.5200920247045698e-06, "loss": 0.3672, "step": 11678 }, { "epoch": 0.7007259854802904, "grad_norm": 1.2599327564239502, "learning_rate": 1.5195311834927525e-06, "loss": 0.3883, "step": 11679 }, { "epoch": 0.7007859842803144, "grad_norm": 1.2627267837524414, "learning_rate": 1.5189704170742203e-06, "loss": 0.3966, "step": 11680 }, { "epoch": 0.7008459830803384, "grad_norm": 1.386107325553894, "learning_rate": 1.51840972547015e-06, "loss": 0.4002, "step": 11681 }, { "epoch": 0.7009059818803623, "grad_norm": 1.3916218280792236, "learning_rate": 1.5178491087017153e-06, "loss": 0.3727, "step": 11682 }, { "epoch": 0.7009659806803864, "grad_norm": 1.1879581212997437, "learning_rate": 1.5172885667900902e-06, "loss": 0.3495, "step": 11683 }, { "epoch": 0.7010259794804103, "grad_norm": 1.444068193435669, "learning_rate": 1.5167280997564415e-06, "loss": 0.3756, "step": 11684 }, { "epoch": 0.7010859782804344, "grad_norm": 1.412614107131958, "learning_rate": 1.516167707621938e-06, "loss": 0.3865, "step": 11685 }, { "epoch": 0.7011459770804583, "grad_norm": 1.3279575109481812, "learning_rate": 1.515607390407742e-06, "loss": 0.3583, "step": 11686 }, { "epoch": 0.7012059758804824, "grad_norm": 1.4542489051818848, "learning_rate": 1.5150471481350126e-06, "loss": 0.3808, "step": 11687 }, { "epoch": 0.7012659746805064, "grad_norm": 1.273686170578003, "learning_rate": 1.5144869808249098e-06, "loss": 0.4016, "step": 11688 }, { "epoch": 0.7013259734805304, "grad_norm": 1.275321125984192, "learning_rate": 1.5139268884985885e-06, "loss": 0.3864, "step": 11689 }, { "epoch": 0.7013859722805544, "grad_norm": 1.2057663202285767, "learning_rate": 1.5133668711771995e-06, "loss": 0.3546, "step": 11690 }, { "epoch": 0.7014459710805784, "grad_norm": 1.2137564420700073, "learning_rate": 1.512806928881892e-06, "loss": 0.3327, "step": 11691 }, { "epoch": 0.7015059698806024, "grad_norm": 1.3606600761413574, "learning_rate": 1.512247061633814e-06, "loss": 0.4081, "step": 11692 }, { "epoch": 0.7015659686806264, "grad_norm": 1.223481297492981, "learning_rate": 1.5116872694541085e-06, "loss": 0.3542, "step": 11693 }, { "epoch": 0.7016259674806504, "grad_norm": 1.3304688930511475, "learning_rate": 1.5111275523639146e-06, "loss": 0.368, "step": 11694 }, { "epoch": 0.7016859662806744, "grad_norm": 1.1477302312850952, "learning_rate": 1.5105679103843734e-06, "loss": 0.3551, "step": 11695 }, { "epoch": 0.7017459650806984, "grad_norm": 1.2484734058380127, "learning_rate": 1.5100083435366168e-06, "loss": 0.3635, "step": 11696 }, { "epoch": 0.7018059638807224, "grad_norm": 1.4706895351409912, "learning_rate": 1.5094488518417815e-06, "loss": 0.4511, "step": 11697 }, { "epoch": 0.7018659626807464, "grad_norm": 1.2828621864318848, "learning_rate": 1.5088894353209914e-06, "loss": 0.3599, "step": 11698 }, { "epoch": 0.7019259614807704, "grad_norm": 1.3550776243209839, "learning_rate": 1.5083300939953758e-06, "loss": 0.4001, "step": 11699 }, { "epoch": 0.7019859602807944, "grad_norm": 1.3397444486618042, "learning_rate": 1.50777082788606e-06, "loss": 0.3865, "step": 11700 }, { "epoch": 0.7020459590808183, "grad_norm": 1.408616542816162, "learning_rate": 1.5072116370141634e-06, "loss": 0.4007, "step": 11701 }, { "epoch": 0.7021059578808424, "grad_norm": 1.2645411491394043, "learning_rate": 1.506652521400804e-06, "loss": 0.3757, "step": 11702 }, { "epoch": 0.7021659566808663, "grad_norm": 1.2829480171203613, "learning_rate": 1.506093481067096e-06, "loss": 0.3608, "step": 11703 }, { "epoch": 0.7022259554808904, "grad_norm": 1.2643009424209595, "learning_rate": 1.5055345160341541e-06, "loss": 0.3894, "step": 11704 }, { "epoch": 0.7022859542809143, "grad_norm": 1.3114925622940063, "learning_rate": 1.5049756263230873e-06, "loss": 0.388, "step": 11705 }, { "epoch": 0.7023459530809384, "grad_norm": 1.3882304430007935, "learning_rate": 1.504416811955e-06, "loss": 0.3984, "step": 11706 }, { "epoch": 0.7024059518809623, "grad_norm": 1.2852277755737305, "learning_rate": 1.5038580729509993e-06, "loss": 0.364, "step": 11707 }, { "epoch": 0.7024659506809864, "grad_norm": 1.3204017877578735, "learning_rate": 1.5032994093321847e-06, "loss": 0.4078, "step": 11708 }, { "epoch": 0.7025259494810103, "grad_norm": 1.3997390270233154, "learning_rate": 1.5027408211196548e-06, "loss": 0.3778, "step": 11709 }, { "epoch": 0.7025859482810344, "grad_norm": 1.3630849123001099, "learning_rate": 1.5021823083345028e-06, "loss": 0.3745, "step": 11710 }, { "epoch": 0.7026459470810584, "grad_norm": 1.3526774644851685, "learning_rate": 1.5016238709978236e-06, "loss": 0.3172, "step": 11711 }, { "epoch": 0.7027059458810824, "grad_norm": 1.2420839071273804, "learning_rate": 1.501065509130708e-06, "loss": 0.346, "step": 11712 }, { "epoch": 0.7027659446811064, "grad_norm": 1.3638709783554077, "learning_rate": 1.500507222754239e-06, "loss": 0.4047, "step": 11713 }, { "epoch": 0.7028259434811304, "grad_norm": 1.418882131576538, "learning_rate": 1.4999490118895036e-06, "loss": 0.4137, "step": 11714 }, { "epoch": 0.7028859422811544, "grad_norm": 1.1779398918151855, "learning_rate": 1.499390876557581e-06, "loss": 0.3655, "step": 11715 }, { "epoch": 0.7029459410811784, "grad_norm": 1.3009405136108398, "learning_rate": 1.498832816779551e-06, "loss": 0.3995, "step": 11716 }, { "epoch": 0.7030059398812024, "grad_norm": 1.2975894212722778, "learning_rate": 1.4982748325764882e-06, "loss": 0.3538, "step": 11717 }, { "epoch": 0.7030659386812264, "grad_norm": 1.5756545066833496, "learning_rate": 1.4977169239694639e-06, "loss": 0.4021, "step": 11718 }, { "epoch": 0.7031259374812504, "grad_norm": 1.291154146194458, "learning_rate": 1.4971590909795507e-06, "loss": 0.3702, "step": 11719 }, { "epoch": 0.7031859362812743, "grad_norm": 1.3378585577011108, "learning_rate": 1.4966013336278133e-06, "loss": 0.378, "step": 11720 }, { "epoch": 0.7032459350812984, "grad_norm": 1.3501214981079102, "learning_rate": 1.4960436519353162e-06, "loss": 0.3789, "step": 11721 }, { "epoch": 0.7033059338813223, "grad_norm": 1.1630713939666748, "learning_rate": 1.495486045923119e-06, "loss": 0.3624, "step": 11722 }, { "epoch": 0.7033659326813464, "grad_norm": 1.31754469871521, "learning_rate": 1.4949285156122827e-06, "loss": 0.3768, "step": 11723 }, { "epoch": 0.7034259314813703, "grad_norm": 1.395819902420044, "learning_rate": 1.4943710610238611e-06, "loss": 0.385, "step": 11724 }, { "epoch": 0.7034859302813944, "grad_norm": 1.211292028427124, "learning_rate": 1.4938136821789057e-06, "loss": 0.3576, "step": 11725 }, { "epoch": 0.7035459290814183, "grad_norm": 1.3356785774230957, "learning_rate": 1.4932563790984687e-06, "loss": 0.3716, "step": 11726 }, { "epoch": 0.7036059278814424, "grad_norm": 1.2815102338790894, "learning_rate": 1.4926991518035955e-06, "loss": 0.4012, "step": 11727 }, { "epoch": 0.7036659266814663, "grad_norm": 1.3242700099945068, "learning_rate": 1.492142000315329e-06, "loss": 0.399, "step": 11728 }, { "epoch": 0.7037259254814904, "grad_norm": 1.3529064655303955, "learning_rate": 1.4915849246547124e-06, "loss": 0.3783, "step": 11729 }, { "epoch": 0.7037859242815143, "grad_norm": 1.3452497720718384, "learning_rate": 1.4910279248427818e-06, "loss": 0.3627, "step": 11730 }, { "epoch": 0.7038459230815384, "grad_norm": 1.4144258499145508, "learning_rate": 1.4904710009005748e-06, "loss": 0.3727, "step": 11731 }, { "epoch": 0.7039059218815624, "grad_norm": 1.1304577589035034, "learning_rate": 1.4899141528491224e-06, "loss": 0.3802, "step": 11732 }, { "epoch": 0.7039659206815864, "grad_norm": 1.3516603708267212, "learning_rate": 1.4893573807094535e-06, "loss": 0.3707, "step": 11733 }, { "epoch": 0.7040259194816104, "grad_norm": 1.5588443279266357, "learning_rate": 1.4888006845025976e-06, "loss": 0.4559, "step": 11734 }, { "epoch": 0.7040859182816344, "grad_norm": 1.2660011053085327, "learning_rate": 1.488244064249576e-06, "loss": 0.4269, "step": 11735 }, { "epoch": 0.7041459170816584, "grad_norm": 1.2750048637390137, "learning_rate": 1.4876875199714114e-06, "loss": 0.341, "step": 11736 }, { "epoch": 0.7042059158816824, "grad_norm": 1.2562291622161865, "learning_rate": 1.4871310516891193e-06, "loss": 0.3669, "step": 11737 }, { "epoch": 0.7042659146817064, "grad_norm": 1.2552586793899536, "learning_rate": 1.486574659423718e-06, "loss": 0.3814, "step": 11738 }, { "epoch": 0.7043259134817303, "grad_norm": 1.2605963945388794, "learning_rate": 1.486018343196219e-06, "loss": 0.4119, "step": 11739 }, { "epoch": 0.7043859122817544, "grad_norm": 1.2251434326171875, "learning_rate": 1.4854621030276299e-06, "loss": 0.3862, "step": 11740 }, { "epoch": 0.7044459110817783, "grad_norm": 1.3551771640777588, "learning_rate": 1.4849059389389605e-06, "loss": 0.3879, "step": 11741 }, { "epoch": 0.7045059098818024, "grad_norm": 1.3344184160232544, "learning_rate": 1.4843498509512118e-06, "loss": 0.3987, "step": 11742 }, { "epoch": 0.7045659086818263, "grad_norm": 1.260534405708313, "learning_rate": 1.4837938390853882e-06, "loss": 0.3594, "step": 11743 }, { "epoch": 0.7046259074818504, "grad_norm": 1.3433400392532349, "learning_rate": 1.483237903362483e-06, "loss": 0.4303, "step": 11744 }, { "epoch": 0.7046859062818743, "grad_norm": 1.3393785953521729, "learning_rate": 1.482682043803494e-06, "loss": 0.3606, "step": 11745 }, { "epoch": 0.7047459050818984, "grad_norm": 1.200299859046936, "learning_rate": 1.4821262604294143e-06, "loss": 0.325, "step": 11746 }, { "epoch": 0.7048059038819223, "grad_norm": 1.432547926902771, "learning_rate": 1.4815705532612327e-06, "loss": 0.3867, "step": 11747 }, { "epoch": 0.7048659026819464, "grad_norm": 1.5717359781265259, "learning_rate": 1.4810149223199354e-06, "loss": 0.4597, "step": 11748 }, { "epoch": 0.7049259014819703, "grad_norm": 1.479509949684143, "learning_rate": 1.4804593676265043e-06, "loss": 0.4735, "step": 11749 }, { "epoch": 0.7049859002819944, "grad_norm": 1.264967441558838, "learning_rate": 1.4799038892019236e-06, "loss": 0.4094, "step": 11750 }, { "epoch": 0.7050458990820183, "grad_norm": 1.1798617839813232, "learning_rate": 1.479348487067169e-06, "loss": 0.4074, "step": 11751 }, { "epoch": 0.7051058978820424, "grad_norm": 1.3938933610916138, "learning_rate": 1.4787931612432151e-06, "loss": 0.3801, "step": 11752 }, { "epoch": 0.7051658966820663, "grad_norm": 1.5394116640090942, "learning_rate": 1.4782379117510362e-06, "loss": 0.3717, "step": 11753 }, { "epoch": 0.7052258954820904, "grad_norm": 1.230034589767456, "learning_rate": 1.4776827386115998e-06, "loss": 0.3592, "step": 11754 }, { "epoch": 0.7052858942821144, "grad_norm": 1.275913119316101, "learning_rate": 1.4771276418458734e-06, "loss": 0.3516, "step": 11755 }, { "epoch": 0.7053458930821384, "grad_norm": 1.409162998199463, "learning_rate": 1.4765726214748182e-06, "loss": 0.4007, "step": 11756 }, { "epoch": 0.7054058918821624, "grad_norm": 1.3057830333709717, "learning_rate": 1.4760176775193962e-06, "loss": 0.35, "step": 11757 }, { "epoch": 0.7054658906821863, "grad_norm": 1.3448361158370972, "learning_rate": 1.475462810000568e-06, "loss": 0.4268, "step": 11758 }, { "epoch": 0.7055258894822104, "grad_norm": 1.283730387687683, "learning_rate": 1.474908018939283e-06, "loss": 0.3585, "step": 11759 }, { "epoch": 0.7055858882822343, "grad_norm": 1.3025778532028198, "learning_rate": 1.4743533043564971e-06, "loss": 0.3665, "step": 11760 }, { "epoch": 0.7056458870822584, "grad_norm": 1.2740260362625122, "learning_rate": 1.4737986662731571e-06, "loss": 0.4011, "step": 11761 }, { "epoch": 0.7057058858822823, "grad_norm": 1.3689481019973755, "learning_rate": 1.4732441047102122e-06, "loss": 0.4232, "step": 11762 }, { "epoch": 0.7057658846823064, "grad_norm": 1.2308624982833862, "learning_rate": 1.4726896196886014e-06, "loss": 0.3871, "step": 11763 }, { "epoch": 0.7058258834823303, "grad_norm": 1.2694194316864014, "learning_rate": 1.472135211229267e-06, "loss": 0.3612, "step": 11764 }, { "epoch": 0.7058858822823544, "grad_norm": 1.2946752309799194, "learning_rate": 1.4715808793531484e-06, "loss": 0.3695, "step": 11765 }, { "epoch": 0.7059458810823783, "grad_norm": 1.437221884727478, "learning_rate": 1.4710266240811781e-06, "loss": 0.4156, "step": 11766 }, { "epoch": 0.7060058798824024, "grad_norm": 1.2422105073928833, "learning_rate": 1.4704724454342884e-06, "loss": 0.3861, "step": 11767 }, { "epoch": 0.7060658786824263, "grad_norm": 1.4360109567642212, "learning_rate": 1.469918343433407e-06, "loss": 0.4574, "step": 11768 }, { "epoch": 0.7061258774824504, "grad_norm": 1.4426683187484741, "learning_rate": 1.4693643180994614e-06, "loss": 0.3896, "step": 11769 }, { "epoch": 0.7061858762824743, "grad_norm": 1.323664665222168, "learning_rate": 1.4688103694533742e-06, "loss": 0.3955, "step": 11770 }, { "epoch": 0.7062458750824984, "grad_norm": 1.381728172302246, "learning_rate": 1.4682564975160642e-06, "loss": 0.37, "step": 11771 }, { "epoch": 0.7063058738825223, "grad_norm": 1.2565581798553467, "learning_rate": 1.467702702308451e-06, "loss": 0.4097, "step": 11772 }, { "epoch": 0.7063658726825464, "grad_norm": 1.1998183727264404, "learning_rate": 1.467148983851447e-06, "loss": 0.359, "step": 11773 }, { "epoch": 0.7064258714825703, "grad_norm": 1.4572488069534302, "learning_rate": 1.4665953421659637e-06, "loss": 0.3502, "step": 11774 }, { "epoch": 0.7064858702825944, "grad_norm": 1.520837426185608, "learning_rate": 1.466041777272911e-06, "loss": 0.4037, "step": 11775 }, { "epoch": 0.7065458690826183, "grad_norm": 1.208868145942688, "learning_rate": 1.4654882891931925e-06, "loss": 0.3679, "step": 11776 }, { "epoch": 0.7066058678826423, "grad_norm": 1.2811418771743774, "learning_rate": 1.4649348779477135e-06, "loss": 0.3542, "step": 11777 }, { "epoch": 0.7066658666826664, "grad_norm": 1.2320879697799683, "learning_rate": 1.4643815435573722e-06, "loss": 0.427, "step": 11778 }, { "epoch": 0.7067258654826903, "grad_norm": 1.3362431526184082, "learning_rate": 1.4638282860430659e-06, "loss": 0.3808, "step": 11779 }, { "epoch": 0.7067858642827144, "grad_norm": 1.3798211812973022, "learning_rate": 1.4632751054256872e-06, "loss": 0.3778, "step": 11780 }, { "epoch": 0.7068458630827383, "grad_norm": 1.3967621326446533, "learning_rate": 1.4627220017261297e-06, "loss": 0.4023, "step": 11781 }, { "epoch": 0.7069058618827624, "grad_norm": 1.2495636940002441, "learning_rate": 1.4621689749652803e-06, "loss": 0.3142, "step": 11782 }, { "epoch": 0.7069658606827863, "grad_norm": 1.267271637916565, "learning_rate": 1.4616160251640232e-06, "loss": 0.3704, "step": 11783 }, { "epoch": 0.7070258594828104, "grad_norm": 1.2868067026138306, "learning_rate": 1.4610631523432434e-06, "loss": 0.3354, "step": 11784 }, { "epoch": 0.7070858582828343, "grad_norm": 1.3793994188308716, "learning_rate": 1.4605103565238186e-06, "loss": 0.3845, "step": 11785 }, { "epoch": 0.7071458570828584, "grad_norm": 1.3573265075683594, "learning_rate": 1.4599576377266246e-06, "loss": 0.3708, "step": 11786 }, { "epoch": 0.7072058558828823, "grad_norm": 1.222542643547058, "learning_rate": 1.4594049959725376e-06, "loss": 0.4059, "step": 11787 }, { "epoch": 0.7072658546829064, "grad_norm": 1.3321086168289185, "learning_rate": 1.4588524312824258e-06, "loss": 0.3729, "step": 11788 }, { "epoch": 0.7073258534829303, "grad_norm": 1.2653203010559082, "learning_rate": 1.4582999436771607e-06, "loss": 0.4165, "step": 11789 }, { "epoch": 0.7073858522829544, "grad_norm": 1.3786664009094238, "learning_rate": 1.4577475331776023e-06, "loss": 0.4016, "step": 11790 }, { "epoch": 0.7074458510829783, "grad_norm": 1.56752610206604, "learning_rate": 1.4571951998046161e-06, "loss": 0.4046, "step": 11791 }, { "epoch": 0.7075058498830024, "grad_norm": 1.3440788984298706, "learning_rate": 1.456642943579059e-06, "loss": 0.371, "step": 11792 }, { "epoch": 0.7075658486830263, "grad_norm": 1.3021684885025024, "learning_rate": 1.4560907645217897e-06, "loss": 0.3295, "step": 11793 }, { "epoch": 0.7076258474830504, "grad_norm": 1.2578977346420288, "learning_rate": 1.4555386626536603e-06, "loss": 0.3828, "step": 11794 }, { "epoch": 0.7076858462830743, "grad_norm": 1.2084349393844604, "learning_rate": 1.4549866379955197e-06, "loss": 0.3805, "step": 11795 }, { "epoch": 0.7077458450830983, "grad_norm": 1.2006282806396484, "learning_rate": 1.454434690568218e-06, "loss": 0.3521, "step": 11796 }, { "epoch": 0.7078058438831223, "grad_norm": 1.3447277545928955, "learning_rate": 1.4538828203925984e-06, "loss": 0.3729, "step": 11797 }, { "epoch": 0.7078658426831463, "grad_norm": 1.2041208744049072, "learning_rate": 1.4533310274895015e-06, "loss": 0.3556, "step": 11798 }, { "epoch": 0.7079258414831703, "grad_norm": 1.4134138822555542, "learning_rate": 1.4527793118797681e-06, "loss": 0.4445, "step": 11799 }, { "epoch": 0.7079858402831943, "grad_norm": 1.297264575958252, "learning_rate": 1.452227673584233e-06, "loss": 0.3742, "step": 11800 }, { "epoch": 0.7080458390832184, "grad_norm": 1.3410043716430664, "learning_rate": 1.4516761126237292e-06, "loss": 0.3257, "step": 11801 }, { "epoch": 0.7081058378832423, "grad_norm": 1.2727093696594238, "learning_rate": 1.4511246290190854e-06, "loss": 0.3352, "step": 11802 }, { "epoch": 0.7081658366832664, "grad_norm": 1.361200213432312, "learning_rate": 1.4505732227911307e-06, "loss": 0.4014, "step": 11803 }, { "epoch": 0.7082258354832903, "grad_norm": 1.2515175342559814, "learning_rate": 1.4500218939606882e-06, "loss": 0.362, "step": 11804 }, { "epoch": 0.7082858342833144, "grad_norm": 1.2380287647247314, "learning_rate": 1.449470642548578e-06, "loss": 0.371, "step": 11805 }, { "epoch": 0.7083458330833383, "grad_norm": 1.2401115894317627, "learning_rate": 1.4489194685756206e-06, "loss": 0.3672, "step": 11806 }, { "epoch": 0.7084058318833624, "grad_norm": 1.3928645849227905, "learning_rate": 1.448368372062629e-06, "loss": 0.3575, "step": 11807 }, { "epoch": 0.7084658306833863, "grad_norm": 1.2669483423233032, "learning_rate": 1.447817353030419e-06, "loss": 0.3575, "step": 11808 }, { "epoch": 0.7085258294834104, "grad_norm": 1.4837217330932617, "learning_rate": 1.4472664114997957e-06, "loss": 0.4108, "step": 11809 }, { "epoch": 0.7085858282834343, "grad_norm": 1.3408440351486206, "learning_rate": 1.4467155474915675e-06, "loss": 0.3467, "step": 11810 }, { "epoch": 0.7086458270834584, "grad_norm": 1.3504403829574585, "learning_rate": 1.44616476102654e-06, "loss": 0.4399, "step": 11811 }, { "epoch": 0.7087058258834823, "grad_norm": 1.2680394649505615, "learning_rate": 1.4456140521255118e-06, "loss": 0.4138, "step": 11812 }, { "epoch": 0.7087658246835064, "grad_norm": 1.1916810274124146, "learning_rate": 1.4450634208092813e-06, "loss": 0.37, "step": 11813 }, { "epoch": 0.7088258234835303, "grad_norm": 1.4516481161117554, "learning_rate": 1.4445128670986417e-06, "loss": 0.4017, "step": 11814 }, { "epoch": 0.7088858222835543, "grad_norm": 1.189942717552185, "learning_rate": 1.4439623910143876e-06, "loss": 0.3301, "step": 11815 }, { "epoch": 0.7089458210835783, "grad_norm": 1.2175487279891968, "learning_rate": 1.443411992577307e-06, "loss": 0.3731, "step": 11816 }, { "epoch": 0.7090058198836023, "grad_norm": 1.2956334352493286, "learning_rate": 1.4428616718081844e-06, "loss": 0.3439, "step": 11817 }, { "epoch": 0.7090658186836263, "grad_norm": 1.186970591545105, "learning_rate": 1.442311428727805e-06, "loss": 0.3673, "step": 11818 }, { "epoch": 0.7091258174836503, "grad_norm": 1.3022669553756714, "learning_rate": 1.4417612633569487e-06, "loss": 0.3879, "step": 11819 }, { "epoch": 0.7091858162836743, "grad_norm": 1.1997206211090088, "learning_rate": 1.4412111757163915e-06, "loss": 0.3715, "step": 11820 }, { "epoch": 0.7092458150836983, "grad_norm": 1.3692872524261475, "learning_rate": 1.4406611658269075e-06, "loss": 0.41, "step": 11821 }, { "epoch": 0.7093058138837224, "grad_norm": 1.4216363430023193, "learning_rate": 1.440111233709269e-06, "loss": 0.4012, "step": 11822 }, { "epoch": 0.7093658126837463, "grad_norm": 1.6706030368804932, "learning_rate": 1.4395613793842454e-06, "loss": 0.3976, "step": 11823 }, { "epoch": 0.7094258114837704, "grad_norm": 1.3522766828536987, "learning_rate": 1.4390116028726015e-06, "loss": 0.37, "step": 11824 }, { "epoch": 0.7094858102837943, "grad_norm": 1.3584668636322021, "learning_rate": 1.4384619041950992e-06, "loss": 0.414, "step": 11825 }, { "epoch": 0.7095458090838184, "grad_norm": 1.2692452669143677, "learning_rate": 1.4379122833724972e-06, "loss": 0.3396, "step": 11826 }, { "epoch": 0.7096058078838423, "grad_norm": 1.192103385925293, "learning_rate": 1.4373627404255546e-06, "loss": 0.3576, "step": 11827 }, { "epoch": 0.7096658066838664, "grad_norm": 1.475464105606079, "learning_rate": 1.4368132753750242e-06, "loss": 0.4201, "step": 11828 }, { "epoch": 0.7097258054838903, "grad_norm": 1.365822434425354, "learning_rate": 1.4362638882416553e-06, "loss": 0.3918, "step": 11829 }, { "epoch": 0.7097858042839144, "grad_norm": 1.3692055940628052, "learning_rate": 1.435714579046198e-06, "loss": 0.3705, "step": 11830 }, { "epoch": 0.7098458030839383, "grad_norm": 1.3108083009719849, "learning_rate": 1.4351653478093963e-06, "loss": 0.4464, "step": 11831 }, { "epoch": 0.7099058018839624, "grad_norm": 1.409367322921753, "learning_rate": 1.4346161945519922e-06, "loss": 0.4086, "step": 11832 }, { "epoch": 0.7099658006839863, "grad_norm": 1.2963837385177612, "learning_rate": 1.434067119294723e-06, "loss": 0.3586, "step": 11833 }, { "epoch": 0.7100257994840103, "grad_norm": 1.3249210119247437, "learning_rate": 1.4335181220583265e-06, "loss": 0.3167, "step": 11834 }, { "epoch": 0.7100857982840343, "grad_norm": 1.2106772661209106, "learning_rate": 1.4329692028635382e-06, "loss": 0.3797, "step": 11835 }, { "epoch": 0.7101457970840583, "grad_norm": 1.266506314277649, "learning_rate": 1.4324203617310831e-06, "loss": 0.3488, "step": 11836 }, { "epoch": 0.7102057958840823, "grad_norm": 1.2145861387252808, "learning_rate": 1.4318715986816926e-06, "loss": 0.3572, "step": 11837 }, { "epoch": 0.7102657946841063, "grad_norm": 1.254273772239685, "learning_rate": 1.431322913736088e-06, "loss": 0.336, "step": 11838 }, { "epoch": 0.7103257934841303, "grad_norm": 1.279229998588562, "learning_rate": 1.4307743069149935e-06, "loss": 0.3917, "step": 11839 }, { "epoch": 0.7103857922841543, "grad_norm": 1.4357181787490845, "learning_rate": 1.4302257782391257e-06, "loss": 0.3956, "step": 11840 }, { "epoch": 0.7104457910841783, "grad_norm": 1.201568603515625, "learning_rate": 1.4296773277291993e-06, "loss": 0.3531, "step": 11841 }, { "epoch": 0.7105057898842023, "grad_norm": 1.2476229667663574, "learning_rate": 1.429128955405929e-06, "loss": 0.357, "step": 11842 }, { "epoch": 0.7105657886842263, "grad_norm": 1.3699462413787842, "learning_rate": 1.4285806612900235e-06, "loss": 0.403, "step": 11843 }, { "epoch": 0.7106257874842503, "grad_norm": 1.460498571395874, "learning_rate": 1.428032445402189e-06, "loss": 0.3664, "step": 11844 }, { "epoch": 0.7106857862842744, "grad_norm": 1.2719099521636963, "learning_rate": 1.4274843077631278e-06, "loss": 0.4071, "step": 11845 }, { "epoch": 0.7107457850842983, "grad_norm": 1.3204808235168457, "learning_rate": 1.4269362483935434e-06, "loss": 0.409, "step": 11846 }, { "epoch": 0.7108057838843224, "grad_norm": 1.3946374654769897, "learning_rate": 1.4263882673141316e-06, "loss": 0.3503, "step": 11847 }, { "epoch": 0.7108657826843463, "grad_norm": 1.4081279039382935, "learning_rate": 1.4258403645455867e-06, "loss": 0.437, "step": 11848 }, { "epoch": 0.7109257814843704, "grad_norm": 1.2776930332183838, "learning_rate": 1.4252925401086023e-06, "loss": 0.4018, "step": 11849 }, { "epoch": 0.7109857802843943, "grad_norm": 1.1960997581481934, "learning_rate": 1.4247447940238668e-06, "loss": 0.3556, "step": 11850 }, { "epoch": 0.7110457790844184, "grad_norm": 1.2896926403045654, "learning_rate": 1.424197126312064e-06, "loss": 0.3842, "step": 11851 }, { "epoch": 0.7111057778844423, "grad_norm": 1.3807353973388672, "learning_rate": 1.4236495369938798e-06, "loss": 0.4093, "step": 11852 }, { "epoch": 0.7111657766844663, "grad_norm": 1.3454060554504395, "learning_rate": 1.4231020260899914e-06, "loss": 0.405, "step": 11853 }, { "epoch": 0.7112257754844903, "grad_norm": 1.3915899991989136, "learning_rate": 1.4225545936210798e-06, "loss": 0.3657, "step": 11854 }, { "epoch": 0.7112857742845143, "grad_norm": 1.4630240201950073, "learning_rate": 1.4220072396078135e-06, "loss": 0.3692, "step": 11855 }, { "epoch": 0.7113457730845383, "grad_norm": 1.5522319078445435, "learning_rate": 1.421459964070867e-06, "loss": 0.3828, "step": 11856 }, { "epoch": 0.7114057718845623, "grad_norm": 1.2265818119049072, "learning_rate": 1.4209127670309085e-06, "loss": 0.3558, "step": 11857 }, { "epoch": 0.7114657706845863, "grad_norm": 1.0531235933303833, "learning_rate": 1.420365648508603e-06, "loss": 0.3218, "step": 11858 }, { "epoch": 0.7115257694846103, "grad_norm": 1.3139288425445557, "learning_rate": 1.419818608524612e-06, "loss": 0.3968, "step": 11859 }, { "epoch": 0.7115857682846343, "grad_norm": 1.2865632772445679, "learning_rate": 1.4192716470995936e-06, "loss": 0.394, "step": 11860 }, { "epoch": 0.7116457670846583, "grad_norm": 1.2436343431472778, "learning_rate": 1.4187247642542066e-06, "loss": 0.4541, "step": 11861 }, { "epoch": 0.7117057658846823, "grad_norm": 1.386904001235962, "learning_rate": 1.4181779600091032e-06, "loss": 0.4324, "step": 11862 }, { "epoch": 0.7117657646847063, "grad_norm": 1.2871400117874146, "learning_rate": 1.4176312343849318e-06, "loss": 0.449, "step": 11863 }, { "epoch": 0.7118257634847303, "grad_norm": 1.2156692743301392, "learning_rate": 1.4170845874023428e-06, "loss": 0.3973, "step": 11864 }, { "epoch": 0.7118857622847543, "grad_norm": 1.1365667581558228, "learning_rate": 1.4165380190819782e-06, "loss": 0.3703, "step": 11865 }, { "epoch": 0.7119457610847783, "grad_norm": 1.3390114307403564, "learning_rate": 1.4159915294444828e-06, "loss": 0.3821, "step": 11866 }, { "epoch": 0.7120057598848023, "grad_norm": 1.3957852125167847, "learning_rate": 1.41544511851049e-06, "loss": 0.3718, "step": 11867 }, { "epoch": 0.7120657586848264, "grad_norm": 1.211129069328308, "learning_rate": 1.4148987863006377e-06, "loss": 0.3723, "step": 11868 }, { "epoch": 0.7121257574848503, "grad_norm": 1.3557043075561523, "learning_rate": 1.41435253283556e-06, "loss": 0.3772, "step": 11869 }, { "epoch": 0.7121857562848743, "grad_norm": 1.3671457767486572, "learning_rate": 1.4138063581358852e-06, "loss": 0.4178, "step": 11870 }, { "epoch": 0.7122457550848983, "grad_norm": 1.2270808219909668, "learning_rate": 1.4132602622222391e-06, "loss": 0.3816, "step": 11871 }, { "epoch": 0.7123057538849223, "grad_norm": 1.3984721899032593, "learning_rate": 1.4127142451152447e-06, "loss": 0.3975, "step": 11872 }, { "epoch": 0.7123657526849463, "grad_norm": 1.3272783756256104, "learning_rate": 1.412168306835525e-06, "loss": 0.377, "step": 11873 }, { "epoch": 0.7124257514849703, "grad_norm": 1.2822812795639038, "learning_rate": 1.411622447403696e-06, "loss": 0.417, "step": 11874 }, { "epoch": 0.7124857502849943, "grad_norm": 1.4416884183883667, "learning_rate": 1.4110766668403717e-06, "loss": 0.4375, "step": 11875 }, { "epoch": 0.7125457490850183, "grad_norm": 1.2367775440216064, "learning_rate": 1.4105309651661653e-06, "loss": 0.394, "step": 11876 }, { "epoch": 0.7126057478850423, "grad_norm": 1.2628933191299438, "learning_rate": 1.4099853424016849e-06, "loss": 0.3905, "step": 11877 }, { "epoch": 0.7126657466850663, "grad_norm": 1.3188629150390625, "learning_rate": 1.4094397985675357e-06, "loss": 0.3801, "step": 11878 }, { "epoch": 0.7127257454850903, "grad_norm": 1.4155449867248535, "learning_rate": 1.40889433368432e-06, "loss": 0.4051, "step": 11879 }, { "epoch": 0.7127857442851143, "grad_norm": 1.465272068977356, "learning_rate": 1.4083489477726384e-06, "loss": 0.4014, "step": 11880 }, { "epoch": 0.7128457430851383, "grad_norm": 1.4291943311691284, "learning_rate": 1.4078036408530896e-06, "loss": 0.4626, "step": 11881 }, { "epoch": 0.7129057418851623, "grad_norm": 1.2118583917617798, "learning_rate": 1.407258412946263e-06, "loss": 0.3653, "step": 11882 }, { "epoch": 0.7129657406851863, "grad_norm": 1.2949047088623047, "learning_rate": 1.406713264072753e-06, "loss": 0.3517, "step": 11883 }, { "epoch": 0.7130257394852103, "grad_norm": 1.3240184783935547, "learning_rate": 1.4061681942531443e-06, "loss": 0.3895, "step": 11884 }, { "epoch": 0.7130857382852342, "grad_norm": 1.3399587869644165, "learning_rate": 1.405623203508026e-06, "loss": 0.3601, "step": 11885 }, { "epoch": 0.7131457370852583, "grad_norm": 1.2690311670303345, "learning_rate": 1.4050782918579752e-06, "loss": 0.3893, "step": 11886 }, { "epoch": 0.7132057358852822, "grad_norm": 1.2311874628067017, "learning_rate": 1.4045334593235727e-06, "loss": 0.375, "step": 11887 }, { "epoch": 0.7132657346853063, "grad_norm": 1.2133234739303589, "learning_rate": 1.4039887059253958e-06, "loss": 0.3698, "step": 11888 }, { "epoch": 0.7133257334853303, "grad_norm": 1.2528902292251587, "learning_rate": 1.403444031684016e-06, "loss": 0.3818, "step": 11889 }, { "epoch": 0.7133857322853543, "grad_norm": 1.164474606513977, "learning_rate": 1.4028994366200034e-06, "loss": 0.3706, "step": 11890 }, { "epoch": 0.7134457310853783, "grad_norm": 1.4923155307769775, "learning_rate": 1.4023549207539231e-06, "loss": 0.3274, "step": 11891 }, { "epoch": 0.7135057298854023, "grad_norm": 1.3064616918563843, "learning_rate": 1.4018104841063424e-06, "loss": 0.3499, "step": 11892 }, { "epoch": 0.7135657286854263, "grad_norm": 1.4635223150253296, "learning_rate": 1.4012661266978198e-06, "loss": 0.3692, "step": 11893 }, { "epoch": 0.7136257274854503, "grad_norm": 1.292799949645996, "learning_rate": 1.400721848548913e-06, "loss": 0.3915, "step": 11894 }, { "epoch": 0.7136857262854743, "grad_norm": 1.2933951616287231, "learning_rate": 1.4001776496801788e-06, "loss": 0.4237, "step": 11895 }, { "epoch": 0.7137457250854983, "grad_norm": 1.353867530822754, "learning_rate": 1.399633530112168e-06, "loss": 0.3929, "step": 11896 }, { "epoch": 0.7138057238855223, "grad_norm": 1.1432956457138062, "learning_rate": 1.3990894898654281e-06, "loss": 0.3935, "step": 11897 }, { "epoch": 0.7138657226855463, "grad_norm": 1.493040680885315, "learning_rate": 1.398545528960508e-06, "loss": 0.3935, "step": 11898 }, { "epoch": 0.7139257214855703, "grad_norm": 1.369118571281433, "learning_rate": 1.3980016474179474e-06, "loss": 0.3741, "step": 11899 }, { "epoch": 0.7139857202855943, "grad_norm": 1.3014267683029175, "learning_rate": 1.3974578452582907e-06, "loss": 0.3537, "step": 11900 }, { "epoch": 0.7140457190856183, "grad_norm": 1.3606250286102295, "learning_rate": 1.3969141225020698e-06, "loss": 0.406, "step": 11901 }, { "epoch": 0.7141057178856423, "grad_norm": 1.2874737977981567, "learning_rate": 1.3963704791698215e-06, "loss": 0.3812, "step": 11902 }, { "epoch": 0.7141657166856663, "grad_norm": 1.2972666025161743, "learning_rate": 1.3958269152820753e-06, "loss": 0.3405, "step": 11903 }, { "epoch": 0.7142257154856902, "grad_norm": 1.3806169033050537, "learning_rate": 1.395283430859361e-06, "loss": 0.3624, "step": 11904 }, { "epoch": 0.7142857142857143, "grad_norm": 1.3669568300247192, "learning_rate": 1.3947400259222025e-06, "loss": 0.3503, "step": 11905 }, { "epoch": 0.7143457130857382, "grad_norm": 1.2657158374786377, "learning_rate": 1.3941967004911199e-06, "loss": 0.3686, "step": 11906 }, { "epoch": 0.7144057118857623, "grad_norm": 1.5089452266693115, "learning_rate": 1.393653454586636e-06, "loss": 0.3592, "step": 11907 }, { "epoch": 0.7144657106857862, "grad_norm": 1.3825308084487915, "learning_rate": 1.393110288229264e-06, "loss": 0.4138, "step": 11908 }, { "epoch": 0.7145257094858103, "grad_norm": 1.3475430011749268, "learning_rate": 1.3925672014395167e-06, "loss": 0.4076, "step": 11909 }, { "epoch": 0.7145857082858342, "grad_norm": 1.3097783327102661, "learning_rate": 1.3920241942379058e-06, "loss": 0.4113, "step": 11910 }, { "epoch": 0.7146457070858583, "grad_norm": 1.156143307685852, "learning_rate": 1.3914812666449355e-06, "loss": 0.3304, "step": 11911 }, { "epoch": 0.7147057058858823, "grad_norm": 1.2210969924926758, "learning_rate": 1.3909384186811146e-06, "loss": 0.3966, "step": 11912 }, { "epoch": 0.7147657046859063, "grad_norm": 1.3113057613372803, "learning_rate": 1.3903956503669378e-06, "loss": 0.392, "step": 11913 }, { "epoch": 0.7148257034859303, "grad_norm": 1.3038251399993896, "learning_rate": 1.3898529617229072e-06, "loss": 0.3943, "step": 11914 }, { "epoch": 0.7148857022859543, "grad_norm": 1.4014923572540283, "learning_rate": 1.3893103527695155e-06, "loss": 0.3427, "step": 11915 }, { "epoch": 0.7149457010859783, "grad_norm": 1.3405641317367554, "learning_rate": 1.3887678235272566e-06, "loss": 0.3713, "step": 11916 }, { "epoch": 0.7150056998860023, "grad_norm": 1.2398180961608887, "learning_rate": 1.3882253740166182e-06, "loss": 0.3613, "step": 11917 }, { "epoch": 0.7150656986860263, "grad_norm": 1.311437964439392, "learning_rate": 1.3876830042580848e-06, "loss": 0.3328, "step": 11918 }, { "epoch": 0.7151256974860503, "grad_norm": 1.421532154083252, "learning_rate": 1.3871407142721423e-06, "loss": 0.4068, "step": 11919 }, { "epoch": 0.7151856962860743, "grad_norm": 1.2243510484695435, "learning_rate": 1.3865985040792686e-06, "loss": 0.386, "step": 11920 }, { "epoch": 0.7152456950860983, "grad_norm": 1.219374656677246, "learning_rate": 1.386056373699939e-06, "loss": 0.3561, "step": 11921 }, { "epoch": 0.7153056938861223, "grad_norm": 1.3341995477676392, "learning_rate": 1.3855143231546312e-06, "loss": 0.3606, "step": 11922 }, { "epoch": 0.7153656926861462, "grad_norm": 1.4234665632247925, "learning_rate": 1.3849723524638134e-06, "loss": 0.3975, "step": 11923 }, { "epoch": 0.7154256914861703, "grad_norm": 1.319092035293579, "learning_rate": 1.3844304616479538e-06, "loss": 0.4006, "step": 11924 }, { "epoch": 0.7154856902861942, "grad_norm": 1.3197270631790161, "learning_rate": 1.383888650727516e-06, "loss": 0.3714, "step": 11925 }, { "epoch": 0.7155456890862183, "grad_norm": 1.3294384479522705, "learning_rate": 1.383346919722964e-06, "loss": 0.4261, "step": 11926 }, { "epoch": 0.7156056878862422, "grad_norm": 1.4657527208328247, "learning_rate": 1.3828052686547558e-06, "loss": 0.4109, "step": 11927 }, { "epoch": 0.7156656866862663, "grad_norm": 1.3240537643432617, "learning_rate": 1.3822636975433452e-06, "loss": 0.3948, "step": 11928 }, { "epoch": 0.7157256854862902, "grad_norm": 1.2945202589035034, "learning_rate": 1.3817222064091873e-06, "loss": 0.3452, "step": 11929 }, { "epoch": 0.7157856842863143, "grad_norm": 1.2965240478515625, "learning_rate": 1.38118079527273e-06, "loss": 0.3383, "step": 11930 }, { "epoch": 0.7158456830863382, "grad_norm": 1.2988686561584473, "learning_rate": 1.380639464154423e-06, "loss": 0.3428, "step": 11931 }, { "epoch": 0.7159056818863623, "grad_norm": 1.2895419597625732, "learning_rate": 1.3800982130747052e-06, "loss": 0.3517, "step": 11932 }, { "epoch": 0.7159656806863862, "grad_norm": 1.4121694564819336, "learning_rate": 1.3795570420540196e-06, "loss": 0.3794, "step": 11933 }, { "epoch": 0.7160256794864103, "grad_norm": 1.3552576303482056, "learning_rate": 1.3790159511128053e-06, "loss": 0.4119, "step": 11934 }, { "epoch": 0.7160856782864343, "grad_norm": 1.3192410469055176, "learning_rate": 1.378474940271495e-06, "loss": 0.4018, "step": 11935 }, { "epoch": 0.7161456770864583, "grad_norm": 1.3346672058105469, "learning_rate": 1.377934009550521e-06, "loss": 0.3488, "step": 11936 }, { "epoch": 0.7162056758864823, "grad_norm": 1.2403805255889893, "learning_rate": 1.3773931589703096e-06, "loss": 0.3294, "step": 11937 }, { "epoch": 0.7162656746865063, "grad_norm": 1.461081624031067, "learning_rate": 1.3768523885512895e-06, "loss": 0.4324, "step": 11938 }, { "epoch": 0.7163256734865303, "grad_norm": 1.2473410367965698, "learning_rate": 1.3763116983138815e-06, "loss": 0.3315, "step": 11939 }, { "epoch": 0.7163856722865543, "grad_norm": 1.5129543542861938, "learning_rate": 1.3757710882785044e-06, "loss": 0.3762, "step": 11940 }, { "epoch": 0.7164456710865783, "grad_norm": 1.3193155527114868, "learning_rate": 1.375230558465576e-06, "loss": 0.3593, "step": 11941 }, { "epoch": 0.7165056698866022, "grad_norm": 1.378574252128601, "learning_rate": 1.3746901088955093e-06, "loss": 0.4016, "step": 11942 }, { "epoch": 0.7165656686866263, "grad_norm": 1.3421313762664795, "learning_rate": 1.3741497395887142e-06, "loss": 0.4168, "step": 11943 }, { "epoch": 0.7166256674866502, "grad_norm": 1.162440299987793, "learning_rate": 1.3736094505655968e-06, "loss": 0.3196, "step": 11944 }, { "epoch": 0.7166856662866743, "grad_norm": 1.3072470426559448, "learning_rate": 1.3730692418465628e-06, "loss": 0.3678, "step": 11945 }, { "epoch": 0.7167456650866982, "grad_norm": 1.4112963676452637, "learning_rate": 1.372529113452014e-06, "loss": 0.3595, "step": 11946 }, { "epoch": 0.7168056638867223, "grad_norm": 1.3400683403015137, "learning_rate": 1.3719890654023487e-06, "loss": 0.4125, "step": 11947 }, { "epoch": 0.7168656626867462, "grad_norm": 1.22120201587677, "learning_rate": 1.3714490977179604e-06, "loss": 0.3543, "step": 11948 }, { "epoch": 0.7169256614867703, "grad_norm": 1.3852628469467163, "learning_rate": 1.3709092104192413e-06, "loss": 0.3791, "step": 11949 }, { "epoch": 0.7169856602867942, "grad_norm": 1.2881113290786743, "learning_rate": 1.3703694035265825e-06, "loss": 0.3448, "step": 11950 }, { "epoch": 0.7170456590868183, "grad_norm": 1.3082056045532227, "learning_rate": 1.3698296770603684e-06, "loss": 0.3923, "step": 11951 }, { "epoch": 0.7171056578868422, "grad_norm": 1.4543383121490479, "learning_rate": 1.3692900310409813e-06, "loss": 0.4029, "step": 11952 }, { "epoch": 0.7171656566868663, "grad_norm": 1.4716975688934326, "learning_rate": 1.3687504654888034e-06, "loss": 0.4092, "step": 11953 }, { "epoch": 0.7172256554868902, "grad_norm": 1.6232595443725586, "learning_rate": 1.3682109804242105e-06, "loss": 0.4762, "step": 11954 }, { "epoch": 0.7172856542869143, "grad_norm": 1.4779880046844482, "learning_rate": 1.3676715758675766e-06, "loss": 0.4053, "step": 11955 }, { "epoch": 0.7173456530869382, "grad_norm": 1.2527306079864502, "learning_rate": 1.3671322518392707e-06, "loss": 0.3879, "step": 11956 }, { "epoch": 0.7174056518869623, "grad_norm": 1.3882367610931396, "learning_rate": 1.366593008359663e-06, "loss": 0.4172, "step": 11957 }, { "epoch": 0.7174656506869863, "grad_norm": 1.259582757949829, "learning_rate": 1.3660538454491196e-06, "loss": 0.3693, "step": 11958 }, { "epoch": 0.7175256494870103, "grad_norm": 1.3006060123443604, "learning_rate": 1.3655147631279977e-06, "loss": 0.3813, "step": 11959 }, { "epoch": 0.7175856482870343, "grad_norm": 1.2611688375473022, "learning_rate": 1.36497576141666e-06, "loss": 0.3453, "step": 11960 }, { "epoch": 0.7176456470870582, "grad_norm": 1.3390967845916748, "learning_rate": 1.3644368403354594e-06, "loss": 0.3765, "step": 11961 }, { "epoch": 0.7177056458870823, "grad_norm": 1.2395575046539307, "learning_rate": 1.3638979999047509e-06, "loss": 0.3702, "step": 11962 }, { "epoch": 0.7177656446871062, "grad_norm": 1.2473042011260986, "learning_rate": 1.3633592401448832e-06, "loss": 0.3472, "step": 11963 }, { "epoch": 0.7178256434871303, "grad_norm": 1.352206826210022, "learning_rate": 1.3628205610762008e-06, "loss": 0.4063, "step": 11964 }, { "epoch": 0.7178856422871542, "grad_norm": 1.436386227607727, "learning_rate": 1.3622819627190504e-06, "loss": 0.3767, "step": 11965 }, { "epoch": 0.7179456410871783, "grad_norm": 1.2877981662750244, "learning_rate": 1.3617434450937703e-06, "loss": 0.3452, "step": 11966 }, { "epoch": 0.7180056398872022, "grad_norm": 1.2463054656982422, "learning_rate": 1.361205008220699e-06, "loss": 0.3682, "step": 11967 }, { "epoch": 0.7180656386872263, "grad_norm": 1.2678029537200928, "learning_rate": 1.3606666521201689e-06, "loss": 0.3625, "step": 11968 }, { "epoch": 0.7181256374872502, "grad_norm": 1.5237921476364136, "learning_rate": 1.3601283768125135e-06, "loss": 0.4324, "step": 11969 }, { "epoch": 0.7181856362872743, "grad_norm": 1.2435921430587769, "learning_rate": 1.3595901823180605e-06, "loss": 0.3321, "step": 11970 }, { "epoch": 0.7182456350872982, "grad_norm": 1.2067992687225342, "learning_rate": 1.3590520686571328e-06, "loss": 0.3797, "step": 11971 }, { "epoch": 0.7183056338873223, "grad_norm": 1.1728887557983398, "learning_rate": 1.3585140358500562e-06, "loss": 0.3165, "step": 11972 }, { "epoch": 0.7183656326873462, "grad_norm": 1.3668761253356934, "learning_rate": 1.3579760839171475e-06, "loss": 0.3584, "step": 11973 }, { "epoch": 0.7184256314873703, "grad_norm": 1.2822821140289307, "learning_rate": 1.357438212878722e-06, "loss": 0.3709, "step": 11974 }, { "epoch": 0.7184856302873942, "grad_norm": 1.2571063041687012, "learning_rate": 1.3569004227550949e-06, "loss": 0.3505, "step": 11975 }, { "epoch": 0.7185456290874183, "grad_norm": 1.2536770105361938, "learning_rate": 1.3563627135665733e-06, "loss": 0.393, "step": 11976 }, { "epoch": 0.7186056278874422, "grad_norm": 1.210163950920105, "learning_rate": 1.3558250853334687e-06, "loss": 0.3599, "step": 11977 }, { "epoch": 0.7186656266874663, "grad_norm": 1.301884651184082, "learning_rate": 1.355287538076079e-06, "loss": 0.3347, "step": 11978 }, { "epoch": 0.7187256254874903, "grad_norm": 1.2702187299728394, "learning_rate": 1.3547500718147078e-06, "loss": 0.3965, "step": 11979 }, { "epoch": 0.7187856242875142, "grad_norm": 1.3642083406448364, "learning_rate": 1.354212686569654e-06, "loss": 0.4316, "step": 11980 }, { "epoch": 0.7188456230875383, "grad_norm": 1.161025881767273, "learning_rate": 1.3536753823612105e-06, "loss": 0.3256, "step": 11981 }, { "epoch": 0.7189056218875622, "grad_norm": 1.2719309329986572, "learning_rate": 1.3531381592096695e-06, "loss": 0.4014, "step": 11982 }, { "epoch": 0.7189656206875863, "grad_norm": 1.3219605684280396, "learning_rate": 1.3526010171353175e-06, "loss": 0.3756, "step": 11983 }, { "epoch": 0.7190256194876102, "grad_norm": 1.3297629356384277, "learning_rate": 1.3520639561584435e-06, "loss": 0.4413, "step": 11984 }, { "epoch": 0.7190856182876343, "grad_norm": 1.2736761569976807, "learning_rate": 1.3515269762993272e-06, "loss": 0.3634, "step": 11985 }, { "epoch": 0.7191456170876582, "grad_norm": 1.5134042501449585, "learning_rate": 1.3509900775782478e-06, "loss": 0.3705, "step": 11986 }, { "epoch": 0.7192056158876823, "grad_norm": 1.4775731563568115, "learning_rate": 1.3504532600154832e-06, "loss": 0.384, "step": 11987 }, { "epoch": 0.7192656146877062, "grad_norm": 1.4616771936416626, "learning_rate": 1.3499165236313058e-06, "loss": 0.3445, "step": 11988 }, { "epoch": 0.7193256134877303, "grad_norm": 1.2717790603637695, "learning_rate": 1.3493798684459857e-06, "loss": 0.3674, "step": 11989 }, { "epoch": 0.7193856122877542, "grad_norm": 1.2718784809112549, "learning_rate": 1.3488432944797883e-06, "loss": 0.3856, "step": 11990 }, { "epoch": 0.7194456110877783, "grad_norm": 1.4246796369552612, "learning_rate": 1.3483068017529791e-06, "loss": 0.3676, "step": 11991 }, { "epoch": 0.7195056098878022, "grad_norm": 1.3450796604156494, "learning_rate": 1.34777039028582e-06, "loss": 0.3832, "step": 11992 }, { "epoch": 0.7195656086878263, "grad_norm": 1.3207895755767822, "learning_rate": 1.347234060098568e-06, "loss": 0.3449, "step": 11993 }, { "epoch": 0.7196256074878502, "grad_norm": 1.2705634832382202, "learning_rate": 1.3466978112114774e-06, "loss": 0.318, "step": 11994 }, { "epoch": 0.7196856062878743, "grad_norm": 1.3301565647125244, "learning_rate": 1.3461616436447992e-06, "loss": 0.3662, "step": 11995 }, { "epoch": 0.7197456050878982, "grad_norm": 1.399296760559082, "learning_rate": 1.3456255574187836e-06, "loss": 0.3643, "step": 11996 }, { "epoch": 0.7198056038879223, "grad_norm": 1.3796477317810059, "learning_rate": 1.3450895525536756e-06, "loss": 0.4161, "step": 11997 }, { "epoch": 0.7198656026879462, "grad_norm": 1.3492674827575684, "learning_rate": 1.3445536290697161e-06, "loss": 0.3377, "step": 11998 }, { "epoch": 0.7199256014879702, "grad_norm": 1.4145176410675049, "learning_rate": 1.3440177869871471e-06, "loss": 0.3894, "step": 11999 }, { "epoch": 0.7199856002879942, "grad_norm": 1.3928256034851074, "learning_rate": 1.3434820263262038e-06, "loss": 0.3807, "step": 12000 }, { "epoch": 0.7200455990880182, "grad_norm": 1.2622162103652954, "learning_rate": 1.3429463471071191e-06, "loss": 0.3575, "step": 12001 }, { "epoch": 0.7201055978880423, "grad_norm": 1.3186750411987305, "learning_rate": 1.3424107493501221e-06, "loss": 0.3758, "step": 12002 }, { "epoch": 0.7201655966880662, "grad_norm": 1.3232251405715942, "learning_rate": 1.3418752330754415e-06, "loss": 0.4312, "step": 12003 }, { "epoch": 0.7202255954880903, "grad_norm": 1.4071940183639526, "learning_rate": 1.3413397983033031e-06, "loss": 0.3374, "step": 12004 }, { "epoch": 0.7202855942881142, "grad_norm": 1.2819647789001465, "learning_rate": 1.3408044450539235e-06, "loss": 0.4141, "step": 12005 }, { "epoch": 0.7203455930881383, "grad_norm": 1.2249943017959595, "learning_rate": 1.3402691733475236e-06, "loss": 0.356, "step": 12006 }, { "epoch": 0.7204055918881622, "grad_norm": 1.426413893699646, "learning_rate": 1.3397339832043164e-06, "loss": 0.4123, "step": 12007 }, { "epoch": 0.7204655906881863, "grad_norm": 1.3073019981384277, "learning_rate": 1.3391988746445167e-06, "loss": 0.3832, "step": 12008 }, { "epoch": 0.7205255894882102, "grad_norm": 1.3243156671524048, "learning_rate": 1.3386638476883286e-06, "loss": 0.38, "step": 12009 }, { "epoch": 0.7205855882882343, "grad_norm": 1.3631755113601685, "learning_rate": 1.33812890235596e-06, "loss": 0.3904, "step": 12010 }, { "epoch": 0.7206455870882582, "grad_norm": 1.235956072807312, "learning_rate": 1.3375940386676145e-06, "loss": 0.335, "step": 12011 }, { "epoch": 0.7207055858882823, "grad_norm": 1.3323484659194946, "learning_rate": 1.3370592566434902e-06, "loss": 0.3396, "step": 12012 }, { "epoch": 0.7207655846883062, "grad_norm": 1.2631028890609741, "learning_rate": 1.3365245563037832e-06, "loss": 0.3895, "step": 12013 }, { "epoch": 0.7208255834883303, "grad_norm": 1.3045079708099365, "learning_rate": 1.3359899376686864e-06, "loss": 0.3747, "step": 12014 }, { "epoch": 0.7208855822883542, "grad_norm": 1.3858894109725952, "learning_rate": 1.335455400758391e-06, "loss": 0.396, "step": 12015 }, { "epoch": 0.7209455810883783, "grad_norm": 1.2728582620620728, "learning_rate": 1.3349209455930838e-06, "loss": 0.3672, "step": 12016 }, { "epoch": 0.7210055798884022, "grad_norm": 1.3351460695266724, "learning_rate": 1.3343865721929472e-06, "loss": 0.3661, "step": 12017 }, { "epoch": 0.7210655786884262, "grad_norm": 1.4665253162384033, "learning_rate": 1.3338522805781645e-06, "loss": 0.3862, "step": 12018 }, { "epoch": 0.7211255774884502, "grad_norm": 1.27366304397583, "learning_rate": 1.3333180707689119e-06, "loss": 0.3485, "step": 12019 }, { "epoch": 0.7211855762884742, "grad_norm": 1.2645955085754395, "learning_rate": 1.3327839427853636e-06, "loss": 0.3574, "step": 12020 }, { "epoch": 0.7212455750884982, "grad_norm": 1.2208225727081299, "learning_rate": 1.332249896647693e-06, "loss": 0.3362, "step": 12021 }, { "epoch": 0.7213055738885222, "grad_norm": 1.388290524482727, "learning_rate": 1.3317159323760662e-06, "loss": 0.4282, "step": 12022 }, { "epoch": 0.7213655726885462, "grad_norm": 1.3586503267288208, "learning_rate": 1.3311820499906523e-06, "loss": 0.3647, "step": 12023 }, { "epoch": 0.7214255714885702, "grad_norm": 1.313360571861267, "learning_rate": 1.330648249511609e-06, "loss": 0.4456, "step": 12024 }, { "epoch": 0.7214855702885943, "grad_norm": 1.277026653289795, "learning_rate": 1.3301145309590992e-06, "loss": 0.3893, "step": 12025 }, { "epoch": 0.7215455690886182, "grad_norm": 1.2823342084884644, "learning_rate": 1.3295808943532764e-06, "loss": 0.3782, "step": 12026 }, { "epoch": 0.7216055678886423, "grad_norm": 1.4642295837402344, "learning_rate": 1.3290473397142959e-06, "loss": 0.3773, "step": 12027 }, { "epoch": 0.7216655666886662, "grad_norm": 1.3060188293457031, "learning_rate": 1.3285138670623063e-06, "loss": 0.3971, "step": 12028 }, { "epoch": 0.7217255654886903, "grad_norm": 1.3844273090362549, "learning_rate": 1.3279804764174536e-06, "loss": 0.3934, "step": 12029 }, { "epoch": 0.7217855642887142, "grad_norm": 1.2952566146850586, "learning_rate": 1.3274471677998838e-06, "loss": 0.3876, "step": 12030 }, { "epoch": 0.7218455630887383, "grad_norm": 1.2364330291748047, "learning_rate": 1.3269139412297365e-06, "loss": 0.3674, "step": 12031 }, { "epoch": 0.7219055618887622, "grad_norm": 1.195617914199829, "learning_rate": 1.3263807967271474e-06, "loss": 0.3442, "step": 12032 }, { "epoch": 0.7219655606887863, "grad_norm": 1.2960381507873535, "learning_rate": 1.3258477343122545e-06, "loss": 0.3847, "step": 12033 }, { "epoch": 0.7220255594888102, "grad_norm": 1.3057359457015991, "learning_rate": 1.325314754005187e-06, "loss": 0.4078, "step": 12034 }, { "epoch": 0.7220855582888343, "grad_norm": 1.387564778327942, "learning_rate": 1.3247818558260733e-06, "loss": 0.3827, "step": 12035 }, { "epoch": 0.7221455570888582, "grad_norm": 1.4225960969924927, "learning_rate": 1.3242490397950375e-06, "loss": 0.4019, "step": 12036 }, { "epoch": 0.7222055558888822, "grad_norm": 1.371070146560669, "learning_rate": 1.323716305932204e-06, "loss": 0.3785, "step": 12037 }, { "epoch": 0.7222655546889062, "grad_norm": 1.2985061407089233, "learning_rate": 1.3231836542576891e-06, "loss": 0.4135, "step": 12038 }, { "epoch": 0.7223255534889302, "grad_norm": 1.2823400497436523, "learning_rate": 1.3226510847916114e-06, "loss": 0.3543, "step": 12039 }, { "epoch": 0.7223855522889542, "grad_norm": 1.2281533479690552, "learning_rate": 1.3221185975540826e-06, "loss": 0.3729, "step": 12040 }, { "epoch": 0.7224455510889782, "grad_norm": 1.2106314897537231, "learning_rate": 1.3215861925652101e-06, "loss": 0.322, "step": 12041 }, { "epoch": 0.7225055498890022, "grad_norm": 1.2385289669036865, "learning_rate": 1.3210538698451038e-06, "loss": 0.3701, "step": 12042 }, { "epoch": 0.7225655486890262, "grad_norm": 1.245763897895813, "learning_rate": 1.3205216294138657e-06, "loss": 0.3709, "step": 12043 }, { "epoch": 0.7226255474890502, "grad_norm": 1.446700096130371, "learning_rate": 1.3199894712915946e-06, "loss": 0.3728, "step": 12044 }, { "epoch": 0.7226855462890742, "grad_norm": 1.3243951797485352, "learning_rate": 1.3194573954983903e-06, "loss": 0.3797, "step": 12045 }, { "epoch": 0.7227455450890982, "grad_norm": 1.2145912647247314, "learning_rate": 1.3189254020543459e-06, "loss": 0.3261, "step": 12046 }, { "epoch": 0.7228055438891222, "grad_norm": 1.3058210611343384, "learning_rate": 1.3183934909795515e-06, "loss": 0.3524, "step": 12047 }, { "epoch": 0.7228655426891463, "grad_norm": 1.3613017797470093, "learning_rate": 1.317861662294095e-06, "loss": 0.3593, "step": 12048 }, { "epoch": 0.7229255414891702, "grad_norm": 1.3037185668945312, "learning_rate": 1.3173299160180612e-06, "loss": 0.3517, "step": 12049 }, { "epoch": 0.7229855402891943, "grad_norm": 1.3302383422851562, "learning_rate": 1.316798252171535e-06, "loss": 0.3699, "step": 12050 }, { "epoch": 0.7230455390892182, "grad_norm": 1.4710148572921753, "learning_rate": 1.3162666707745898e-06, "loss": 0.4034, "step": 12051 }, { "epoch": 0.7231055378892423, "grad_norm": 1.222113013267517, "learning_rate": 1.3157351718473042e-06, "loss": 0.3611, "step": 12052 }, { "epoch": 0.7231655366892662, "grad_norm": 1.311012864112854, "learning_rate": 1.3152037554097489e-06, "loss": 0.3736, "step": 12053 }, { "epoch": 0.7232255354892902, "grad_norm": 1.271012544631958, "learning_rate": 1.314672421481996e-06, "loss": 0.3892, "step": 12054 }, { "epoch": 0.7232855342893142, "grad_norm": 1.2870817184448242, "learning_rate": 1.3141411700841073e-06, "loss": 0.3634, "step": 12055 }, { "epoch": 0.7233455330893382, "grad_norm": 1.4662929773330688, "learning_rate": 1.3136100012361477e-06, "loss": 0.3883, "step": 12056 }, { "epoch": 0.7234055318893622, "grad_norm": 1.3958848714828491, "learning_rate": 1.3130789149581782e-06, "loss": 0.4384, "step": 12057 }, { "epoch": 0.7234655306893862, "grad_norm": 1.3610221147537231, "learning_rate": 1.3125479112702547e-06, "loss": 0.3203, "step": 12058 }, { "epoch": 0.7235255294894102, "grad_norm": 1.32725989818573, "learning_rate": 1.3120169901924305e-06, "loss": 0.3612, "step": 12059 }, { "epoch": 0.7235855282894342, "grad_norm": 1.3991137742996216, "learning_rate": 1.311486151744755e-06, "loss": 0.3897, "step": 12060 }, { "epoch": 0.7236455270894582, "grad_norm": 1.3372595310211182, "learning_rate": 1.3109553959472777e-06, "loss": 0.3737, "step": 12061 }, { "epoch": 0.7237055258894822, "grad_norm": 1.63105309009552, "learning_rate": 1.3104247228200423e-06, "loss": 0.3666, "step": 12062 }, { "epoch": 0.7237655246895062, "grad_norm": 1.3329052925109863, "learning_rate": 1.3098941323830878e-06, "loss": 0.3781, "step": 12063 }, { "epoch": 0.7238255234895302, "grad_norm": 1.2170346975326538, "learning_rate": 1.3093636246564553e-06, "loss": 0.3883, "step": 12064 }, { "epoch": 0.7238855222895542, "grad_norm": 1.3537840843200684, "learning_rate": 1.308833199660178e-06, "loss": 0.3678, "step": 12065 }, { "epoch": 0.7239455210895782, "grad_norm": 1.3300493955612183, "learning_rate": 1.3083028574142878e-06, "loss": 0.3694, "step": 12066 }, { "epoch": 0.7240055198896022, "grad_norm": 1.3479422330856323, "learning_rate": 1.3077725979388124e-06, "loss": 0.3756, "step": 12067 }, { "epoch": 0.7240655186896262, "grad_norm": 1.5013612508773804, "learning_rate": 1.307242421253778e-06, "loss": 0.4331, "step": 12068 }, { "epoch": 0.7241255174896503, "grad_norm": 1.302533507347107, "learning_rate": 1.3067123273792095e-06, "loss": 0.3364, "step": 12069 }, { "epoch": 0.7241855162896742, "grad_norm": 1.492475986480713, "learning_rate": 1.3061823163351217e-06, "loss": 0.4273, "step": 12070 }, { "epoch": 0.7242455150896983, "grad_norm": 1.2502270936965942, "learning_rate": 1.3056523881415336e-06, "loss": 0.3965, "step": 12071 }, { "epoch": 0.7243055138897222, "grad_norm": 1.3907859325408936, "learning_rate": 1.3051225428184562e-06, "loss": 0.4369, "step": 12072 }, { "epoch": 0.7243655126897462, "grad_norm": 1.3177152872085571, "learning_rate": 1.304592780385902e-06, "loss": 0.4141, "step": 12073 }, { "epoch": 0.7244255114897702, "grad_norm": 1.3781846761703491, "learning_rate": 1.3040631008638759e-06, "loss": 0.3812, "step": 12074 }, { "epoch": 0.7244855102897942, "grad_norm": 1.434443473815918, "learning_rate": 1.303533504272381e-06, "loss": 0.3836, "step": 12075 }, { "epoch": 0.7245455090898182, "grad_norm": 1.289767861366272, "learning_rate": 1.3030039906314196e-06, "loss": 0.3518, "step": 12076 }, { "epoch": 0.7246055078898422, "grad_norm": 1.2972683906555176, "learning_rate": 1.302474559960988e-06, "loss": 0.3934, "step": 12077 }, { "epoch": 0.7246655066898662, "grad_norm": 1.258524775505066, "learning_rate": 1.3019452122810803e-06, "loss": 0.3891, "step": 12078 }, { "epoch": 0.7247255054898902, "grad_norm": 1.3931827545166016, "learning_rate": 1.3014159476116866e-06, "loss": 0.4063, "step": 12079 }, { "epoch": 0.7247855042899142, "grad_norm": 1.3071565628051758, "learning_rate": 1.3008867659727969e-06, "loss": 0.3555, "step": 12080 }, { "epoch": 0.7248455030899382, "grad_norm": 1.3077362775802612, "learning_rate": 1.3003576673843951e-06, "loss": 0.4079, "step": 12081 }, { "epoch": 0.7249055018899622, "grad_norm": 1.3313664197921753, "learning_rate": 1.299828651866461e-06, "loss": 0.3858, "step": 12082 }, { "epoch": 0.7249655006899862, "grad_norm": 1.3904937505722046, "learning_rate": 1.2992997194389762e-06, "loss": 0.395, "step": 12083 }, { "epoch": 0.7250254994900102, "grad_norm": 1.361290454864502, "learning_rate": 1.2987708701219136e-06, "loss": 0.3841, "step": 12084 }, { "epoch": 0.7250854982900342, "grad_norm": 1.3280057907104492, "learning_rate": 1.2982421039352475e-06, "loss": 0.3606, "step": 12085 }, { "epoch": 0.7251454970900582, "grad_norm": 1.2304816246032715, "learning_rate": 1.297713420898946e-06, "loss": 0.3212, "step": 12086 }, { "epoch": 0.7252054958900822, "grad_norm": 1.2035444974899292, "learning_rate": 1.2971848210329739e-06, "loss": 0.3423, "step": 12087 }, { "epoch": 0.7252654946901061, "grad_norm": 1.2490837574005127, "learning_rate": 1.2966563043572962e-06, "loss": 0.3409, "step": 12088 }, { "epoch": 0.7253254934901302, "grad_norm": 1.3486124277114868, "learning_rate": 1.2961278708918713e-06, "loss": 0.3296, "step": 12089 }, { "epoch": 0.7253854922901541, "grad_norm": 1.289153814315796, "learning_rate": 1.295599520656655e-06, "loss": 0.403, "step": 12090 }, { "epoch": 0.7254454910901782, "grad_norm": 1.2322989702224731, "learning_rate": 1.2950712536716028e-06, "loss": 0.3913, "step": 12091 }, { "epoch": 0.7255054898902022, "grad_norm": 1.3154470920562744, "learning_rate": 1.2945430699566635e-06, "loss": 0.3588, "step": 12092 }, { "epoch": 0.7255654886902262, "grad_norm": 1.2825148105621338, "learning_rate": 1.2940149695317844e-06, "loss": 0.3878, "step": 12093 }, { "epoch": 0.7256254874902502, "grad_norm": 1.2489453554153442, "learning_rate": 1.2934869524169082e-06, "loss": 0.3426, "step": 12094 }, { "epoch": 0.7256854862902742, "grad_norm": 1.2944601774215698, "learning_rate": 1.2929590186319784e-06, "loss": 0.3741, "step": 12095 }, { "epoch": 0.7257454850902982, "grad_norm": 1.337450623512268, "learning_rate": 1.2924311681969312e-06, "loss": 0.3751, "step": 12096 }, { "epoch": 0.7258054838903222, "grad_norm": 1.3259761333465576, "learning_rate": 1.2919034011316997e-06, "loss": 0.3402, "step": 12097 }, { "epoch": 0.7258654826903462, "grad_norm": 1.3770679235458374, "learning_rate": 1.2913757174562182e-06, "loss": 0.3511, "step": 12098 }, { "epoch": 0.7259254814903702, "grad_norm": 1.4749456644058228, "learning_rate": 1.2908481171904119e-06, "loss": 0.3929, "step": 12099 }, { "epoch": 0.7259854802903942, "grad_norm": 1.2827301025390625, "learning_rate": 1.2903206003542096e-06, "loss": 0.3469, "step": 12100 }, { "epoch": 0.7260454790904182, "grad_norm": 1.502503752708435, "learning_rate": 1.289793166967529e-06, "loss": 0.4325, "step": 12101 }, { "epoch": 0.7261054778904422, "grad_norm": 1.2362169027328491, "learning_rate": 1.2892658170502905e-06, "loss": 0.366, "step": 12102 }, { "epoch": 0.7261654766904662, "grad_norm": 1.305549144744873, "learning_rate": 1.2887385506224114e-06, "loss": 0.3774, "step": 12103 }, { "epoch": 0.7262254754904902, "grad_norm": 1.427467942237854, "learning_rate": 1.2882113677038027e-06, "loss": 0.4292, "step": 12104 }, { "epoch": 0.7262854742905142, "grad_norm": 1.419050931930542, "learning_rate": 1.2876842683143734e-06, "loss": 0.3644, "step": 12105 }, { "epoch": 0.7263454730905382, "grad_norm": 1.2955554723739624, "learning_rate": 1.2871572524740294e-06, "loss": 0.3647, "step": 12106 }, { "epoch": 0.7264054718905621, "grad_norm": 1.3606101274490356, "learning_rate": 1.286630320202675e-06, "loss": 0.4007, "step": 12107 }, { "epoch": 0.7264654706905862, "grad_norm": 1.373364806175232, "learning_rate": 1.2861034715202098e-06, "loss": 0.4122, "step": 12108 }, { "epoch": 0.7265254694906101, "grad_norm": 1.2878000736236572, "learning_rate": 1.2855767064465285e-06, "loss": 0.4052, "step": 12109 }, { "epoch": 0.7265854682906342, "grad_norm": 1.1750010251998901, "learning_rate": 1.2850500250015273e-06, "loss": 0.3909, "step": 12110 }, { "epoch": 0.7266454670906581, "grad_norm": 1.2008534669876099, "learning_rate": 1.2845234272050952e-06, "loss": 0.352, "step": 12111 }, { "epoch": 0.7267054658906822, "grad_norm": 1.4517813920974731, "learning_rate": 1.2839969130771198e-06, "loss": 0.372, "step": 12112 }, { "epoch": 0.7267654646907061, "grad_norm": 1.301347255706787, "learning_rate": 1.283470482637484e-06, "loss": 0.4221, "step": 12113 }, { "epoch": 0.7268254634907302, "grad_norm": 1.3223029375076294, "learning_rate": 1.2829441359060688e-06, "loss": 0.4227, "step": 12114 }, { "epoch": 0.7268854622907542, "grad_norm": 1.332654356956482, "learning_rate": 1.2824178729027555e-06, "loss": 0.4075, "step": 12115 }, { "epoch": 0.7269454610907782, "grad_norm": 1.3525015115737915, "learning_rate": 1.2818916936474135e-06, "loss": 0.4292, "step": 12116 }, { "epoch": 0.7270054598908022, "grad_norm": 1.2035659551620483, "learning_rate": 1.281365598159918e-06, "loss": 0.3531, "step": 12117 }, { "epoch": 0.7270654586908262, "grad_norm": 1.219725251197815, "learning_rate": 1.2808395864601345e-06, "loss": 0.3515, "step": 12118 }, { "epoch": 0.7271254574908502, "grad_norm": 1.400921106338501, "learning_rate": 1.2803136585679303e-06, "loss": 0.3818, "step": 12119 }, { "epoch": 0.7271854562908742, "grad_norm": 1.2885721921920776, "learning_rate": 1.2797878145031666e-06, "loss": 0.3408, "step": 12120 }, { "epoch": 0.7272454550908982, "grad_norm": 1.2722986936569214, "learning_rate": 1.2792620542857006e-06, "loss": 0.3749, "step": 12121 }, { "epoch": 0.7273054538909222, "grad_norm": 1.2846314907073975, "learning_rate": 1.27873637793539e-06, "loss": 0.396, "step": 12122 }, { "epoch": 0.7273654526909462, "grad_norm": 1.3884812593460083, "learning_rate": 1.2782107854720868e-06, "loss": 0.3626, "step": 12123 }, { "epoch": 0.7274254514909702, "grad_norm": 1.341556429862976, "learning_rate": 1.2776852769156395e-06, "loss": 0.3417, "step": 12124 }, { "epoch": 0.7274854502909942, "grad_norm": 1.3529043197631836, "learning_rate": 1.2771598522858934e-06, "loss": 0.4021, "step": 12125 }, { "epoch": 0.7275454490910181, "grad_norm": 1.239794135093689, "learning_rate": 1.2766345116026937e-06, "loss": 0.3386, "step": 12126 }, { "epoch": 0.7276054478910422, "grad_norm": 1.5483194589614868, "learning_rate": 1.2761092548858786e-06, "loss": 0.3648, "step": 12127 }, { "epoch": 0.7276654466910661, "grad_norm": 1.2302454710006714, "learning_rate": 1.275584082155284e-06, "loss": 0.3812, "step": 12128 }, { "epoch": 0.7277254454910902, "grad_norm": 1.3459924459457397, "learning_rate": 1.2750589934307454e-06, "loss": 0.3806, "step": 12129 }, { "epoch": 0.7277854442911141, "grad_norm": 1.2598968744277954, "learning_rate": 1.2745339887320907e-06, "loss": 0.3681, "step": 12130 }, { "epoch": 0.7278454430911382, "grad_norm": 1.3860410451889038, "learning_rate": 1.274009068079149e-06, "loss": 0.3764, "step": 12131 }, { "epoch": 0.7279054418911621, "grad_norm": 1.3316829204559326, "learning_rate": 1.2734842314917431e-06, "loss": 0.3403, "step": 12132 }, { "epoch": 0.7279654406911862, "grad_norm": 1.415334701538086, "learning_rate": 1.2729594789896929e-06, "loss": 0.3538, "step": 12133 }, { "epoch": 0.7280254394912101, "grad_norm": 1.1913323402404785, "learning_rate": 1.2724348105928178e-06, "loss": 0.3247, "step": 12134 }, { "epoch": 0.7280854382912342, "grad_norm": 1.4145160913467407, "learning_rate": 1.2719102263209311e-06, "loss": 0.3953, "step": 12135 }, { "epoch": 0.7281454370912582, "grad_norm": 1.2232123613357544, "learning_rate": 1.2713857261938442e-06, "loss": 0.3662, "step": 12136 }, { "epoch": 0.7282054358912822, "grad_norm": 1.3748806715011597, "learning_rate": 1.270861310231364e-06, "loss": 0.4223, "step": 12137 }, { "epoch": 0.7282654346913062, "grad_norm": 1.285878300666809, "learning_rate": 1.2703369784532967e-06, "loss": 0.3978, "step": 12138 }, { "epoch": 0.7283254334913302, "grad_norm": 1.317971110343933, "learning_rate": 1.2698127308794437e-06, "loss": 0.3811, "step": 12139 }, { "epoch": 0.7283854322913542, "grad_norm": 1.3796485662460327, "learning_rate": 1.269288567529602e-06, "loss": 0.3661, "step": 12140 }, { "epoch": 0.7284454310913782, "grad_norm": 1.1918411254882812, "learning_rate": 1.268764488423569e-06, "loss": 0.3574, "step": 12141 }, { "epoch": 0.7285054298914022, "grad_norm": 1.378846526145935, "learning_rate": 1.268240493581136e-06, "loss": 0.4106, "step": 12142 }, { "epoch": 0.7285654286914262, "grad_norm": 1.222687005996704, "learning_rate": 1.2677165830220905e-06, "loss": 0.4021, "step": 12143 }, { "epoch": 0.7286254274914502, "grad_norm": 1.3985164165496826, "learning_rate": 1.2671927567662206e-06, "loss": 0.3794, "step": 12144 }, { "epoch": 0.7286854262914741, "grad_norm": 1.3270350694656372, "learning_rate": 1.266669014833306e-06, "loss": 0.3746, "step": 12145 }, { "epoch": 0.7287454250914982, "grad_norm": 1.3639596700668335, "learning_rate": 1.2661453572431306e-06, "loss": 0.3893, "step": 12146 }, { "epoch": 0.7288054238915221, "grad_norm": 1.3905600309371948, "learning_rate": 1.2656217840154654e-06, "loss": 0.3776, "step": 12147 }, { "epoch": 0.7288654226915462, "grad_norm": 1.3517673015594482, "learning_rate": 1.2650982951700861e-06, "loss": 0.4045, "step": 12148 }, { "epoch": 0.7289254214915701, "grad_norm": 1.3344682455062866, "learning_rate": 1.2645748907267614e-06, "loss": 0.4327, "step": 12149 }, { "epoch": 0.7289854202915942, "grad_norm": 1.406825065612793, "learning_rate": 1.2640515707052593e-06, "loss": 0.3402, "step": 12150 }, { "epoch": 0.7290454190916181, "grad_norm": 1.2764298915863037, "learning_rate": 1.263528335125343e-06, "loss": 0.3922, "step": 12151 }, { "epoch": 0.7291054178916422, "grad_norm": 1.1660124063491821, "learning_rate": 1.2630051840067703e-06, "loss": 0.3885, "step": 12152 }, { "epoch": 0.7291654166916661, "grad_norm": 1.231592059135437, "learning_rate": 1.2624821173693015e-06, "loss": 0.3885, "step": 12153 }, { "epoch": 0.7292254154916902, "grad_norm": 1.3717522621154785, "learning_rate": 1.2619591352326893e-06, "loss": 0.3637, "step": 12154 }, { "epoch": 0.7292854142917141, "grad_norm": 1.3790661096572876, "learning_rate": 1.2614362376166827e-06, "loss": 0.3803, "step": 12155 }, { "epoch": 0.7293454130917382, "grad_norm": 1.2109572887420654, "learning_rate": 1.2609134245410313e-06, "loss": 0.3929, "step": 12156 }, { "epoch": 0.7294054118917621, "grad_norm": 1.4118677377700806, "learning_rate": 1.2603906960254793e-06, "loss": 0.3644, "step": 12157 }, { "epoch": 0.7294654106917862, "grad_norm": 1.2966656684875488, "learning_rate": 1.2598680520897664e-06, "loss": 0.3984, "step": 12158 }, { "epoch": 0.7295254094918102, "grad_norm": 1.2701750993728638, "learning_rate": 1.2593454927536306e-06, "loss": 0.3833, "step": 12159 }, { "epoch": 0.7295854082918342, "grad_norm": 1.2879350185394287, "learning_rate": 1.258823018036808e-06, "loss": 0.3541, "step": 12160 }, { "epoch": 0.7296454070918582, "grad_norm": 1.3396241664886475, "learning_rate": 1.258300627959029e-06, "loss": 0.4136, "step": 12161 }, { "epoch": 0.7297054058918822, "grad_norm": 1.3249096870422363, "learning_rate": 1.2577783225400214e-06, "loss": 0.3851, "step": 12162 }, { "epoch": 0.7297654046919062, "grad_norm": 1.2698982954025269, "learning_rate": 1.2572561017995118e-06, "loss": 0.374, "step": 12163 }, { "epoch": 0.7298254034919301, "grad_norm": 1.305258870124817, "learning_rate": 1.2567339657572206e-06, "loss": 0.4375, "step": 12164 }, { "epoch": 0.7298854022919542, "grad_norm": 1.2931294441223145, "learning_rate": 1.2562119144328683e-06, "loss": 0.3604, "step": 12165 }, { "epoch": 0.7299454010919781, "grad_norm": 1.3499926328659058, "learning_rate": 1.255689947846169e-06, "loss": 0.3642, "step": 12166 }, { "epoch": 0.7300053998920022, "grad_norm": 1.2955905199050903, "learning_rate": 1.2551680660168344e-06, "loss": 0.3895, "step": 12167 }, { "epoch": 0.7300653986920261, "grad_norm": 1.3619014024734497, "learning_rate": 1.2546462689645754e-06, "loss": 0.3671, "step": 12168 }, { "epoch": 0.7301253974920502, "grad_norm": 1.347426414489746, "learning_rate": 1.2541245567090974e-06, "loss": 0.4018, "step": 12169 }, { "epoch": 0.7301853962920741, "grad_norm": 1.2909694910049438, "learning_rate": 1.2536029292701024e-06, "loss": 0.3754, "step": 12170 }, { "epoch": 0.7302453950920982, "grad_norm": 1.32370924949646, "learning_rate": 1.2530813866672893e-06, "loss": 0.3625, "step": 12171 }, { "epoch": 0.7303053938921221, "grad_norm": 1.264000654220581, "learning_rate": 1.2525599289203563e-06, "loss": 0.3512, "step": 12172 }, { "epoch": 0.7303653926921462, "grad_norm": 1.4956344366073608, "learning_rate": 1.2520385560489955e-06, "loss": 0.3765, "step": 12173 }, { "epoch": 0.7304253914921701, "grad_norm": 1.2864619493484497, "learning_rate": 1.2515172680728954e-06, "loss": 0.388, "step": 12174 }, { "epoch": 0.7304853902921942, "grad_norm": 1.431894063949585, "learning_rate": 1.2509960650117454e-06, "loss": 0.4263, "step": 12175 }, { "epoch": 0.7305453890922181, "grad_norm": 1.3542858362197876, "learning_rate": 1.2504749468852264e-06, "loss": 0.3793, "step": 12176 }, { "epoch": 0.7306053878922422, "grad_norm": 1.304276466369629, "learning_rate": 1.2499539137130223e-06, "loss": 0.3642, "step": 12177 }, { "epoch": 0.7306653866922661, "grad_norm": 1.3261065483093262, "learning_rate": 1.249432965514805e-06, "loss": 0.3996, "step": 12178 }, { "epoch": 0.7307253854922902, "grad_norm": 1.2219215631484985, "learning_rate": 1.2489121023102513e-06, "loss": 0.3798, "step": 12179 }, { "epoch": 0.7307853842923141, "grad_norm": 1.330866813659668, "learning_rate": 1.248391324119033e-06, "loss": 0.3575, "step": 12180 }, { "epoch": 0.7308453830923382, "grad_norm": 1.1948022842407227, "learning_rate": 1.2478706309608156e-06, "loss": 0.3472, "step": 12181 }, { "epoch": 0.7309053818923622, "grad_norm": 1.3515501022338867, "learning_rate": 1.2473500228552639e-06, "loss": 0.4196, "step": 12182 }, { "epoch": 0.7309653806923861, "grad_norm": 1.36686372756958, "learning_rate": 1.2468294998220375e-06, "loss": 0.3799, "step": 12183 }, { "epoch": 0.7310253794924102, "grad_norm": 1.3902499675750732, "learning_rate": 1.2463090618807971e-06, "loss": 0.3903, "step": 12184 }, { "epoch": 0.7310853782924341, "grad_norm": 1.3131128549575806, "learning_rate": 1.2457887090511953e-06, "loss": 0.3615, "step": 12185 }, { "epoch": 0.7311453770924582, "grad_norm": 1.2901438474655151, "learning_rate": 1.2452684413528825e-06, "loss": 0.3779, "step": 12186 }, { "epoch": 0.7312053758924821, "grad_norm": 1.3618401288986206, "learning_rate": 1.2447482588055095e-06, "loss": 0.3971, "step": 12187 }, { "epoch": 0.7312653746925062, "grad_norm": 1.4009615182876587, "learning_rate": 1.2442281614287203e-06, "loss": 0.3972, "step": 12188 }, { "epoch": 0.7313253734925301, "grad_norm": 1.319267749786377, "learning_rate": 1.2437081492421556e-06, "loss": 0.3835, "step": 12189 }, { "epoch": 0.7313853722925542, "grad_norm": 1.3473042249679565, "learning_rate": 1.2431882222654534e-06, "loss": 0.3958, "step": 12190 }, { "epoch": 0.7314453710925781, "grad_norm": 1.3320934772491455, "learning_rate": 1.24266838051825e-06, "loss": 0.3825, "step": 12191 }, { "epoch": 0.7315053698926022, "grad_norm": 1.3347259759902954, "learning_rate": 1.2421486240201798e-06, "loss": 0.3981, "step": 12192 }, { "epoch": 0.7315653686926261, "grad_norm": 1.4627209901809692, "learning_rate": 1.2416289527908673e-06, "loss": 0.4062, "step": 12193 }, { "epoch": 0.7316253674926502, "grad_norm": 1.3766169548034668, "learning_rate": 1.241109366849941e-06, "loss": 0.417, "step": 12194 }, { "epoch": 0.7316853662926741, "grad_norm": 1.2642085552215576, "learning_rate": 1.2405898662170214e-06, "loss": 0.3831, "step": 12195 }, { "epoch": 0.7317453650926982, "grad_norm": 1.32649827003479, "learning_rate": 1.24007045091173e-06, "loss": 0.3776, "step": 12196 }, { "epoch": 0.7318053638927221, "grad_norm": 1.3650137186050415, "learning_rate": 1.239551120953681e-06, "loss": 0.4003, "step": 12197 }, { "epoch": 0.7318653626927462, "grad_norm": 1.231453537940979, "learning_rate": 1.2390318763624868e-06, "loss": 0.3479, "step": 12198 }, { "epoch": 0.7319253614927701, "grad_norm": 1.3657995462417603, "learning_rate": 1.238512717157759e-06, "loss": 0.3289, "step": 12199 }, { "epoch": 0.7319853602927942, "grad_norm": 1.3403956890106201, "learning_rate": 1.2379936433591023e-06, "loss": 0.3647, "step": 12200 }, { "epoch": 0.7320453590928181, "grad_norm": 1.2185810804367065, "learning_rate": 1.2374746549861198e-06, "loss": 0.3678, "step": 12201 }, { "epoch": 0.7321053578928421, "grad_norm": 1.330371379852295, "learning_rate": 1.2369557520584105e-06, "loss": 0.3856, "step": 12202 }, { "epoch": 0.7321653566928661, "grad_norm": 1.3559045791625977, "learning_rate": 1.2364369345955735e-06, "loss": 0.4011, "step": 12203 }, { "epoch": 0.7322253554928901, "grad_norm": 1.4033125638961792, "learning_rate": 1.2359182026172004e-06, "loss": 0.3357, "step": 12204 }, { "epoch": 0.7322853542929142, "grad_norm": 1.4288475513458252, "learning_rate": 1.2353995561428808e-06, "loss": 0.3945, "step": 12205 }, { "epoch": 0.7323453530929381, "grad_norm": 1.1706126928329468, "learning_rate": 1.2348809951922034e-06, "loss": 0.3593, "step": 12206 }, { "epoch": 0.7324053518929622, "grad_norm": 1.3469347953796387, "learning_rate": 1.234362519784751e-06, "loss": 0.3694, "step": 12207 }, { "epoch": 0.7324653506929861, "grad_norm": 1.2990033626556396, "learning_rate": 1.2338441299401028e-06, "loss": 0.3534, "step": 12208 }, { "epoch": 0.7325253494930102, "grad_norm": 1.3968859910964966, "learning_rate": 1.2333258256778384e-06, "loss": 0.3815, "step": 12209 }, { "epoch": 0.7325853482930341, "grad_norm": 1.3382370471954346, "learning_rate": 1.2328076070175296e-06, "loss": 0.3621, "step": 12210 }, { "epoch": 0.7326453470930582, "grad_norm": 1.4318006038665771, "learning_rate": 1.2322894739787494e-06, "loss": 0.4321, "step": 12211 }, { "epoch": 0.7327053458930821, "grad_norm": 1.4725931882858276, "learning_rate": 1.2317714265810639e-06, "loss": 0.4318, "step": 12212 }, { "epoch": 0.7327653446931062, "grad_norm": 1.223565936088562, "learning_rate": 1.2312534648440363e-06, "loss": 0.3685, "step": 12213 }, { "epoch": 0.7328253434931301, "grad_norm": 1.3409860134124756, "learning_rate": 1.2307355887872301e-06, "loss": 0.3777, "step": 12214 }, { "epoch": 0.7328853422931542, "grad_norm": 1.3923652172088623, "learning_rate": 1.2302177984302026e-06, "loss": 0.3978, "step": 12215 }, { "epoch": 0.7329453410931781, "grad_norm": 1.2492581605911255, "learning_rate": 1.2297000937925072e-06, "loss": 0.3362, "step": 12216 }, { "epoch": 0.7330053398932022, "grad_norm": 1.3288450241088867, "learning_rate": 1.2291824748936947e-06, "loss": 0.3899, "step": 12217 }, { "epoch": 0.7330653386932261, "grad_norm": 1.2343229055404663, "learning_rate": 1.2286649417533157e-06, "loss": 0.3676, "step": 12218 }, { "epoch": 0.7331253374932502, "grad_norm": 1.30672025680542, "learning_rate": 1.2281474943909132e-06, "loss": 0.3945, "step": 12219 }, { "epoch": 0.7331853362932741, "grad_norm": 1.2642245292663574, "learning_rate": 1.2276301328260284e-06, "loss": 0.3611, "step": 12220 }, { "epoch": 0.7332453350932981, "grad_norm": 1.3209612369537354, "learning_rate": 1.227112857078202e-06, "loss": 0.357, "step": 12221 }, { "epoch": 0.7333053338933221, "grad_norm": 1.400585412979126, "learning_rate": 1.2265956671669662e-06, "loss": 0.3679, "step": 12222 }, { "epoch": 0.7333653326933461, "grad_norm": 1.2468149662017822, "learning_rate": 1.226078563111857e-06, "loss": 0.3248, "step": 12223 }, { "epoch": 0.7334253314933701, "grad_norm": 1.3036035299301147, "learning_rate": 1.2255615449323982e-06, "loss": 0.4075, "step": 12224 }, { "epoch": 0.7334853302933941, "grad_norm": 1.2697445154190063, "learning_rate": 1.2250446126481172e-06, "loss": 0.3721, "step": 12225 }, { "epoch": 0.7335453290934182, "grad_norm": 1.3685379028320312, "learning_rate": 1.224527766278538e-06, "loss": 0.348, "step": 12226 }, { "epoch": 0.7336053278934421, "grad_norm": 1.3128454685211182, "learning_rate": 1.2240110058431777e-06, "loss": 0.3618, "step": 12227 }, { "epoch": 0.7336653266934662, "grad_norm": 1.4265981912612915, "learning_rate": 1.2234943313615527e-06, "loss": 0.3797, "step": 12228 }, { "epoch": 0.7337253254934901, "grad_norm": 1.3011647462844849, "learning_rate": 1.222977742853174e-06, "loss": 0.3748, "step": 12229 }, { "epoch": 0.7337853242935142, "grad_norm": 1.2982479333877563, "learning_rate": 1.222461240337553e-06, "loss": 0.3489, "step": 12230 }, { "epoch": 0.7338453230935381, "grad_norm": 1.3016715049743652, "learning_rate": 1.2219448238341942e-06, "loss": 0.3664, "step": 12231 }, { "epoch": 0.7339053218935622, "grad_norm": 1.4031342267990112, "learning_rate": 1.2214284933625998e-06, "loss": 0.3927, "step": 12232 }, { "epoch": 0.7339653206935861, "grad_norm": 1.2655360698699951, "learning_rate": 1.2209122489422706e-06, "loss": 0.3895, "step": 12233 }, { "epoch": 0.7340253194936102, "grad_norm": 1.3617870807647705, "learning_rate": 1.2203960905927028e-06, "loss": 0.3543, "step": 12234 }, { "epoch": 0.7340853182936341, "grad_norm": 1.333296537399292, "learning_rate": 1.2198800183333887e-06, "loss": 0.4152, "step": 12235 }, { "epoch": 0.7341453170936582, "grad_norm": 1.3250577449798584, "learning_rate": 1.2193640321838168e-06, "loss": 0.3436, "step": 12236 }, { "epoch": 0.7342053158936821, "grad_norm": 1.4379417896270752, "learning_rate": 1.2188481321634752e-06, "loss": 0.3892, "step": 12237 }, { "epoch": 0.7342653146937062, "grad_norm": 1.3301628828048706, "learning_rate": 1.2183323182918488e-06, "loss": 0.4017, "step": 12238 }, { "epoch": 0.7343253134937301, "grad_norm": 1.3655120134353638, "learning_rate": 1.2178165905884129e-06, "loss": 0.3873, "step": 12239 }, { "epoch": 0.7343853122937541, "grad_norm": 1.3099058866500854, "learning_rate": 1.2173009490726487e-06, "loss": 0.3871, "step": 12240 }, { "epoch": 0.7344453110937781, "grad_norm": 1.2992281913757324, "learning_rate": 1.2167853937640258e-06, "loss": 0.3678, "step": 12241 }, { "epoch": 0.7345053098938021, "grad_norm": 1.3268803358078003, "learning_rate": 1.2162699246820191e-06, "loss": 0.3733, "step": 12242 }, { "epoch": 0.7345653086938261, "grad_norm": 1.2854727506637573, "learning_rate": 1.2157545418460898e-06, "loss": 0.3252, "step": 12243 }, { "epoch": 0.7346253074938501, "grad_norm": 1.4050962924957275, "learning_rate": 1.215239245275705e-06, "loss": 0.3972, "step": 12244 }, { "epoch": 0.7346853062938741, "grad_norm": 1.3617470264434814, "learning_rate": 1.2147240349903254e-06, "loss": 0.3623, "step": 12245 }, { "epoch": 0.7347453050938981, "grad_norm": 1.3451780080795288, "learning_rate": 1.2142089110094078e-06, "loss": 0.3836, "step": 12246 }, { "epoch": 0.7348053038939221, "grad_norm": 1.4353855848312378, "learning_rate": 1.213693873352405e-06, "loss": 0.4328, "step": 12247 }, { "epoch": 0.7348653026939461, "grad_norm": 1.2827504873275757, "learning_rate": 1.2131789220387673e-06, "loss": 0.3717, "step": 12248 }, { "epoch": 0.7349253014939702, "grad_norm": 1.2861725091934204, "learning_rate": 1.2126640570879443e-06, "loss": 0.3745, "step": 12249 }, { "epoch": 0.7349853002939941, "grad_norm": 1.3227633237838745, "learning_rate": 1.2121492785193784e-06, "loss": 0.3826, "step": 12250 }, { "epoch": 0.7350452990940182, "grad_norm": 1.37351393699646, "learning_rate": 1.21163458635251e-06, "loss": 0.4125, "step": 12251 }, { "epoch": 0.7351052978940421, "grad_norm": 1.3475826978683472, "learning_rate": 1.2111199806067784e-06, "loss": 0.4328, "step": 12252 }, { "epoch": 0.7351652966940662, "grad_norm": 1.3236442804336548, "learning_rate": 1.2106054613016173e-06, "loss": 0.3988, "step": 12253 }, { "epoch": 0.7352252954940901, "grad_norm": 1.4850263595581055, "learning_rate": 1.210091028456456e-06, "loss": 0.3652, "step": 12254 }, { "epoch": 0.7352852942941142, "grad_norm": 1.319264531135559, "learning_rate": 1.209576682090725e-06, "loss": 0.3751, "step": 12255 }, { "epoch": 0.7353452930941381, "grad_norm": 1.311742901802063, "learning_rate": 1.2090624222238467e-06, "loss": 0.38, "step": 12256 }, { "epoch": 0.7354052918941621, "grad_norm": 1.2442035675048828, "learning_rate": 1.208548248875244e-06, "loss": 0.3773, "step": 12257 }, { "epoch": 0.7354652906941861, "grad_norm": 1.1930071115493774, "learning_rate": 1.2080341620643346e-06, "loss": 0.3231, "step": 12258 }, { "epoch": 0.7355252894942101, "grad_norm": 1.2168084383010864, "learning_rate": 1.2075201618105322e-06, "loss": 0.3218, "step": 12259 }, { "epoch": 0.7355852882942341, "grad_norm": 1.2355931997299194, "learning_rate": 1.2070062481332479e-06, "loss": 0.3944, "step": 12260 }, { "epoch": 0.7356452870942581, "grad_norm": 1.2128537893295288, "learning_rate": 1.206492421051892e-06, "loss": 0.3391, "step": 12261 }, { "epoch": 0.7357052858942821, "grad_norm": 1.2898820638656616, "learning_rate": 1.2059786805858678e-06, "loss": 0.4254, "step": 12262 }, { "epoch": 0.7357652846943061, "grad_norm": 1.467195749282837, "learning_rate": 1.2054650267545767e-06, "loss": 0.3997, "step": 12263 }, { "epoch": 0.7358252834943301, "grad_norm": 1.5100277662277222, "learning_rate": 1.2049514595774182e-06, "loss": 0.3754, "step": 12264 }, { "epoch": 0.7358852822943541, "grad_norm": 1.3296464681625366, "learning_rate": 1.2044379790737875e-06, "loss": 0.3411, "step": 12265 }, { "epoch": 0.7359452810943781, "grad_norm": 1.222604513168335, "learning_rate": 1.203924585263074e-06, "loss": 0.3277, "step": 12266 }, { "epoch": 0.7360052798944021, "grad_norm": 1.3598623275756836, "learning_rate": 1.2034112781646694e-06, "loss": 0.3889, "step": 12267 }, { "epoch": 0.7360652786944261, "grad_norm": 1.269486427307129, "learning_rate": 1.2028980577979567e-06, "loss": 0.33, "step": 12268 }, { "epoch": 0.7361252774944501, "grad_norm": 1.1802564859390259, "learning_rate": 1.2023849241823205e-06, "loss": 0.3347, "step": 12269 }, { "epoch": 0.7361852762944741, "grad_norm": 1.229072093963623, "learning_rate": 1.201871877337136e-06, "loss": 0.3869, "step": 12270 }, { "epoch": 0.7362452750944981, "grad_norm": 1.3386926651000977, "learning_rate": 1.2013589172817816e-06, "loss": 0.3356, "step": 12271 }, { "epoch": 0.7363052738945222, "grad_norm": 1.2562083005905151, "learning_rate": 1.2008460440356272e-06, "loss": 0.3884, "step": 12272 }, { "epoch": 0.7363652726945461, "grad_norm": 1.316349983215332, "learning_rate": 1.2003332576180435e-06, "loss": 0.4304, "step": 12273 }, { "epoch": 0.7364252714945702, "grad_norm": 1.3096272945404053, "learning_rate": 1.1998205580483958e-06, "loss": 0.3479, "step": 12274 }, { "epoch": 0.7364852702945941, "grad_norm": 1.3000088930130005, "learning_rate": 1.1993079453460447e-06, "loss": 0.3812, "step": 12275 }, { "epoch": 0.7365452690946181, "grad_norm": 1.3668121099472046, "learning_rate": 1.198795419530352e-06, "loss": 0.3896, "step": 12276 }, { "epoch": 0.7366052678946421, "grad_norm": 1.3733563423156738, "learning_rate": 1.1982829806206718e-06, "loss": 0.4102, "step": 12277 }, { "epoch": 0.7366652666946661, "grad_norm": 1.380712628364563, "learning_rate": 1.1977706286363555e-06, "loss": 0.4101, "step": 12278 }, { "epoch": 0.7367252654946901, "grad_norm": 1.2790776491165161, "learning_rate": 1.197258363596755e-06, "loss": 0.3768, "step": 12279 }, { "epoch": 0.7367852642947141, "grad_norm": 1.3239338397979736, "learning_rate": 1.196746185521215e-06, "loss": 0.3463, "step": 12280 }, { "epoch": 0.7368452630947381, "grad_norm": 1.2762664556503296, "learning_rate": 1.1962340944290778e-06, "loss": 0.3342, "step": 12281 }, { "epoch": 0.7369052618947621, "grad_norm": 1.243072271347046, "learning_rate": 1.1957220903396815e-06, "loss": 0.4086, "step": 12282 }, { "epoch": 0.7369652606947861, "grad_norm": 1.2493164539337158, "learning_rate": 1.195210173272365e-06, "loss": 0.3214, "step": 12283 }, { "epoch": 0.7370252594948101, "grad_norm": 1.3247307538986206, "learning_rate": 1.19469834324646e-06, "loss": 0.3988, "step": 12284 }, { "epoch": 0.7370852582948341, "grad_norm": 1.2512046098709106, "learning_rate": 1.1941866002812937e-06, "loss": 0.3789, "step": 12285 }, { "epoch": 0.7371452570948581, "grad_norm": 1.2047538757324219, "learning_rate": 1.1936749443961961e-06, "loss": 0.3552, "step": 12286 }, { "epoch": 0.7372052558948821, "grad_norm": 1.6255444288253784, "learning_rate": 1.1931633756104868e-06, "loss": 0.4004, "step": 12287 }, { "epoch": 0.7372652546949061, "grad_norm": 1.260411024093628, "learning_rate": 1.1926518939434894e-06, "loss": 0.3518, "step": 12288 }, { "epoch": 0.73732525349493, "grad_norm": 1.248701810836792, "learning_rate": 1.192140499414515e-06, "loss": 0.3867, "step": 12289 }, { "epoch": 0.7373852522949541, "grad_norm": 1.2319622039794922, "learning_rate": 1.19162919204288e-06, "loss": 0.3796, "step": 12290 }, { "epoch": 0.737445251094978, "grad_norm": 1.4264367818832397, "learning_rate": 1.191117971847894e-06, "loss": 0.3626, "step": 12291 }, { "epoch": 0.7375052498950021, "grad_norm": 1.39593505859375, "learning_rate": 1.1906068388488635e-06, "loss": 0.3908, "step": 12292 }, { "epoch": 0.7375652486950262, "grad_norm": 1.3805426359176636, "learning_rate": 1.1900957930650908e-06, "loss": 0.3862, "step": 12293 }, { "epoch": 0.7376252474950501, "grad_norm": 1.1360869407653809, "learning_rate": 1.189584834515875e-06, "loss": 0.3475, "step": 12294 }, { "epoch": 0.7376852462950741, "grad_norm": 1.280441403388977, "learning_rate": 1.1890739632205147e-06, "loss": 0.3398, "step": 12295 }, { "epoch": 0.7377452450950981, "grad_norm": 1.3930575847625732, "learning_rate": 1.1885631791983028e-06, "loss": 0.3483, "step": 12296 }, { "epoch": 0.7378052438951221, "grad_norm": 1.2112411260604858, "learning_rate": 1.188052482468527e-06, "loss": 0.3503, "step": 12297 }, { "epoch": 0.7378652426951461, "grad_norm": 1.3561078310012817, "learning_rate": 1.1875418730504775e-06, "loss": 0.3573, "step": 12298 }, { "epoch": 0.7379252414951701, "grad_norm": 1.393053412437439, "learning_rate": 1.1870313509634354e-06, "loss": 0.3905, "step": 12299 }, { "epoch": 0.7379852402951941, "grad_norm": 1.3077280521392822, "learning_rate": 1.1865209162266813e-06, "loss": 0.3722, "step": 12300 }, { "epoch": 0.7380452390952181, "grad_norm": 1.4369152784347534, "learning_rate": 1.1860105688594915e-06, "loss": 0.3891, "step": 12301 }, { "epoch": 0.7381052378952421, "grad_norm": 1.2751622200012207, "learning_rate": 1.1855003088811391e-06, "loss": 0.3777, "step": 12302 }, { "epoch": 0.7381652366952661, "grad_norm": 1.243312954902649, "learning_rate": 1.184990136310897e-06, "loss": 0.3722, "step": 12303 }, { "epoch": 0.7382252354952901, "grad_norm": 1.3147151470184326, "learning_rate": 1.1844800511680302e-06, "loss": 0.4185, "step": 12304 }, { "epoch": 0.7382852342953141, "grad_norm": 1.2697654962539673, "learning_rate": 1.1839700534718021e-06, "loss": 0.3956, "step": 12305 }, { "epoch": 0.7383452330953381, "grad_norm": 1.2331242561340332, "learning_rate": 1.1834601432414725e-06, "loss": 0.3499, "step": 12306 }, { "epoch": 0.7384052318953621, "grad_norm": 1.381678581237793, "learning_rate": 1.1829503204963e-06, "loss": 0.3904, "step": 12307 }, { "epoch": 0.738465230695386, "grad_norm": 1.2258446216583252, "learning_rate": 1.1824405852555374e-06, "loss": 0.3401, "step": 12308 }, { "epoch": 0.7385252294954101, "grad_norm": 1.2897100448608398, "learning_rate": 1.1819309375384344e-06, "loss": 0.3665, "step": 12309 }, { "epoch": 0.738585228295434, "grad_norm": 1.4510506391525269, "learning_rate": 1.1814213773642396e-06, "loss": 0.3689, "step": 12310 }, { "epoch": 0.7386452270954581, "grad_norm": 1.2905772924423218, "learning_rate": 1.1809119047521961e-06, "loss": 0.3919, "step": 12311 }, { "epoch": 0.738705225895482, "grad_norm": 1.388047695159912, "learning_rate": 1.1804025197215443e-06, "loss": 0.3803, "step": 12312 }, { "epoch": 0.7387652246955061, "grad_norm": 1.3505491018295288, "learning_rate": 1.1798932222915198e-06, "loss": 0.3531, "step": 12313 }, { "epoch": 0.73882522349553, "grad_norm": 1.2749736309051514, "learning_rate": 1.179384012481358e-06, "loss": 0.384, "step": 12314 }, { "epoch": 0.7388852222955541, "grad_norm": 1.3468880653381348, "learning_rate": 1.1788748903102917e-06, "loss": 0.3643, "step": 12315 }, { "epoch": 0.7389452210955781, "grad_norm": 1.3671141862869263, "learning_rate": 1.1783658557975437e-06, "loss": 0.4018, "step": 12316 }, { "epoch": 0.7390052198956021, "grad_norm": 1.4278384447097778, "learning_rate": 1.177856908962341e-06, "loss": 0.3678, "step": 12317 }, { "epoch": 0.7390652186956261, "grad_norm": 1.3968346118927002, "learning_rate": 1.1773480498239018e-06, "loss": 0.3841, "step": 12318 }, { "epoch": 0.7391252174956501, "grad_norm": 1.4866150617599487, "learning_rate": 1.176839278401446e-06, "loss": 0.3464, "step": 12319 }, { "epoch": 0.7391852162956741, "grad_norm": 1.3798918724060059, "learning_rate": 1.1763305947141865e-06, "loss": 0.4457, "step": 12320 }, { "epoch": 0.7392452150956981, "grad_norm": 1.3836536407470703, "learning_rate": 1.1758219987813328e-06, "loss": 0.378, "step": 12321 }, { "epoch": 0.7393052138957221, "grad_norm": 1.411312222480774, "learning_rate": 1.1753134906220943e-06, "loss": 0.3804, "step": 12322 }, { "epoch": 0.7393652126957461, "grad_norm": 1.2688705921173096, "learning_rate": 1.174805070255674e-06, "loss": 0.3538, "step": 12323 }, { "epoch": 0.7394252114957701, "grad_norm": 1.3670263290405273, "learning_rate": 1.1742967377012728e-06, "loss": 0.384, "step": 12324 }, { "epoch": 0.7394852102957941, "grad_norm": 1.3880976438522339, "learning_rate": 1.1737884929780864e-06, "loss": 0.4106, "step": 12325 }, { "epoch": 0.7395452090958181, "grad_norm": 1.2483634948730469, "learning_rate": 1.173280336105312e-06, "loss": 0.3588, "step": 12326 }, { "epoch": 0.739605207895842, "grad_norm": 1.4260034561157227, "learning_rate": 1.1727722671021387e-06, "loss": 0.4139, "step": 12327 }, { "epoch": 0.7396652066958661, "grad_norm": 1.3173081874847412, "learning_rate": 1.1722642859877524e-06, "loss": 0.371, "step": 12328 }, { "epoch": 0.73972520549589, "grad_norm": 1.3167039155960083, "learning_rate": 1.1717563927813406e-06, "loss": 0.3547, "step": 12329 }, { "epoch": 0.7397852042959141, "grad_norm": 1.3214062452316284, "learning_rate": 1.1712485875020822e-06, "loss": 0.3666, "step": 12330 }, { "epoch": 0.739845203095938, "grad_norm": 1.2685823440551758, "learning_rate": 1.1707408701691536e-06, "loss": 0.3626, "step": 12331 }, { "epoch": 0.7399052018959621, "grad_norm": 1.4036812782287598, "learning_rate": 1.1702332408017318e-06, "loss": 0.3977, "step": 12332 }, { "epoch": 0.739965200695986, "grad_norm": 1.2731386423110962, "learning_rate": 1.1697256994189848e-06, "loss": 0.3241, "step": 12333 }, { "epoch": 0.7400251994960101, "grad_norm": 1.3195033073425293, "learning_rate": 1.1692182460400834e-06, "loss": 0.3745, "step": 12334 }, { "epoch": 0.740085198296034, "grad_norm": 1.2960559129714966, "learning_rate": 1.1687108806841877e-06, "loss": 0.3683, "step": 12335 }, { "epoch": 0.7401451970960581, "grad_norm": 1.2230815887451172, "learning_rate": 1.168203603370461e-06, "loss": 0.3784, "step": 12336 }, { "epoch": 0.740205195896082, "grad_norm": 1.3786529302597046, "learning_rate": 1.1676964141180615e-06, "loss": 0.4155, "step": 12337 }, { "epoch": 0.7402651946961061, "grad_norm": 1.1676067113876343, "learning_rate": 1.1671893129461422e-06, "loss": 0.3639, "step": 12338 }, { "epoch": 0.7403251934961301, "grad_norm": 1.4084662199020386, "learning_rate": 1.1666822998738547e-06, "loss": 0.3602, "step": 12339 }, { "epoch": 0.7403851922961541, "grad_norm": 1.3563339710235596, "learning_rate": 1.1661753749203447e-06, "loss": 0.3736, "step": 12340 }, { "epoch": 0.7404451910961781, "grad_norm": 1.3886120319366455, "learning_rate": 1.1656685381047597e-06, "loss": 0.3744, "step": 12341 }, { "epoch": 0.7405051898962021, "grad_norm": 1.3638049364089966, "learning_rate": 1.165161789446238e-06, "loss": 0.3762, "step": 12342 }, { "epoch": 0.7405651886962261, "grad_norm": 1.3836512565612793, "learning_rate": 1.1646551289639173e-06, "loss": 0.4234, "step": 12343 }, { "epoch": 0.7406251874962501, "grad_norm": 1.1819278001785278, "learning_rate": 1.1641485566769339e-06, "loss": 0.351, "step": 12344 }, { "epoch": 0.7406851862962741, "grad_norm": 1.2870761156082153, "learning_rate": 1.1636420726044157e-06, "loss": 0.3546, "step": 12345 }, { "epoch": 0.740745185096298, "grad_norm": 1.2760875225067139, "learning_rate": 1.1631356767654945e-06, "loss": 0.3158, "step": 12346 }, { "epoch": 0.7408051838963221, "grad_norm": 1.3843446969985962, "learning_rate": 1.1626293691792902e-06, "loss": 0.364, "step": 12347 }, { "epoch": 0.740865182696346, "grad_norm": 1.2186449766159058, "learning_rate": 1.1621231498649251e-06, "loss": 0.3833, "step": 12348 }, { "epoch": 0.7409251814963701, "grad_norm": 1.3512578010559082, "learning_rate": 1.1616170188415187e-06, "loss": 0.3901, "step": 12349 }, { "epoch": 0.740985180296394, "grad_norm": 1.2532392740249634, "learning_rate": 1.1611109761281836e-06, "loss": 0.3504, "step": 12350 }, { "epoch": 0.7410451790964181, "grad_norm": 1.3378474712371826, "learning_rate": 1.1606050217440313e-06, "loss": 0.4407, "step": 12351 }, { "epoch": 0.741105177896442, "grad_norm": 1.3034284114837646, "learning_rate": 1.1600991557081675e-06, "loss": 0.3659, "step": 12352 }, { "epoch": 0.7411651766964661, "grad_norm": 1.2647641897201538, "learning_rate": 1.1595933780396996e-06, "loss": 0.4287, "step": 12353 }, { "epoch": 0.74122517549649, "grad_norm": 1.3611034154891968, "learning_rate": 1.1590876887577267e-06, "loss": 0.3756, "step": 12354 }, { "epoch": 0.7412851742965141, "grad_norm": 1.360285758972168, "learning_rate": 1.1585820878813456e-06, "loss": 0.3918, "step": 12355 }, { "epoch": 0.741345173096538, "grad_norm": 1.3596992492675781, "learning_rate": 1.1580765754296526e-06, "loss": 0.3717, "step": 12356 }, { "epoch": 0.7414051718965621, "grad_norm": 1.419427514076233, "learning_rate": 1.157571151421738e-06, "loss": 0.4116, "step": 12357 }, { "epoch": 0.741465170696586, "grad_norm": 1.5766955614089966, "learning_rate": 1.1570658158766883e-06, "loss": 0.381, "step": 12358 }, { "epoch": 0.7415251694966101, "grad_norm": 1.3927415609359741, "learning_rate": 1.1565605688135876e-06, "loss": 0.4041, "step": 12359 }, { "epoch": 0.741585168296634, "grad_norm": 1.3661010265350342, "learning_rate": 1.1560554102515172e-06, "loss": 0.3858, "step": 12360 }, { "epoch": 0.7416451670966581, "grad_norm": 1.2807141542434692, "learning_rate": 1.1555503402095575e-06, "loss": 0.3949, "step": 12361 }, { "epoch": 0.7417051658966821, "grad_norm": 1.4717463254928589, "learning_rate": 1.155045358706778e-06, "loss": 0.3667, "step": 12362 }, { "epoch": 0.7417651646967061, "grad_norm": 1.337844967842102, "learning_rate": 1.1545404657622528e-06, "loss": 0.3549, "step": 12363 }, { "epoch": 0.7418251634967301, "grad_norm": 1.364505410194397, "learning_rate": 1.1540356613950468e-06, "loss": 0.3539, "step": 12364 }, { "epoch": 0.741885162296754, "grad_norm": 1.328317642211914, "learning_rate": 1.1535309456242285e-06, "loss": 0.3702, "step": 12365 }, { "epoch": 0.7419451610967781, "grad_norm": 1.4238063097000122, "learning_rate": 1.1530263184688533e-06, "loss": 0.3773, "step": 12366 }, { "epoch": 0.742005159896802, "grad_norm": 1.2553578615188599, "learning_rate": 1.1525217799479812e-06, "loss": 0.3853, "step": 12367 }, { "epoch": 0.7420651586968261, "grad_norm": 1.3852933645248413, "learning_rate": 1.1520173300806675e-06, "loss": 0.3917, "step": 12368 }, { "epoch": 0.74212515749685, "grad_norm": 1.316914677619934, "learning_rate": 1.1515129688859623e-06, "loss": 0.4079, "step": 12369 }, { "epoch": 0.7421851562968741, "grad_norm": 1.407149076461792, "learning_rate": 1.1510086963829123e-06, "loss": 0.3443, "step": 12370 }, { "epoch": 0.742245155096898, "grad_norm": 1.3666245937347412, "learning_rate": 1.1505045125905606e-06, "loss": 0.3692, "step": 12371 }, { "epoch": 0.7423051538969221, "grad_norm": 1.506783366203308, "learning_rate": 1.1500004175279503e-06, "loss": 0.4115, "step": 12372 }, { "epoch": 0.742365152696946, "grad_norm": 1.2452709674835205, "learning_rate": 1.1494964112141178e-06, "loss": 0.3551, "step": 12373 }, { "epoch": 0.7424251514969701, "grad_norm": 1.3120204210281372, "learning_rate": 1.148992493668096e-06, "loss": 0.3629, "step": 12374 }, { "epoch": 0.742485150296994, "grad_norm": 1.3214905261993408, "learning_rate": 1.1484886649089174e-06, "loss": 0.4164, "step": 12375 }, { "epoch": 0.7425451490970181, "grad_norm": 1.4067846536636353, "learning_rate": 1.1479849249556086e-06, "loss": 0.3816, "step": 12376 }, { "epoch": 0.742605147897042, "grad_norm": 1.4473576545715332, "learning_rate": 1.1474812738271924e-06, "loss": 0.4465, "step": 12377 }, { "epoch": 0.7426651466970661, "grad_norm": 1.3705261945724487, "learning_rate": 1.1469777115426916e-06, "loss": 0.355, "step": 12378 }, { "epoch": 0.74272514549709, "grad_norm": 1.3300539255142212, "learning_rate": 1.1464742381211211e-06, "loss": 0.396, "step": 12379 }, { "epoch": 0.7427851442971141, "grad_norm": 1.2148520946502686, "learning_rate": 1.145970853581498e-06, "loss": 0.3492, "step": 12380 }, { "epoch": 0.742845143097138, "grad_norm": 1.366515040397644, "learning_rate": 1.1454675579428288e-06, "loss": 0.4156, "step": 12381 }, { "epoch": 0.7429051418971621, "grad_norm": 1.363961100578308, "learning_rate": 1.1449643512241237e-06, "loss": 0.3714, "step": 12382 }, { "epoch": 0.7429651406971861, "grad_norm": 1.3989551067352295, "learning_rate": 1.1444612334443843e-06, "loss": 0.3653, "step": 12383 }, { "epoch": 0.74302513949721, "grad_norm": 1.3502098321914673, "learning_rate": 1.1439582046226137e-06, "loss": 0.3969, "step": 12384 }, { "epoch": 0.7430851382972341, "grad_norm": 1.2482808828353882, "learning_rate": 1.1434552647778076e-06, "loss": 0.3541, "step": 12385 }, { "epoch": 0.743145137097258, "grad_norm": 1.3137786388397217, "learning_rate": 1.1429524139289586e-06, "loss": 0.3533, "step": 12386 }, { "epoch": 0.7432051358972821, "grad_norm": 1.3377214670181274, "learning_rate": 1.1424496520950593e-06, "loss": 0.4013, "step": 12387 }, { "epoch": 0.743265134697306, "grad_norm": 1.3277690410614014, "learning_rate": 1.1419469792950956e-06, "loss": 0.3486, "step": 12388 }, { "epoch": 0.7433251334973301, "grad_norm": 1.2441807985305786, "learning_rate": 1.1414443955480506e-06, "loss": 0.3722, "step": 12389 }, { "epoch": 0.743385132297354, "grad_norm": 1.272653579711914, "learning_rate": 1.1409419008729057e-06, "loss": 0.3495, "step": 12390 }, { "epoch": 0.7434451310973781, "grad_norm": 1.1249974966049194, "learning_rate": 1.1404394952886368e-06, "loss": 0.3257, "step": 12391 }, { "epoch": 0.743505129897402, "grad_norm": 1.3737072944641113, "learning_rate": 1.1399371788142196e-06, "loss": 0.3593, "step": 12392 }, { "epoch": 0.7435651286974261, "grad_norm": 1.3261077404022217, "learning_rate": 1.1394349514686209e-06, "loss": 0.3571, "step": 12393 }, { "epoch": 0.74362512749745, "grad_norm": 1.3444479703903198, "learning_rate": 1.1389328132708105e-06, "loss": 0.3794, "step": 12394 }, { "epoch": 0.7436851262974741, "grad_norm": 1.3663156032562256, "learning_rate": 1.1384307642397495e-06, "loss": 0.3835, "step": 12395 }, { "epoch": 0.743745125097498, "grad_norm": 1.2550710439682007, "learning_rate": 1.1379288043944008e-06, "loss": 0.3808, "step": 12396 }, { "epoch": 0.7438051238975221, "grad_norm": 1.3168352842330933, "learning_rate": 1.137426933753719e-06, "loss": 0.3412, "step": 12397 }, { "epoch": 0.743865122697546, "grad_norm": 1.380474328994751, "learning_rate": 1.136925152336657e-06, "loss": 0.4172, "step": 12398 }, { "epoch": 0.7439251214975701, "grad_norm": 1.3474154472351074, "learning_rate": 1.1364234601621674e-06, "loss": 0.4028, "step": 12399 }, { "epoch": 0.743985120297594, "grad_norm": 1.3763056993484497, "learning_rate": 1.135921857249195e-06, "loss": 0.3813, "step": 12400 }, { "epoch": 0.7440451190976181, "grad_norm": 1.3759565353393555, "learning_rate": 1.1354203436166823e-06, "loss": 0.3722, "step": 12401 }, { "epoch": 0.744105117897642, "grad_norm": 1.3301928043365479, "learning_rate": 1.1349189192835712e-06, "loss": 0.3717, "step": 12402 }, { "epoch": 0.744165116697666, "grad_norm": 1.3157567977905273, "learning_rate": 1.1344175842687974e-06, "loss": 0.3772, "step": 12403 }, { "epoch": 0.74422511549769, "grad_norm": 1.3053431510925293, "learning_rate": 1.1339163385912938e-06, "loss": 0.3772, "step": 12404 }, { "epoch": 0.744285114297714, "grad_norm": 1.439972996711731, "learning_rate": 1.1334151822699895e-06, "loss": 0.3651, "step": 12405 }, { "epoch": 0.7443451130977381, "grad_norm": 1.389949083328247, "learning_rate": 1.1329141153238123e-06, "loss": 0.3526, "step": 12406 }, { "epoch": 0.744405111897762, "grad_norm": 1.4236090183258057, "learning_rate": 1.1324131377716843e-06, "loss": 0.3838, "step": 12407 }, { "epoch": 0.7444651106977861, "grad_norm": 1.378193974494934, "learning_rate": 1.1319122496325246e-06, "loss": 0.3409, "step": 12408 }, { "epoch": 0.74452510949781, "grad_norm": 1.239804744720459, "learning_rate": 1.1314114509252513e-06, "loss": 0.3497, "step": 12409 }, { "epoch": 0.7445851082978341, "grad_norm": 1.3184244632720947, "learning_rate": 1.1309107416687754e-06, "loss": 0.385, "step": 12410 }, { "epoch": 0.744645107097858, "grad_norm": 1.3027331829071045, "learning_rate": 1.1304101218820093e-06, "loss": 0.373, "step": 12411 }, { "epoch": 0.7447051058978821, "grad_norm": 1.3951046466827393, "learning_rate": 1.1299095915838549e-06, "loss": 0.431, "step": 12412 }, { "epoch": 0.744765104697906, "grad_norm": 1.4204151630401611, "learning_rate": 1.1294091507932168e-06, "loss": 0.3891, "step": 12413 }, { "epoch": 0.7448251034979301, "grad_norm": 1.352716326713562, "learning_rate": 1.128908799528996e-06, "loss": 0.3588, "step": 12414 }, { "epoch": 0.744885102297954, "grad_norm": 1.3057729005813599, "learning_rate": 1.1284085378100873e-06, "loss": 0.4227, "step": 12415 }, { "epoch": 0.7449451010979781, "grad_norm": 1.3346235752105713, "learning_rate": 1.127908365655383e-06, "loss": 0.366, "step": 12416 }, { "epoch": 0.745005099898002, "grad_norm": 1.4992340803146362, "learning_rate": 1.1274082830837716e-06, "loss": 0.375, "step": 12417 }, { "epoch": 0.7450650986980261, "grad_norm": 1.3467609882354736, "learning_rate": 1.1269082901141408e-06, "loss": 0.3902, "step": 12418 }, { "epoch": 0.74512509749805, "grad_norm": 1.487371802330017, "learning_rate": 1.126408386765372e-06, "loss": 0.3736, "step": 12419 }, { "epoch": 0.7451850962980741, "grad_norm": 1.3809928894042969, "learning_rate": 1.1259085730563435e-06, "loss": 0.4081, "step": 12420 }, { "epoch": 0.745245095098098, "grad_norm": 1.471104621887207, "learning_rate": 1.125408849005933e-06, "loss": 0.4112, "step": 12421 }, { "epoch": 0.745305093898122, "grad_norm": 1.3564354181289673, "learning_rate": 1.1249092146330114e-06, "loss": 0.363, "step": 12422 }, { "epoch": 0.745365092698146, "grad_norm": 1.3577502965927124, "learning_rate": 1.1244096699564482e-06, "loss": 0.3621, "step": 12423 }, { "epoch": 0.74542509149817, "grad_norm": 1.286658525466919, "learning_rate": 1.1239102149951072e-06, "loss": 0.3367, "step": 12424 }, { "epoch": 0.745485090298194, "grad_norm": 1.3973326683044434, "learning_rate": 1.1234108497678519e-06, "loss": 0.4095, "step": 12425 }, { "epoch": 0.745545089098218, "grad_norm": 1.3983376026153564, "learning_rate": 1.1229115742935439e-06, "loss": 0.3648, "step": 12426 }, { "epoch": 0.745605087898242, "grad_norm": 1.3425740003585815, "learning_rate": 1.122412388591033e-06, "loss": 0.3723, "step": 12427 }, { "epoch": 0.745665086698266, "grad_norm": 1.289178729057312, "learning_rate": 1.1219132926791749e-06, "loss": 0.3325, "step": 12428 }, { "epoch": 0.7457250854982901, "grad_norm": 1.40434992313385, "learning_rate": 1.1214142865768164e-06, "loss": 0.3927, "step": 12429 }, { "epoch": 0.745785084298314, "grad_norm": 1.3019214868545532, "learning_rate": 1.1209153703028043e-06, "loss": 0.397, "step": 12430 }, { "epoch": 0.7458450830983381, "grad_norm": 1.3045977354049683, "learning_rate": 1.12041654387598e-06, "loss": 0.3786, "step": 12431 }, { "epoch": 0.745905081898362, "grad_norm": 1.247591495513916, "learning_rate": 1.11991780731518e-06, "loss": 0.4061, "step": 12432 }, { "epoch": 0.7459650806983861, "grad_norm": 1.2367184162139893, "learning_rate": 1.119419160639242e-06, "loss": 0.3663, "step": 12433 }, { "epoch": 0.74602507949841, "grad_norm": 1.3644540309906006, "learning_rate": 1.1189206038669965e-06, "loss": 0.3688, "step": 12434 }, { "epoch": 0.7460850782984341, "grad_norm": 1.4684786796569824, "learning_rate": 1.1184221370172717e-06, "loss": 0.3713, "step": 12435 }, { "epoch": 0.746145077098458, "grad_norm": 1.3390215635299683, "learning_rate": 1.1179237601088913e-06, "loss": 0.3911, "step": 12436 }, { "epoch": 0.7462050758984821, "grad_norm": 1.2791475057601929, "learning_rate": 1.1174254731606776e-06, "loss": 0.3662, "step": 12437 }, { "epoch": 0.746265074698506, "grad_norm": 1.365699052810669, "learning_rate": 1.1169272761914507e-06, "loss": 0.4156, "step": 12438 }, { "epoch": 0.7463250734985301, "grad_norm": 1.3185089826583862, "learning_rate": 1.1164291692200215e-06, "loss": 0.3897, "step": 12439 }, { "epoch": 0.746385072298554, "grad_norm": 1.3269649744033813, "learning_rate": 1.1159311522652042e-06, "loss": 0.3615, "step": 12440 }, { "epoch": 0.746445071098578, "grad_norm": 1.270340085029602, "learning_rate": 1.1154332253458042e-06, "loss": 0.3553, "step": 12441 }, { "epoch": 0.746505069898602, "grad_norm": 1.4386411905288696, "learning_rate": 1.1149353884806283e-06, "loss": 0.3592, "step": 12442 }, { "epoch": 0.746565068698626, "grad_norm": 1.311416506767273, "learning_rate": 1.1144376416884766e-06, "loss": 0.3823, "step": 12443 }, { "epoch": 0.74662506749865, "grad_norm": 1.2997759580612183, "learning_rate": 1.1139399849881455e-06, "loss": 0.3459, "step": 12444 }, { "epoch": 0.746685066298674, "grad_norm": 1.2689056396484375, "learning_rate": 1.1134424183984313e-06, "loss": 0.3949, "step": 12445 }, { "epoch": 0.746745065098698, "grad_norm": 1.2772446870803833, "learning_rate": 1.1129449419381235e-06, "loss": 0.3669, "step": 12446 }, { "epoch": 0.746805063898722, "grad_norm": 1.290989875793457, "learning_rate": 1.1124475556260094e-06, "loss": 0.3741, "step": 12447 }, { "epoch": 0.746865062698746, "grad_norm": 1.400344967842102, "learning_rate": 1.1119502594808744e-06, "loss": 0.406, "step": 12448 }, { "epoch": 0.74692506149877, "grad_norm": 1.255620002746582, "learning_rate": 1.111453053521498e-06, "loss": 0.3552, "step": 12449 }, { "epoch": 0.7469850602987941, "grad_norm": 1.510749340057373, "learning_rate": 1.1109559377666575e-06, "loss": 0.3758, "step": 12450 }, { "epoch": 0.747045059098818, "grad_norm": 1.2829569578170776, "learning_rate": 1.1104589122351258e-06, "loss": 0.3669, "step": 12451 }, { "epoch": 0.7471050578988421, "grad_norm": 1.4085288047790527, "learning_rate": 1.1099619769456753e-06, "loss": 0.3869, "step": 12452 }, { "epoch": 0.747165056698866, "grad_norm": 1.2172181606292725, "learning_rate": 1.109465131917072e-06, "loss": 0.3476, "step": 12453 }, { "epoch": 0.7472250554988901, "grad_norm": 1.378688931465149, "learning_rate": 1.1089683771680783e-06, "loss": 0.429, "step": 12454 }, { "epoch": 0.747285054298914, "grad_norm": 1.4113320112228394, "learning_rate": 1.1084717127174567e-06, "loss": 0.3667, "step": 12455 }, { "epoch": 0.7473450530989381, "grad_norm": 1.3103349208831787, "learning_rate": 1.1079751385839613e-06, "loss": 0.3415, "step": 12456 }, { "epoch": 0.747405051898962, "grad_norm": 1.3015048503875732, "learning_rate": 1.1074786547863492e-06, "loss": 0.3917, "step": 12457 }, { "epoch": 0.747465050698986, "grad_norm": 1.3829082250595093, "learning_rate": 1.106982261343366e-06, "loss": 0.3663, "step": 12458 }, { "epoch": 0.74752504949901, "grad_norm": 1.4656184911727905, "learning_rate": 1.10648595827376e-06, "loss": 0.3827, "step": 12459 }, { "epoch": 0.747585048299034, "grad_norm": 1.306704044342041, "learning_rate": 1.1059897455962757e-06, "loss": 0.363, "step": 12460 }, { "epoch": 0.747645047099058, "grad_norm": 1.3722909688949585, "learning_rate": 1.1054936233296518e-06, "loss": 0.4414, "step": 12461 }, { "epoch": 0.747705045899082, "grad_norm": 1.2116416692733765, "learning_rate": 1.1049975914926245e-06, "loss": 0.3502, "step": 12462 }, { "epoch": 0.747765044699106, "grad_norm": 1.2447311878204346, "learning_rate": 1.1045016501039253e-06, "loss": 0.3664, "step": 12463 }, { "epoch": 0.74782504349913, "grad_norm": 1.2764687538146973, "learning_rate": 1.1040057991822856e-06, "loss": 0.407, "step": 12464 }, { "epoch": 0.747885042299154, "grad_norm": 1.3791216611862183, "learning_rate": 1.1035100387464314e-06, "loss": 0.3848, "step": 12465 }, { "epoch": 0.747945041099178, "grad_norm": 1.3589571714401245, "learning_rate": 1.1030143688150832e-06, "loss": 0.3926, "step": 12466 }, { "epoch": 0.748005039899202, "grad_norm": 1.454904556274414, "learning_rate": 1.1025187894069626e-06, "loss": 0.3893, "step": 12467 }, { "epoch": 0.748065038699226, "grad_norm": 1.3673430681228638, "learning_rate": 1.1020233005407846e-06, "loss": 0.4156, "step": 12468 }, { "epoch": 0.74812503749925, "grad_norm": 1.2816509008407593, "learning_rate": 1.1015279022352617e-06, "loss": 0.3426, "step": 12469 }, { "epoch": 0.748185036299274, "grad_norm": 1.1961669921875, "learning_rate": 1.101032594509101e-06, "loss": 0.3308, "step": 12470 }, { "epoch": 0.748245035099298, "grad_norm": 1.182159662246704, "learning_rate": 1.1005373773810094e-06, "loss": 0.3677, "step": 12471 }, { "epoch": 0.748305033899322, "grad_norm": 1.419730305671692, "learning_rate": 1.1000422508696909e-06, "loss": 0.3948, "step": 12472 }, { "epoch": 0.7483650326993461, "grad_norm": 1.232313871383667, "learning_rate": 1.0995472149938417e-06, "loss": 0.4313, "step": 12473 }, { "epoch": 0.74842503149937, "grad_norm": 1.3199853897094727, "learning_rate": 1.0990522697721584e-06, "loss": 0.3907, "step": 12474 }, { "epoch": 0.7484850302993941, "grad_norm": 1.3295081853866577, "learning_rate": 1.0985574152233304e-06, "loss": 0.3674, "step": 12475 }, { "epoch": 0.748545029099418, "grad_norm": 1.386242151260376, "learning_rate": 1.0980626513660494e-06, "loss": 0.3687, "step": 12476 }, { "epoch": 0.748605027899442, "grad_norm": 1.3073142766952515, "learning_rate": 1.0975679782189989e-06, "loss": 0.3759, "step": 12477 }, { "epoch": 0.748665026699466, "grad_norm": 1.3065986633300781, "learning_rate": 1.0970733958008593e-06, "loss": 0.3831, "step": 12478 }, { "epoch": 0.74872502549949, "grad_norm": 1.206452488899231, "learning_rate": 1.0965789041303109e-06, "loss": 0.3663, "step": 12479 }, { "epoch": 0.748785024299514, "grad_norm": 1.3553990125656128, "learning_rate": 1.096084503226027e-06, "loss": 0.3872, "step": 12480 }, { "epoch": 0.748845023099538, "grad_norm": 1.3656383752822876, "learning_rate": 1.0955901931066794e-06, "loss": 0.4034, "step": 12481 }, { "epoch": 0.748905021899562, "grad_norm": 1.3552658557891846, "learning_rate": 1.0950959737909348e-06, "loss": 0.3371, "step": 12482 }, { "epoch": 0.748965020699586, "grad_norm": 1.3457506895065308, "learning_rate": 1.0946018452974584e-06, "loss": 0.3501, "step": 12483 }, { "epoch": 0.74902501949961, "grad_norm": 1.2852003574371338, "learning_rate": 1.0941078076449136e-06, "loss": 0.3732, "step": 12484 }, { "epoch": 0.749085018299634, "grad_norm": 1.4981855154037476, "learning_rate": 1.0936138608519537e-06, "loss": 0.3693, "step": 12485 }, { "epoch": 0.749145017099658, "grad_norm": 1.6127748489379883, "learning_rate": 1.0931200049372357e-06, "loss": 0.42, "step": 12486 }, { "epoch": 0.749205015899682, "grad_norm": 1.4686404466629028, "learning_rate": 1.0926262399194082e-06, "loss": 0.3835, "step": 12487 }, { "epoch": 0.749265014699706, "grad_norm": 1.268518090248108, "learning_rate": 1.0921325658171208e-06, "loss": 0.3678, "step": 12488 }, { "epoch": 0.74932501349973, "grad_norm": 1.2930692434310913, "learning_rate": 1.0916389826490161e-06, "loss": 0.3759, "step": 12489 }, { "epoch": 0.749385012299754, "grad_norm": 1.1942261457443237, "learning_rate": 1.0911454904337335e-06, "loss": 0.3575, "step": 12490 }, { "epoch": 0.749445011099778, "grad_norm": 1.273660659790039, "learning_rate": 1.0906520891899124e-06, "loss": 0.3169, "step": 12491 }, { "epoch": 0.749505009899802, "grad_norm": 1.4371132850646973, "learning_rate": 1.0901587789361846e-06, "loss": 0.3568, "step": 12492 }, { "epoch": 0.749565008699826, "grad_norm": 1.252063274383545, "learning_rate": 1.0896655596911806e-06, "loss": 0.3725, "step": 12493 }, { "epoch": 0.74962500749985, "grad_norm": 1.302417278289795, "learning_rate": 1.0891724314735259e-06, "loss": 0.3752, "step": 12494 }, { "epoch": 0.749685006299874, "grad_norm": 1.320797324180603, "learning_rate": 1.0886793943018458e-06, "loss": 0.3572, "step": 12495 }, { "epoch": 0.749745005099898, "grad_norm": 1.340394377708435, "learning_rate": 1.088186448194759e-06, "loss": 0.385, "step": 12496 }, { "epoch": 0.749805003899922, "grad_norm": 1.397350549697876, "learning_rate": 1.0876935931708808e-06, "loss": 0.4065, "step": 12497 }, { "epoch": 0.749865002699946, "grad_norm": 1.3607759475708008, "learning_rate": 1.0872008292488262e-06, "loss": 0.4034, "step": 12498 }, { "epoch": 0.74992500149997, "grad_norm": 1.2813029289245605, "learning_rate": 1.0867081564472038e-06, "loss": 0.3771, "step": 12499 }, { "epoch": 0.749985000299994, "grad_norm": 1.4363412857055664, "learning_rate": 1.0862155747846179e-06, "loss": 0.438, "step": 12500 }, { "epoch": 0.750044999100018, "grad_norm": 1.5306593179702759, "learning_rate": 1.085723084279674e-06, "loss": 0.3606, "step": 12501 }, { "epoch": 0.750104997900042, "grad_norm": 1.230961561203003, "learning_rate": 1.0852306849509687e-06, "loss": 0.336, "step": 12502 }, { "epoch": 0.750164996700066, "grad_norm": 1.3496726751327515, "learning_rate": 1.0847383768171008e-06, "loss": 0.3352, "step": 12503 }, { "epoch": 0.75022499550009, "grad_norm": 1.4279721975326538, "learning_rate": 1.0842461598966583e-06, "loss": 0.3814, "step": 12504 }, { "epoch": 0.750284994300114, "grad_norm": 1.5119259357452393, "learning_rate": 1.0837540342082335e-06, "loss": 0.4139, "step": 12505 }, { "epoch": 0.750344993100138, "grad_norm": 1.5480525493621826, "learning_rate": 1.0832619997704093e-06, "loss": 0.3628, "step": 12506 }, { "epoch": 0.750404991900162, "grad_norm": 1.4718233346939087, "learning_rate": 1.0827700566017695e-06, "loss": 0.3967, "step": 12507 }, { "epoch": 0.750464990700186, "grad_norm": 1.3533656597137451, "learning_rate": 1.082278204720892e-06, "loss": 0.381, "step": 12508 }, { "epoch": 0.75052498950021, "grad_norm": 1.2571332454681396, "learning_rate": 1.0817864441463507e-06, "loss": 0.4046, "step": 12509 }, { "epoch": 0.750584988300234, "grad_norm": 1.27544105052948, "learning_rate": 1.081294774896719e-06, "loss": 0.3738, "step": 12510 }, { "epoch": 0.750644987100258, "grad_norm": 1.308336615562439, "learning_rate": 1.080803196990564e-06, "loss": 0.3504, "step": 12511 }, { "epoch": 0.750704985900282, "grad_norm": 1.4036723375320435, "learning_rate": 1.0803117104464494e-06, "loss": 0.3408, "step": 12512 }, { "epoch": 0.750764984700306, "grad_norm": 1.258279800415039, "learning_rate": 1.0798203152829386e-06, "loss": 0.3693, "step": 12513 }, { "epoch": 0.75082498350033, "grad_norm": 1.2515206336975098, "learning_rate": 1.079329011518588e-06, "loss": 0.3875, "step": 12514 }, { "epoch": 0.7508849823003539, "grad_norm": 1.4888290166854858, "learning_rate": 1.078837799171952e-06, "loss": 0.3913, "step": 12515 }, { "epoch": 0.750944981100378, "grad_norm": 1.377907156944275, "learning_rate": 1.0783466782615805e-06, "loss": 0.3418, "step": 12516 }, { "epoch": 0.7510049799004019, "grad_norm": 1.3488739728927612, "learning_rate": 1.0778556488060231e-06, "loss": 0.4086, "step": 12517 }, { "epoch": 0.751064978700426, "grad_norm": 1.5948572158813477, "learning_rate": 1.0773647108238216e-06, "loss": 0.4134, "step": 12518 }, { "epoch": 0.75112497750045, "grad_norm": 1.2184864282608032, "learning_rate": 1.0768738643335185e-06, "loss": 0.389, "step": 12519 }, { "epoch": 0.751184976300474, "grad_norm": 1.2306164503097534, "learning_rate": 1.0763831093536496e-06, "loss": 0.4021, "step": 12520 }, { "epoch": 0.751244975100498, "grad_norm": 1.408740520477295, "learning_rate": 1.075892445902748e-06, "loss": 0.3852, "step": 12521 }, { "epoch": 0.751304973900522, "grad_norm": 1.2114275693893433, "learning_rate": 1.0754018739993452e-06, "loss": 0.3224, "step": 12522 }, { "epoch": 0.751364972700546, "grad_norm": 1.2828335762023926, "learning_rate": 1.0749113936619675e-06, "loss": 0.3924, "step": 12523 }, { "epoch": 0.75142497150057, "grad_norm": 1.2144176959991455, "learning_rate": 1.0744210049091363e-06, "loss": 0.3868, "step": 12524 }, { "epoch": 0.751484970300594, "grad_norm": 1.2621864080429077, "learning_rate": 1.0739307077593745e-06, "loss": 0.3864, "step": 12525 }, { "epoch": 0.751544969100618, "grad_norm": 1.38970947265625, "learning_rate": 1.073440502231196e-06, "loss": 0.3884, "step": 12526 }, { "epoch": 0.751604967900642, "grad_norm": 1.557480812072754, "learning_rate": 1.072950388343115e-06, "loss": 0.3979, "step": 12527 }, { "epoch": 0.751664966700666, "grad_norm": 1.4967012405395508, "learning_rate": 1.072460366113639e-06, "loss": 0.3783, "step": 12528 }, { "epoch": 0.75172496550069, "grad_norm": 1.2980194091796875, "learning_rate": 1.0719704355612748e-06, "loss": 0.3671, "step": 12529 }, { "epoch": 0.751784964300714, "grad_norm": 1.3135398626327515, "learning_rate": 1.0714805967045274e-06, "loss": 0.3744, "step": 12530 }, { "epoch": 0.751844963100738, "grad_norm": 1.2790417671203613, "learning_rate": 1.0709908495618917e-06, "loss": 0.3867, "step": 12531 }, { "epoch": 0.751904961900762, "grad_norm": 1.2172682285308838, "learning_rate": 1.070501194151866e-06, "loss": 0.3386, "step": 12532 }, { "epoch": 0.751964960700786, "grad_norm": 1.4070795774459839, "learning_rate": 1.0700116304929405e-06, "loss": 0.3839, "step": 12533 }, { "epoch": 0.7520249595008099, "grad_norm": 1.4262495040893555, "learning_rate": 1.0695221586036067e-06, "loss": 0.3773, "step": 12534 }, { "epoch": 0.752084958300834, "grad_norm": 1.2570247650146484, "learning_rate": 1.0690327785023455e-06, "loss": 0.3793, "step": 12535 }, { "epoch": 0.7521449571008579, "grad_norm": 1.328294277191162, "learning_rate": 1.0685434902076408e-06, "loss": 0.3935, "step": 12536 }, { "epoch": 0.752204955900882, "grad_norm": 1.3671129941940308, "learning_rate": 1.0680542937379718e-06, "loss": 0.3869, "step": 12537 }, { "epoch": 0.7522649547009059, "grad_norm": 1.303458333015442, "learning_rate": 1.0675651891118124e-06, "loss": 0.397, "step": 12538 }, { "epoch": 0.75232495350093, "grad_norm": 1.4077919721603394, "learning_rate": 1.0670761763476339e-06, "loss": 0.3742, "step": 12539 }, { "epoch": 0.752384952300954, "grad_norm": 1.4579428434371948, "learning_rate": 1.0665872554639025e-06, "loss": 0.3223, "step": 12540 }, { "epoch": 0.752444951100978, "grad_norm": 1.1985163688659668, "learning_rate": 1.0660984264790849e-06, "loss": 0.3748, "step": 12541 }, { "epoch": 0.752504949901002, "grad_norm": 1.3848479986190796, "learning_rate": 1.0656096894116408e-06, "loss": 0.4092, "step": 12542 }, { "epoch": 0.752564948701026, "grad_norm": 1.1969760656356812, "learning_rate": 1.0651210442800268e-06, "loss": 0.3925, "step": 12543 }, { "epoch": 0.75262494750105, "grad_norm": 1.2559003829956055, "learning_rate": 1.0646324911026985e-06, "loss": 0.3326, "step": 12544 }, { "epoch": 0.752684946301074, "grad_norm": 1.3583663702011108, "learning_rate": 1.064144029898106e-06, "loss": 0.3649, "step": 12545 }, { "epoch": 0.752744945101098, "grad_norm": 1.23468816280365, "learning_rate": 1.0636556606846954e-06, "loss": 0.345, "step": 12546 }, { "epoch": 0.752804943901122, "grad_norm": 1.3334763050079346, "learning_rate": 1.0631673834809095e-06, "loss": 0.36, "step": 12547 }, { "epoch": 0.752864942701146, "grad_norm": 1.3402032852172852, "learning_rate": 1.0626791983051895e-06, "loss": 0.377, "step": 12548 }, { "epoch": 0.75292494150117, "grad_norm": 1.2853999137878418, "learning_rate": 1.0621911051759734e-06, "loss": 0.3508, "step": 12549 }, { "epoch": 0.752984940301194, "grad_norm": 1.3873035907745361, "learning_rate": 1.0617031041116912e-06, "loss": 0.3873, "step": 12550 }, { "epoch": 0.753044939101218, "grad_norm": 1.2828742265701294, "learning_rate": 1.0612151951307743e-06, "loss": 0.4161, "step": 12551 }, { "epoch": 0.753104937901242, "grad_norm": 1.262709140777588, "learning_rate": 1.060727378251648e-06, "loss": 0.3441, "step": 12552 }, { "epoch": 0.7531649367012659, "grad_norm": 1.2592976093292236, "learning_rate": 1.0602396534927358e-06, "loss": 0.3541, "step": 12553 }, { "epoch": 0.75322493550129, "grad_norm": 1.309664011001587, "learning_rate": 1.0597520208724567e-06, "loss": 0.4013, "step": 12554 }, { "epoch": 0.7532849343013139, "grad_norm": 1.356341004371643, "learning_rate": 1.0592644804092248e-06, "loss": 0.3907, "step": 12555 }, { "epoch": 0.753344933101338, "grad_norm": 1.2751072645187378, "learning_rate": 1.0587770321214548e-06, "loss": 0.3663, "step": 12556 }, { "epoch": 0.7534049319013619, "grad_norm": 1.4547005891799927, "learning_rate": 1.058289676027554e-06, "loss": 0.4028, "step": 12557 }, { "epoch": 0.753464930701386, "grad_norm": 1.416587471961975, "learning_rate": 1.0578024121459276e-06, "loss": 0.3721, "step": 12558 }, { "epoch": 0.7535249295014099, "grad_norm": 1.3279399871826172, "learning_rate": 1.0573152404949767e-06, "loss": 0.3822, "step": 12559 }, { "epoch": 0.753584928301434, "grad_norm": 1.431860089302063, "learning_rate": 1.0568281610931012e-06, "loss": 0.4062, "step": 12560 }, { "epoch": 0.7536449271014579, "grad_norm": 1.3180896043777466, "learning_rate": 1.0563411739586949e-06, "loss": 0.4169, "step": 12561 }, { "epoch": 0.753704925901482, "grad_norm": 1.3440124988555908, "learning_rate": 1.0558542791101481e-06, "loss": 0.3969, "step": 12562 }, { "epoch": 0.753764924701506, "grad_norm": 1.1133028268814087, "learning_rate": 1.055367476565851e-06, "loss": 0.3342, "step": 12563 }, { "epoch": 0.75382492350153, "grad_norm": 1.241337776184082, "learning_rate": 1.0548807663441854e-06, "loss": 0.3312, "step": 12564 }, { "epoch": 0.753884922301554, "grad_norm": 1.2638641595840454, "learning_rate": 1.0543941484635345e-06, "loss": 0.3873, "step": 12565 }, { "epoch": 0.753944921101578, "grad_norm": 1.3404943943023682, "learning_rate": 1.0539076229422743e-06, "loss": 0.3752, "step": 12566 }, { "epoch": 0.754004919901602, "grad_norm": 1.311989426612854, "learning_rate": 1.0534211897987775e-06, "loss": 0.364, "step": 12567 }, { "epoch": 0.754064918701626, "grad_norm": 1.406083583831787, "learning_rate": 1.0529348490514172e-06, "loss": 0.3612, "step": 12568 }, { "epoch": 0.75412491750165, "grad_norm": 1.3084520101547241, "learning_rate": 1.0524486007185588e-06, "loss": 0.3747, "step": 12569 }, { "epoch": 0.754184916301674, "grad_norm": 1.2147045135498047, "learning_rate": 1.0519624448185646e-06, "loss": 0.3733, "step": 12570 }, { "epoch": 0.754244915101698, "grad_norm": 1.2641922235488892, "learning_rate": 1.051476381369797e-06, "loss": 0.4104, "step": 12571 }, { "epoch": 0.7543049139017219, "grad_norm": 1.4515082836151123, "learning_rate": 1.0509904103906106e-06, "loss": 0.395, "step": 12572 }, { "epoch": 0.754364912701746, "grad_norm": 1.3736120462417603, "learning_rate": 1.0505045318993586e-06, "loss": 0.3938, "step": 12573 }, { "epoch": 0.7544249115017699, "grad_norm": 1.3342101573944092, "learning_rate": 1.05001874591439e-06, "loss": 0.4017, "step": 12574 }, { "epoch": 0.754484910301794, "grad_norm": 1.333113670349121, "learning_rate": 1.0495330524540523e-06, "loss": 0.3773, "step": 12575 }, { "epoch": 0.7545449091018179, "grad_norm": 1.3432481288909912, "learning_rate": 1.0490474515366865e-06, "loss": 0.3605, "step": 12576 }, { "epoch": 0.754604907901842, "grad_norm": 1.4504225254058838, "learning_rate": 1.0485619431806312e-06, "loss": 0.4111, "step": 12577 }, { "epoch": 0.7546649067018659, "grad_norm": 1.284906268119812, "learning_rate": 1.0480765274042234e-06, "loss": 0.3322, "step": 12578 }, { "epoch": 0.75472490550189, "grad_norm": 1.250930666923523, "learning_rate": 1.0475912042257936e-06, "loss": 0.3779, "step": 12579 }, { "epoch": 0.7547849043019139, "grad_norm": 1.2671092748641968, "learning_rate": 1.0471059736636726e-06, "loss": 0.4155, "step": 12580 }, { "epoch": 0.754844903101938, "grad_norm": 1.3821910619735718, "learning_rate": 1.0466208357361815e-06, "loss": 0.3563, "step": 12581 }, { "epoch": 0.7549049019019619, "grad_norm": 1.3840250968933105, "learning_rate": 1.0461357904616438e-06, "loss": 0.4037, "step": 12582 }, { "epoch": 0.754964900701986, "grad_norm": 1.4344255924224854, "learning_rate": 1.045650837858379e-06, "loss": 0.3891, "step": 12583 }, { "epoch": 0.7550248995020099, "grad_norm": 1.3195149898529053, "learning_rate": 1.0451659779446998e-06, "loss": 0.3701, "step": 12584 }, { "epoch": 0.755084898302034, "grad_norm": 1.5307843685150146, "learning_rate": 1.0446812107389174e-06, "loss": 0.4586, "step": 12585 }, { "epoch": 0.755144897102058, "grad_norm": 1.3536796569824219, "learning_rate": 1.0441965362593384e-06, "loss": 0.3838, "step": 12586 }, { "epoch": 0.755204895902082, "grad_norm": 1.3174117803573608, "learning_rate": 1.0437119545242683e-06, "loss": 0.3831, "step": 12587 }, { "epoch": 0.755264894702106, "grad_norm": 1.3436119556427002, "learning_rate": 1.0432274655520074e-06, "loss": 0.4044, "step": 12588 }, { "epoch": 0.75532489350213, "grad_norm": 1.3826247453689575, "learning_rate": 1.0427430693608503e-06, "loss": 0.4631, "step": 12589 }, { "epoch": 0.755384892302154, "grad_norm": 1.229016900062561, "learning_rate": 1.0422587659690938e-06, "loss": 0.3255, "step": 12590 }, { "epoch": 0.7554448911021779, "grad_norm": 1.247667670249939, "learning_rate": 1.0417745553950261e-06, "loss": 0.3762, "step": 12591 }, { "epoch": 0.755504889902202, "grad_norm": 1.3119091987609863, "learning_rate": 1.0412904376569342e-06, "loss": 0.3596, "step": 12592 }, { "epoch": 0.7555648887022259, "grad_norm": 1.2870360612869263, "learning_rate": 1.0408064127730989e-06, "loss": 0.3465, "step": 12593 }, { "epoch": 0.75562488750225, "grad_norm": 1.2750741243362427, "learning_rate": 1.0403224807618016e-06, "loss": 0.3387, "step": 12594 }, { "epoch": 0.7556848863022739, "grad_norm": 1.246214747428894, "learning_rate": 1.03983864164132e-06, "loss": 0.3973, "step": 12595 }, { "epoch": 0.755744885102298, "grad_norm": 1.3761041164398193, "learning_rate": 1.0393548954299225e-06, "loss": 0.3681, "step": 12596 }, { "epoch": 0.7558048839023219, "grad_norm": 1.3264166116714478, "learning_rate": 1.038871242145881e-06, "loss": 0.3698, "step": 12597 }, { "epoch": 0.755864882702346, "grad_norm": 1.3676402568817139, "learning_rate": 1.038387681807459e-06, "loss": 0.4054, "step": 12598 }, { "epoch": 0.7559248815023699, "grad_norm": 1.3070920705795288, "learning_rate": 1.03790421443292e-06, "loss": 0.3869, "step": 12599 }, { "epoch": 0.755984880302394, "grad_norm": 1.3185744285583496, "learning_rate": 1.0374208400405212e-06, "loss": 0.324, "step": 12600 }, { "epoch": 0.7560448791024179, "grad_norm": 1.368636131286621, "learning_rate": 1.0369375586485166e-06, "loss": 0.3598, "step": 12601 }, { "epoch": 0.756104877902442, "grad_norm": 1.3930262327194214, "learning_rate": 1.0364543702751606e-06, "loss": 0.3592, "step": 12602 }, { "epoch": 0.7561648767024659, "grad_norm": 1.2994673252105713, "learning_rate": 1.0359712749386985e-06, "loss": 0.3891, "step": 12603 }, { "epoch": 0.75622487550249, "grad_norm": 1.2764134407043457, "learning_rate": 1.0354882726573756e-06, "loss": 0.3788, "step": 12604 }, { "epoch": 0.7562848743025139, "grad_norm": 1.2592228651046753, "learning_rate": 1.035005363449431e-06, "loss": 0.3641, "step": 12605 }, { "epoch": 0.756344873102538, "grad_norm": 1.3766112327575684, "learning_rate": 1.0345225473331048e-06, "loss": 0.381, "step": 12606 }, { "epoch": 0.756404871902562, "grad_norm": 1.2036139965057373, "learning_rate": 1.0340398243266288e-06, "loss": 0.3792, "step": 12607 }, { "epoch": 0.756464870702586, "grad_norm": 1.6638418436050415, "learning_rate": 1.033557194448233e-06, "loss": 0.4001, "step": 12608 }, { "epoch": 0.75652486950261, "grad_norm": 1.3365951776504517, "learning_rate": 1.033074657716146e-06, "loss": 0.4098, "step": 12609 }, { "epoch": 0.7565848683026339, "grad_norm": 1.4909358024597168, "learning_rate": 1.0325922141485884e-06, "loss": 0.375, "step": 12610 }, { "epoch": 0.756644867102658, "grad_norm": 1.2531630992889404, "learning_rate": 1.0321098637637829e-06, "loss": 0.3392, "step": 12611 }, { "epoch": 0.7567048659026819, "grad_norm": 1.36961829662323, "learning_rate": 1.0316276065799442e-06, "loss": 0.3534, "step": 12612 }, { "epoch": 0.756764864702706, "grad_norm": 1.4428913593292236, "learning_rate": 1.0311454426152835e-06, "loss": 0.3955, "step": 12613 }, { "epoch": 0.7568248635027299, "grad_norm": 1.4019885063171387, "learning_rate": 1.0306633718880126e-06, "loss": 0.3941, "step": 12614 }, { "epoch": 0.756884862302754, "grad_norm": 1.3675493001937866, "learning_rate": 1.0301813944163361e-06, "loss": 0.4245, "step": 12615 }, { "epoch": 0.7569448611027779, "grad_norm": 1.3688206672668457, "learning_rate": 1.0296995102184557e-06, "loss": 0.3742, "step": 12616 }, { "epoch": 0.757004859902802, "grad_norm": 1.2257887125015259, "learning_rate": 1.029217719312569e-06, "loss": 0.3544, "step": 12617 }, { "epoch": 0.7570648587028259, "grad_norm": 1.3162575960159302, "learning_rate": 1.028736021716873e-06, "loss": 0.38, "step": 12618 }, { "epoch": 0.75712485750285, "grad_norm": 1.3689112663269043, "learning_rate": 1.028254417449559e-06, "loss": 0.4049, "step": 12619 }, { "epoch": 0.7571848563028739, "grad_norm": 1.2986526489257812, "learning_rate": 1.027772906528813e-06, "loss": 0.349, "step": 12620 }, { "epoch": 0.757244855102898, "grad_norm": 1.2471656799316406, "learning_rate": 1.0272914889728217e-06, "loss": 0.3904, "step": 12621 }, { "epoch": 0.7573048539029219, "grad_norm": 1.4435425996780396, "learning_rate": 1.0268101647997658e-06, "loss": 0.4294, "step": 12622 }, { "epoch": 0.757364852702946, "grad_norm": 1.2011750936508179, "learning_rate": 1.0263289340278207e-06, "loss": 0.3524, "step": 12623 }, { "epoch": 0.7574248515029699, "grad_norm": 1.346975326538086, "learning_rate": 1.025847796675163e-06, "loss": 0.3682, "step": 12624 }, { "epoch": 0.757484850302994, "grad_norm": 1.531186819076538, "learning_rate": 1.0253667527599608e-06, "loss": 0.3712, "step": 12625 }, { "epoch": 0.7575448491030179, "grad_norm": 1.2812250852584839, "learning_rate": 1.0248858023003838e-06, "loss": 0.3818, "step": 12626 }, { "epoch": 0.757604847903042, "grad_norm": 1.39378821849823, "learning_rate": 1.0244049453145914e-06, "loss": 0.4089, "step": 12627 }, { "epoch": 0.7576648467030659, "grad_norm": 1.4100093841552734, "learning_rate": 1.0239241818207465e-06, "loss": 0.3736, "step": 12628 }, { "epoch": 0.7577248455030899, "grad_norm": 1.239506721496582, "learning_rate": 1.0234435118370032e-06, "loss": 0.3702, "step": 12629 }, { "epoch": 0.757784844303114, "grad_norm": 1.4301342964172363, "learning_rate": 1.0229629353815167e-06, "loss": 0.3668, "step": 12630 }, { "epoch": 0.7578448431031379, "grad_norm": 1.3888802528381348, "learning_rate": 1.0224824524724344e-06, "loss": 0.3629, "step": 12631 }, { "epoch": 0.757904841903162, "grad_norm": 1.2891408205032349, "learning_rate": 1.0220020631279012e-06, "loss": 0.3594, "step": 12632 }, { "epoch": 0.7579648407031859, "grad_norm": 1.463934302330017, "learning_rate": 1.0215217673660613e-06, "loss": 0.407, "step": 12633 }, { "epoch": 0.75802483950321, "grad_norm": 1.3316750526428223, "learning_rate": 1.0210415652050526e-06, "loss": 0.3861, "step": 12634 }, { "epoch": 0.7580848383032339, "grad_norm": 1.413047194480896, "learning_rate": 1.0205614566630089e-06, "loss": 0.4032, "step": 12635 }, { "epoch": 0.758144837103258, "grad_norm": 1.2654927968978882, "learning_rate": 1.0200814417580639e-06, "loss": 0.3554, "step": 12636 }, { "epoch": 0.7582048359032819, "grad_norm": 1.2370250225067139, "learning_rate": 1.019601520508344e-06, "loss": 0.386, "step": 12637 }, { "epoch": 0.758264834703306, "grad_norm": 1.3587145805358887, "learning_rate": 1.0191216929319743e-06, "loss": 0.4024, "step": 12638 }, { "epoch": 0.7583248335033299, "grad_norm": 1.414624810218811, "learning_rate": 1.018641959047074e-06, "loss": 0.3963, "step": 12639 }, { "epoch": 0.758384832303354, "grad_norm": 1.3403668403625488, "learning_rate": 1.018162318871763e-06, "loss": 0.3845, "step": 12640 }, { "epoch": 0.7584448311033779, "grad_norm": 1.2928783893585205, "learning_rate": 1.017682772424154e-06, "loss": 0.3426, "step": 12641 }, { "epoch": 0.758504829903402, "grad_norm": 1.5177127122879028, "learning_rate": 1.0172033197223565e-06, "loss": 0.4238, "step": 12642 }, { "epoch": 0.7585648287034259, "grad_norm": 1.3575201034545898, "learning_rate": 1.016723960784479e-06, "loss": 0.3762, "step": 12643 }, { "epoch": 0.75862482750345, "grad_norm": 1.3036112785339355, "learning_rate": 1.0162446956286225e-06, "loss": 0.353, "step": 12644 }, { "epoch": 0.7586848263034739, "grad_norm": 1.313029408454895, "learning_rate": 1.0157655242728888e-06, "loss": 0.328, "step": 12645 }, { "epoch": 0.7587448251034979, "grad_norm": 1.2984764575958252, "learning_rate": 1.0152864467353733e-06, "loss": 0.3392, "step": 12646 }, { "epoch": 0.7588048239035219, "grad_norm": 1.399155616760254, "learning_rate": 1.0148074630341675e-06, "loss": 0.3737, "step": 12647 }, { "epoch": 0.7588648227035459, "grad_norm": 1.3578975200653076, "learning_rate": 1.014328573187362e-06, "loss": 0.3809, "step": 12648 }, { "epoch": 0.7589248215035699, "grad_norm": 1.389991283416748, "learning_rate": 1.0138497772130417e-06, "loss": 0.3953, "step": 12649 }, { "epoch": 0.7589848203035939, "grad_norm": 1.2631380558013916, "learning_rate": 1.0133710751292885e-06, "loss": 0.3408, "step": 12650 }, { "epoch": 0.7590448191036179, "grad_norm": 1.3289566040039062, "learning_rate": 1.01289246695418e-06, "loss": 0.3796, "step": 12651 }, { "epoch": 0.7591048179036419, "grad_norm": 1.311524748802185, "learning_rate": 1.0124139527057924e-06, "loss": 0.3375, "step": 12652 }, { "epoch": 0.759164816703666, "grad_norm": 1.3468960523605347, "learning_rate": 1.0119355324021966e-06, "loss": 0.3489, "step": 12653 }, { "epoch": 0.7592248155036899, "grad_norm": 1.1365852355957031, "learning_rate": 1.0114572060614592e-06, "loss": 0.3647, "step": 12654 }, { "epoch": 0.759284814303714, "grad_norm": 1.3873233795166016, "learning_rate": 1.0109789737016459e-06, "loss": 0.3925, "step": 12655 }, { "epoch": 0.7593448131037379, "grad_norm": 1.3819185495376587, "learning_rate": 1.010500835340816e-06, "loss": 0.3169, "step": 12656 }, { "epoch": 0.759404811903762, "grad_norm": 1.455917477607727, "learning_rate": 1.01002279099703e-06, "loss": 0.3636, "step": 12657 }, { "epoch": 0.7594648107037859, "grad_norm": 1.208272099494934, "learning_rate": 1.0095448406883363e-06, "loss": 0.3605, "step": 12658 }, { "epoch": 0.75952480950381, "grad_norm": 1.3067296743392944, "learning_rate": 1.0090669844327873e-06, "loss": 0.3776, "step": 12659 }, { "epoch": 0.7595848083038339, "grad_norm": 1.2681714296340942, "learning_rate": 1.0085892222484312e-06, "loss": 0.3598, "step": 12660 }, { "epoch": 0.759644807103858, "grad_norm": 1.328248143196106, "learning_rate": 1.0081115541533093e-06, "loss": 0.336, "step": 12661 }, { "epoch": 0.7597048059038819, "grad_norm": 1.2683894634246826, "learning_rate": 1.0076339801654604e-06, "loss": 0.3457, "step": 12662 }, { "epoch": 0.759764804703906, "grad_norm": 1.3037254810333252, "learning_rate": 1.0071565003029202e-06, "loss": 0.3723, "step": 12663 }, { "epoch": 0.7598248035039299, "grad_norm": 1.2961630821228027, "learning_rate": 1.0066791145837229e-06, "loss": 0.3654, "step": 12664 }, { "epoch": 0.7598848023039539, "grad_norm": 1.4174259901046753, "learning_rate": 1.0062018230258959e-06, "loss": 0.3542, "step": 12665 }, { "epoch": 0.7599448011039779, "grad_norm": 1.2697219848632812, "learning_rate": 1.0057246256474627e-06, "loss": 0.3838, "step": 12666 }, { "epoch": 0.7600047999040019, "grad_norm": 1.3171414136886597, "learning_rate": 1.0052475224664483e-06, "loss": 0.3691, "step": 12667 }, { "epoch": 0.7600647987040259, "grad_norm": 1.265517234802246, "learning_rate": 1.0047705135008683e-06, "loss": 0.4001, "step": 12668 }, { "epoch": 0.7601247975040499, "grad_norm": 1.4031703472137451, "learning_rate": 1.0042935987687378e-06, "loss": 0.3871, "step": 12669 }, { "epoch": 0.7601847963040739, "grad_norm": 1.501164436340332, "learning_rate": 1.0038167782880667e-06, "loss": 0.4079, "step": 12670 }, { "epoch": 0.7602447951040979, "grad_norm": 1.3608143329620361, "learning_rate": 1.0033400520768636e-06, "loss": 0.35, "step": 12671 }, { "epoch": 0.7603047939041219, "grad_norm": 1.446382761001587, "learning_rate": 1.002863420153134e-06, "loss": 0.418, "step": 12672 }, { "epoch": 0.7603647927041459, "grad_norm": 1.3100042343139648, "learning_rate": 1.0023868825348738e-06, "loss": 0.3972, "step": 12673 }, { "epoch": 0.7604247915041699, "grad_norm": 1.2527092695236206, "learning_rate": 1.0019104392400832e-06, "loss": 0.3549, "step": 12674 }, { "epoch": 0.7604847903041939, "grad_norm": 1.4163405895233154, "learning_rate": 1.001434090286753e-06, "loss": 0.3867, "step": 12675 }, { "epoch": 0.760544789104218, "grad_norm": 1.3521502017974854, "learning_rate": 1.0009578356928743e-06, "loss": 0.355, "step": 12676 }, { "epoch": 0.7606047879042419, "grad_norm": 1.2384209632873535, "learning_rate": 1.000481675476433e-06, "loss": 0.3394, "step": 12677 }, { "epoch": 0.760664786704266, "grad_norm": 1.3636095523834229, "learning_rate": 1.0000056096554098e-06, "loss": 0.3714, "step": 12678 }, { "epoch": 0.7607247855042899, "grad_norm": 1.257460355758667, "learning_rate": 9.99529638247786e-07, "loss": 0.3639, "step": 12679 }, { "epoch": 0.760784784304314, "grad_norm": 1.3678414821624756, "learning_rate": 9.990537612715357e-07, "loss": 0.3588, "step": 12680 }, { "epoch": 0.7608447831043379, "grad_norm": 1.2776204347610474, "learning_rate": 9.985779787446307e-07, "loss": 0.3745, "step": 12681 }, { "epoch": 0.760904781904362, "grad_norm": 1.2754756212234497, "learning_rate": 9.981022906850373e-07, "loss": 0.3604, "step": 12682 }, { "epoch": 0.7609647807043859, "grad_norm": 1.3488954305648804, "learning_rate": 9.976266971107235e-07, "loss": 0.3874, "step": 12683 }, { "epoch": 0.7610247795044099, "grad_norm": 1.2715139389038086, "learning_rate": 9.97151198039648e-07, "loss": 0.3423, "step": 12684 }, { "epoch": 0.7610847783044339, "grad_norm": 1.42885422706604, "learning_rate": 9.96675793489768e-07, "loss": 0.3778, "step": 12685 }, { "epoch": 0.7611447771044579, "grad_norm": 1.2451746463775635, "learning_rate": 9.962004834790395e-07, "loss": 0.3721, "step": 12686 }, { "epoch": 0.7612047759044819, "grad_norm": 1.3020403385162354, "learning_rate": 9.957252680254112e-07, "loss": 0.3943, "step": 12687 }, { "epoch": 0.7612647747045059, "grad_norm": 1.3866305351257324, "learning_rate": 9.952501471468286e-07, "loss": 0.4291, "step": 12688 }, { "epoch": 0.7613247735045299, "grad_norm": 1.3534752130508423, "learning_rate": 9.947751208612378e-07, "loss": 0.3389, "step": 12689 }, { "epoch": 0.7613847723045539, "grad_norm": 1.4022858142852783, "learning_rate": 9.943001891865763e-07, "loss": 0.3508, "step": 12690 }, { "epoch": 0.7614447711045779, "grad_norm": 1.2009506225585938, "learning_rate": 9.938253521407809e-07, "loss": 0.3207, "step": 12691 }, { "epoch": 0.7615047699046019, "grad_norm": 1.3652037382125854, "learning_rate": 9.933506097417844e-07, "loss": 0.3738, "step": 12692 }, { "epoch": 0.7615647687046259, "grad_norm": 1.3789852857589722, "learning_rate": 9.92875962007514e-07, "loss": 0.3925, "step": 12693 }, { "epoch": 0.7616247675046499, "grad_norm": 1.315906286239624, "learning_rate": 9.924014089558968e-07, "loss": 0.3517, "step": 12694 }, { "epoch": 0.7616847663046739, "grad_norm": 1.2135707139968872, "learning_rate": 9.91926950604854e-07, "loss": 0.3313, "step": 12695 }, { "epoch": 0.7617447651046979, "grad_norm": 1.3768668174743652, "learning_rate": 9.91452586972304e-07, "loss": 0.3563, "step": 12696 }, { "epoch": 0.761804763904722, "grad_norm": 1.2219572067260742, "learning_rate": 9.90978318076159e-07, "loss": 0.3305, "step": 12697 }, { "epoch": 0.7618647627047459, "grad_norm": 1.2238582372665405, "learning_rate": 9.905041439343336e-07, "loss": 0.3405, "step": 12698 }, { "epoch": 0.76192476150477, "grad_norm": 1.4613032341003418, "learning_rate": 9.900300645647328e-07, "loss": 0.3855, "step": 12699 }, { "epoch": 0.7619847603047939, "grad_norm": 1.301163911819458, "learning_rate": 9.895560799852603e-07, "loss": 0.3591, "step": 12700 }, { "epoch": 0.762044759104818, "grad_norm": 1.2594943046569824, "learning_rate": 9.890821902138184e-07, "loss": 0.3331, "step": 12701 }, { "epoch": 0.7621047579048419, "grad_norm": 1.2274051904678345, "learning_rate": 9.88608395268301e-07, "loss": 0.3372, "step": 12702 }, { "epoch": 0.7621647567048659, "grad_norm": 1.362865924835205, "learning_rate": 9.881346951666055e-07, "loss": 0.4044, "step": 12703 }, { "epoch": 0.7622247555048899, "grad_norm": 1.4593865871429443, "learning_rate": 9.876610899266158e-07, "loss": 0.3797, "step": 12704 }, { "epoch": 0.7622847543049139, "grad_norm": 1.274781584739685, "learning_rate": 9.871875795662206e-07, "loss": 0.3298, "step": 12705 }, { "epoch": 0.7623447531049379, "grad_norm": 1.2726545333862305, "learning_rate": 9.867141641033032e-07, "loss": 0.3919, "step": 12706 }, { "epoch": 0.7624047519049619, "grad_norm": 1.2003828287124634, "learning_rate": 9.862408435557413e-07, "loss": 0.3431, "step": 12707 }, { "epoch": 0.7624647507049859, "grad_norm": 1.4275256395339966, "learning_rate": 9.8576761794141e-07, "loss": 0.3817, "step": 12708 }, { "epoch": 0.7625247495050099, "grad_norm": 1.3612819910049438, "learning_rate": 9.852944872781804e-07, "loss": 0.3651, "step": 12709 }, { "epoch": 0.7625847483050339, "grad_norm": 1.3829760551452637, "learning_rate": 9.848214515839215e-07, "loss": 0.364, "step": 12710 }, { "epoch": 0.7626447471050579, "grad_norm": 1.343312382698059, "learning_rate": 9.843485108764977e-07, "loss": 0.3925, "step": 12711 }, { "epoch": 0.7627047459050819, "grad_norm": 1.3909307718276978, "learning_rate": 9.838756651737681e-07, "loss": 0.3683, "step": 12712 }, { "epoch": 0.7627647447051059, "grad_norm": 1.3492149114608765, "learning_rate": 9.834029144935923e-07, "loss": 0.3852, "step": 12713 }, { "epoch": 0.7628247435051299, "grad_norm": 1.2200713157653809, "learning_rate": 9.82930258853823e-07, "loss": 0.3678, "step": 12714 }, { "epoch": 0.7628847423051539, "grad_norm": 1.2368061542510986, "learning_rate": 9.824576982723096e-07, "loss": 0.3341, "step": 12715 }, { "epoch": 0.7629447411051778, "grad_norm": 1.3678274154663086, "learning_rate": 9.819852327668984e-07, "loss": 0.3964, "step": 12716 }, { "epoch": 0.7630047399052019, "grad_norm": 1.327818512916565, "learning_rate": 9.815128623554327e-07, "loss": 0.3919, "step": 12717 }, { "epoch": 0.7630647387052258, "grad_norm": 1.2680270671844482, "learning_rate": 9.81040587055754e-07, "loss": 0.3576, "step": 12718 }, { "epoch": 0.7631247375052499, "grad_norm": 1.3528668880462646, "learning_rate": 9.805684068856942e-07, "loss": 0.3716, "step": 12719 }, { "epoch": 0.763184736305274, "grad_norm": 1.3437124490737915, "learning_rate": 9.800963218630883e-07, "loss": 0.4319, "step": 12720 }, { "epoch": 0.7632447351052979, "grad_norm": 1.2999616861343384, "learning_rate": 9.796243320057622e-07, "loss": 0.36, "step": 12721 }, { "epoch": 0.7633047339053219, "grad_norm": 1.352648138999939, "learning_rate": 9.791524373315444e-07, "loss": 0.3766, "step": 12722 }, { "epoch": 0.7633647327053459, "grad_norm": 1.2655713558197021, "learning_rate": 9.786806378582523e-07, "loss": 0.3635, "step": 12723 }, { "epoch": 0.7634247315053699, "grad_norm": 1.341775894165039, "learning_rate": 9.782089336037053e-07, "loss": 0.3619, "step": 12724 }, { "epoch": 0.7634847303053939, "grad_norm": 1.4085367918014526, "learning_rate": 9.777373245857185e-07, "loss": 0.4079, "step": 12725 }, { "epoch": 0.7635447291054179, "grad_norm": 1.5255253314971924, "learning_rate": 9.772658108221018e-07, "loss": 0.4325, "step": 12726 }, { "epoch": 0.7636047279054419, "grad_norm": 1.2751448154449463, "learning_rate": 9.767943923306619e-07, "loss": 0.3844, "step": 12727 }, { "epoch": 0.7636647267054659, "grad_norm": 1.3350406885147095, "learning_rate": 9.763230691292007e-07, "loss": 0.4182, "step": 12728 }, { "epoch": 0.7637247255054899, "grad_norm": 1.4502527713775635, "learning_rate": 9.758518412355208e-07, "loss": 0.4333, "step": 12729 }, { "epoch": 0.7637847243055139, "grad_norm": 1.3634881973266602, "learning_rate": 9.753807086674168e-07, "loss": 0.3707, "step": 12730 }, { "epoch": 0.7638447231055379, "grad_norm": 1.2439080476760864, "learning_rate": 9.749096714426798e-07, "loss": 0.409, "step": 12731 }, { "epoch": 0.7639047219055619, "grad_norm": 1.3900412321090698, "learning_rate": 9.744387295791017e-07, "loss": 0.3736, "step": 12732 }, { "epoch": 0.7639647207055859, "grad_norm": 1.27080237865448, "learning_rate": 9.739678830944664e-07, "loss": 0.3624, "step": 12733 }, { "epoch": 0.7640247195056099, "grad_norm": 1.4209405183792114, "learning_rate": 9.73497132006554e-07, "loss": 0.4141, "step": 12734 }, { "epoch": 0.7640847183056338, "grad_norm": 1.2705020904541016, "learning_rate": 9.73026476333146e-07, "loss": 0.3793, "step": 12735 }, { "epoch": 0.7641447171056579, "grad_norm": 1.3395708799362183, "learning_rate": 9.725559160920136e-07, "loss": 0.4033, "step": 12736 }, { "epoch": 0.7642047159056818, "grad_norm": 1.3166879415512085, "learning_rate": 9.720854513009303e-07, "loss": 0.3379, "step": 12737 }, { "epoch": 0.7642647147057059, "grad_norm": 1.2448854446411133, "learning_rate": 9.716150819776627e-07, "loss": 0.4003, "step": 12738 }, { "epoch": 0.7643247135057298, "grad_norm": 1.2652862071990967, "learning_rate": 9.711448081399737e-07, "loss": 0.3784, "step": 12739 }, { "epoch": 0.7643847123057539, "grad_norm": 1.2465903759002686, "learning_rate": 9.706746298056233e-07, "loss": 0.3328, "step": 12740 }, { "epoch": 0.7644447111057778, "grad_norm": 1.254616141319275, "learning_rate": 9.702045469923692e-07, "loss": 0.3329, "step": 12741 }, { "epoch": 0.7645047099058019, "grad_norm": 1.3502583503723145, "learning_rate": 9.697345597179637e-07, "loss": 0.3421, "step": 12742 }, { "epoch": 0.7645647087058259, "grad_norm": 1.2462774515151978, "learning_rate": 9.69264668000155e-07, "loss": 0.3645, "step": 12743 }, { "epoch": 0.7646247075058499, "grad_norm": 1.4019564390182495, "learning_rate": 9.687948718566909e-07, "loss": 0.3765, "step": 12744 }, { "epoch": 0.7646847063058739, "grad_norm": 1.1942542791366577, "learning_rate": 9.683251713053124e-07, "loss": 0.336, "step": 12745 }, { "epoch": 0.7647447051058979, "grad_norm": 1.3114432096481323, "learning_rate": 9.678555663637567e-07, "loss": 0.3915, "step": 12746 }, { "epoch": 0.7648047039059219, "grad_norm": 1.193048119544983, "learning_rate": 9.67386057049761e-07, "loss": 0.3618, "step": 12747 }, { "epoch": 0.7648647027059459, "grad_norm": 1.3282753229141235, "learning_rate": 9.669166433810544e-07, "loss": 0.307, "step": 12748 }, { "epoch": 0.7649247015059699, "grad_norm": 1.351399540901184, "learning_rate": 9.664473253753675e-07, "loss": 0.3975, "step": 12749 }, { "epoch": 0.7649847003059939, "grad_norm": 1.19589102268219, "learning_rate": 9.6597810305042e-07, "loss": 0.3433, "step": 12750 }, { "epoch": 0.7650446991060179, "grad_norm": 1.3876116275787354, "learning_rate": 9.655089764239356e-07, "loss": 0.411, "step": 12751 }, { "epoch": 0.7651046979060419, "grad_norm": 1.358136534690857, "learning_rate": 9.650399455136292e-07, "loss": 0.3826, "step": 12752 }, { "epoch": 0.7651646967060659, "grad_norm": 1.1990861892700195, "learning_rate": 9.645710103372158e-07, "loss": 0.3589, "step": 12753 }, { "epoch": 0.7652246955060898, "grad_norm": 1.3123698234558105, "learning_rate": 9.641021709124042e-07, "loss": 0.3944, "step": 12754 }, { "epoch": 0.7652846943061139, "grad_norm": 1.2843579053878784, "learning_rate": 9.636334272568987e-07, "loss": 0.3482, "step": 12755 }, { "epoch": 0.7653446931061378, "grad_norm": 1.4311532974243164, "learning_rate": 9.63164779388404e-07, "loss": 0.3847, "step": 12756 }, { "epoch": 0.7654046919061619, "grad_norm": 1.3484110832214355, "learning_rate": 9.62696227324618e-07, "loss": 0.3946, "step": 12757 }, { "epoch": 0.7654646907061858, "grad_norm": 1.3387690782546997, "learning_rate": 9.622277710832345e-07, "loss": 0.3852, "step": 12758 }, { "epoch": 0.7655246895062099, "grad_norm": 1.2234703302383423, "learning_rate": 9.617594106819473e-07, "loss": 0.4, "step": 12759 }, { "epoch": 0.7655846883062338, "grad_norm": 1.416429877281189, "learning_rate": 9.612911461384425e-07, "loss": 0.3961, "step": 12760 }, { "epoch": 0.7656446871062579, "grad_norm": 1.3610267639160156, "learning_rate": 9.60822977470405e-07, "loss": 0.3589, "step": 12761 }, { "epoch": 0.7657046859062818, "grad_norm": 1.2528347969055176, "learning_rate": 9.603549046955138e-07, "loss": 0.3549, "step": 12762 }, { "epoch": 0.7657646847063059, "grad_norm": 1.2771626710891724, "learning_rate": 9.598869278314486e-07, "loss": 0.3476, "step": 12763 }, { "epoch": 0.7658246835063299, "grad_norm": 1.314720869064331, "learning_rate": 9.594190468958817e-07, "loss": 0.3596, "step": 12764 }, { "epoch": 0.7658846823063539, "grad_norm": 1.251626968383789, "learning_rate": 9.58951261906481e-07, "loss": 0.3794, "step": 12765 }, { "epoch": 0.7659446811063779, "grad_norm": 1.3830461502075195, "learning_rate": 9.584835728809157e-07, "loss": 0.418, "step": 12766 }, { "epoch": 0.7660046799064019, "grad_norm": 1.3949729204177856, "learning_rate": 9.580159798368455e-07, "loss": 0.3999, "step": 12767 }, { "epoch": 0.7660646787064259, "grad_norm": 1.371104121208191, "learning_rate": 9.575484827919322e-07, "loss": 0.3542, "step": 12768 }, { "epoch": 0.7661246775064499, "grad_norm": 1.3316988945007324, "learning_rate": 9.570810817638274e-07, "loss": 0.3603, "step": 12769 }, { "epoch": 0.7661846763064739, "grad_norm": 1.3846828937530518, "learning_rate": 9.56613776770185e-07, "loss": 0.372, "step": 12770 }, { "epoch": 0.7662446751064979, "grad_norm": 1.3222885131835938, "learning_rate": 9.56146567828653e-07, "loss": 0.3401, "step": 12771 }, { "epoch": 0.7663046739065219, "grad_norm": 1.3730419874191284, "learning_rate": 9.556794549568762e-07, "loss": 0.4193, "step": 12772 }, { "epoch": 0.7663646727065458, "grad_norm": 1.3415340185165405, "learning_rate": 9.55212438172494e-07, "loss": 0.3288, "step": 12773 }, { "epoch": 0.7664246715065699, "grad_norm": 1.250502586364746, "learning_rate": 9.547455174931428e-07, "loss": 0.3825, "step": 12774 }, { "epoch": 0.7664846703065938, "grad_norm": 1.2746331691741943, "learning_rate": 9.542786929364586e-07, "loss": 0.3917, "step": 12775 }, { "epoch": 0.7665446691066179, "grad_norm": 1.4782956838607788, "learning_rate": 9.538119645200696e-07, "loss": 0.3682, "step": 12776 }, { "epoch": 0.7666046679066418, "grad_norm": 1.2888362407684326, "learning_rate": 9.533453322616015e-07, "loss": 0.4031, "step": 12777 }, { "epoch": 0.7666646667066659, "grad_norm": 1.2946816682815552, "learning_rate": 9.528787961786786e-07, "loss": 0.3589, "step": 12778 }, { "epoch": 0.7667246655066898, "grad_norm": 1.2915409803390503, "learning_rate": 9.524123562889189e-07, "loss": 0.3341, "step": 12779 }, { "epoch": 0.7667846643067139, "grad_norm": 1.3730783462524414, "learning_rate": 9.519460126099378e-07, "loss": 0.3549, "step": 12780 }, { "epoch": 0.7668446631067378, "grad_norm": 1.2830209732055664, "learning_rate": 9.514797651593458e-07, "loss": 0.3812, "step": 12781 }, { "epoch": 0.7669046619067619, "grad_norm": 1.3183047771453857, "learning_rate": 9.510136139547522e-07, "loss": 0.371, "step": 12782 }, { "epoch": 0.7669646607067858, "grad_norm": 1.296774983406067, "learning_rate": 9.505475590137623e-07, "loss": 0.3278, "step": 12783 }, { "epoch": 0.7670246595068099, "grad_norm": 1.3229748010635376, "learning_rate": 9.500816003539755e-07, "loss": 0.35, "step": 12784 }, { "epoch": 0.7670846583068338, "grad_norm": 1.4293954372406006, "learning_rate": 9.496157379929895e-07, "loss": 0.3317, "step": 12785 }, { "epoch": 0.7671446571068579, "grad_norm": 1.4236507415771484, "learning_rate": 9.491499719483964e-07, "loss": 0.363, "step": 12786 }, { "epoch": 0.7672046559068819, "grad_norm": 1.217055320739746, "learning_rate": 9.486843022377885e-07, "loss": 0.3512, "step": 12787 }, { "epoch": 0.7672646547069059, "grad_norm": 1.4854484796524048, "learning_rate": 9.482187288787502e-07, "loss": 0.4046, "step": 12788 }, { "epoch": 0.7673246535069299, "grad_norm": 1.3917334079742432, "learning_rate": 9.477532518888642e-07, "loss": 0.321, "step": 12789 }, { "epoch": 0.7673846523069539, "grad_norm": 1.3087055683135986, "learning_rate": 9.472878712857107e-07, "loss": 0.3609, "step": 12790 }, { "epoch": 0.7674446511069779, "grad_norm": 1.3049622774124146, "learning_rate": 9.468225870868643e-07, "loss": 0.3374, "step": 12791 }, { "epoch": 0.7675046499070018, "grad_norm": 1.399314284324646, "learning_rate": 9.46357399309896e-07, "loss": 0.4466, "step": 12792 }, { "epoch": 0.7675646487070259, "grad_norm": 1.2501968145370483, "learning_rate": 9.458923079723738e-07, "loss": 0.3459, "step": 12793 }, { "epoch": 0.7676246475070498, "grad_norm": 1.3222085237503052, "learning_rate": 9.454273130918625e-07, "loss": 0.3756, "step": 12794 }, { "epoch": 0.7676846463070739, "grad_norm": 1.3893004655838013, "learning_rate": 9.44962414685925e-07, "loss": 0.3514, "step": 12795 }, { "epoch": 0.7677446451070978, "grad_norm": 1.395268201828003, "learning_rate": 9.444976127721146e-07, "loss": 0.3311, "step": 12796 }, { "epoch": 0.7678046439071219, "grad_norm": 1.2462177276611328, "learning_rate": 9.440329073679871e-07, "loss": 0.3546, "step": 12797 }, { "epoch": 0.7678646427071458, "grad_norm": 1.3033336400985718, "learning_rate": 9.43568298491091e-07, "loss": 0.3854, "step": 12798 }, { "epoch": 0.7679246415071699, "grad_norm": 1.3155715465545654, "learning_rate": 9.431037861589742e-07, "loss": 0.3717, "step": 12799 }, { "epoch": 0.7679846403071938, "grad_norm": 1.4072601795196533, "learning_rate": 9.426393703891779e-07, "loss": 0.367, "step": 12800 }, { "epoch": 0.7680446391072179, "grad_norm": 1.4536535739898682, "learning_rate": 9.421750511992408e-07, "loss": 0.394, "step": 12801 }, { "epoch": 0.7681046379072418, "grad_norm": 1.294808268547058, "learning_rate": 9.417108286066992e-07, "loss": 0.3548, "step": 12802 }, { "epoch": 0.7681646367072659, "grad_norm": 1.3700281381607056, "learning_rate": 9.412467026290843e-07, "loss": 0.3943, "step": 12803 }, { "epoch": 0.7682246355072898, "grad_norm": 1.4241911172866821, "learning_rate": 9.407826732839236e-07, "loss": 0.3943, "step": 12804 }, { "epoch": 0.7682846343073139, "grad_norm": 1.294882893562317, "learning_rate": 9.403187405887405e-07, "loss": 0.3567, "step": 12805 }, { "epoch": 0.7683446331073378, "grad_norm": 1.2247178554534912, "learning_rate": 9.39854904561058e-07, "loss": 0.3333, "step": 12806 }, { "epoch": 0.7684046319073619, "grad_norm": 1.434106707572937, "learning_rate": 9.393911652183914e-07, "loss": 0.3347, "step": 12807 }, { "epoch": 0.7684646307073858, "grad_norm": 1.3371379375457764, "learning_rate": 9.389275225782537e-07, "loss": 0.3892, "step": 12808 }, { "epoch": 0.7685246295074099, "grad_norm": 1.2950483560562134, "learning_rate": 9.384639766581563e-07, "loss": 0.4066, "step": 12809 }, { "epoch": 0.7685846283074339, "grad_norm": 1.42036771774292, "learning_rate": 9.38000527475604e-07, "loss": 0.4237, "step": 12810 }, { "epoch": 0.7686446271074578, "grad_norm": 1.32439124584198, "learning_rate": 9.37537175048098e-07, "loss": 0.4005, "step": 12811 }, { "epoch": 0.7687046259074819, "grad_norm": 1.1710364818572998, "learning_rate": 9.3707391939314e-07, "loss": 0.2837, "step": 12812 }, { "epoch": 0.7687646247075058, "grad_norm": 1.3334599733352661, "learning_rate": 9.366107605282222e-07, "loss": 0.3992, "step": 12813 }, { "epoch": 0.7688246235075299, "grad_norm": 1.480143427848816, "learning_rate": 9.36147698470839e-07, "loss": 0.402, "step": 12814 }, { "epoch": 0.7688846223075538, "grad_norm": 1.1933375597000122, "learning_rate": 9.356847332384742e-07, "loss": 0.3652, "step": 12815 }, { "epoch": 0.7689446211075779, "grad_norm": 1.2362885475158691, "learning_rate": 9.352218648486142e-07, "loss": 0.3884, "step": 12816 }, { "epoch": 0.7690046199076018, "grad_norm": 1.3043314218521118, "learning_rate": 9.347590933187404e-07, "loss": 0.3564, "step": 12817 }, { "epoch": 0.7690646187076259, "grad_norm": 1.228980302810669, "learning_rate": 9.342964186663286e-07, "loss": 0.3285, "step": 12818 }, { "epoch": 0.7691246175076498, "grad_norm": 1.3742027282714844, "learning_rate": 9.338338409088516e-07, "loss": 0.3655, "step": 12819 }, { "epoch": 0.7691846163076739, "grad_norm": 1.2465304136276245, "learning_rate": 9.333713600637777e-07, "loss": 0.324, "step": 12820 }, { "epoch": 0.7692446151076978, "grad_norm": 1.2104650735855103, "learning_rate": 9.329089761485752e-07, "loss": 0.4084, "step": 12821 }, { "epoch": 0.7693046139077219, "grad_norm": 1.1964447498321533, "learning_rate": 9.324466891807051e-07, "loss": 0.3337, "step": 12822 }, { "epoch": 0.7693646127077458, "grad_norm": 1.2378934621810913, "learning_rate": 9.319844991776248e-07, "loss": 0.3789, "step": 12823 }, { "epoch": 0.7694246115077699, "grad_norm": 1.256229043006897, "learning_rate": 9.31522406156791e-07, "loss": 0.4064, "step": 12824 }, { "epoch": 0.7694846103077938, "grad_norm": 1.2485401630401611, "learning_rate": 9.310604101356531e-07, "loss": 0.3112, "step": 12825 }, { "epoch": 0.7695446091078179, "grad_norm": 1.3565089702606201, "learning_rate": 9.305985111316612e-07, "loss": 0.3496, "step": 12826 }, { "epoch": 0.7696046079078418, "grad_norm": 1.167555809020996, "learning_rate": 9.301367091622558e-07, "loss": 0.3109, "step": 12827 }, { "epoch": 0.7696646067078659, "grad_norm": 1.42489492893219, "learning_rate": 9.296750042448783e-07, "loss": 0.4351, "step": 12828 }, { "epoch": 0.7697246055078898, "grad_norm": 1.4892340898513794, "learning_rate": 9.292133963969668e-07, "loss": 0.4115, "step": 12829 }, { "epoch": 0.7697846043079138, "grad_norm": 1.3453917503356934, "learning_rate": 9.287518856359526e-07, "loss": 0.3737, "step": 12830 }, { "epoch": 0.7698446031079378, "grad_norm": 1.3519283533096313, "learning_rate": 9.282904719792652e-07, "loss": 0.3481, "step": 12831 }, { "epoch": 0.7699046019079618, "grad_norm": 1.267125129699707, "learning_rate": 9.278291554443288e-07, "loss": 0.3352, "step": 12832 }, { "epoch": 0.7699646007079859, "grad_norm": 1.3393839597702026, "learning_rate": 9.273679360485675e-07, "loss": 0.3977, "step": 12833 }, { "epoch": 0.7700245995080098, "grad_norm": 1.2641379833221436, "learning_rate": 9.269068138093984e-07, "loss": 0.3361, "step": 12834 }, { "epoch": 0.7700845983080339, "grad_norm": 1.3843443393707275, "learning_rate": 9.26445788744235e-07, "loss": 0.3835, "step": 12835 }, { "epoch": 0.7701445971080578, "grad_norm": 1.3429869413375854, "learning_rate": 9.2598486087049e-07, "loss": 0.3544, "step": 12836 }, { "epoch": 0.7702045959080819, "grad_norm": 1.3971643447875977, "learning_rate": 9.255240302055697e-07, "loss": 0.4101, "step": 12837 }, { "epoch": 0.7702645947081058, "grad_norm": 1.278681755065918, "learning_rate": 9.250632967668778e-07, "loss": 0.3768, "step": 12838 }, { "epoch": 0.7703245935081299, "grad_norm": 1.23419988155365, "learning_rate": 9.246026605718124e-07, "loss": 0.3237, "step": 12839 }, { "epoch": 0.7703845923081538, "grad_norm": 1.2685989141464233, "learning_rate": 9.241421216377709e-07, "loss": 0.3525, "step": 12840 }, { "epoch": 0.7704445911081779, "grad_norm": 1.3525511026382446, "learning_rate": 9.236816799821484e-07, "loss": 0.4039, "step": 12841 }, { "epoch": 0.7705045899082018, "grad_norm": 1.2166175842285156, "learning_rate": 9.232213356223289e-07, "loss": 0.3314, "step": 12842 }, { "epoch": 0.7705645887082259, "grad_norm": 1.333751916885376, "learning_rate": 9.227610885757006e-07, "loss": 0.3428, "step": 12843 }, { "epoch": 0.7706245875082498, "grad_norm": 1.3168590068817139, "learning_rate": 9.223009388596433e-07, "loss": 0.3788, "step": 12844 }, { "epoch": 0.7706845863082739, "grad_norm": 1.3224878311157227, "learning_rate": 9.218408864915364e-07, "loss": 0.3647, "step": 12845 }, { "epoch": 0.7707445851082978, "grad_norm": 1.383458137512207, "learning_rate": 9.213809314887532e-07, "loss": 0.4358, "step": 12846 }, { "epoch": 0.7708045839083218, "grad_norm": 1.3602092266082764, "learning_rate": 9.209210738686632e-07, "loss": 0.3957, "step": 12847 }, { "epoch": 0.7708645827083458, "grad_norm": 1.3855048418045044, "learning_rate": 9.20461313648635e-07, "loss": 0.416, "step": 12848 }, { "epoch": 0.7709245815083698, "grad_norm": 1.5404001474380493, "learning_rate": 9.200016508460304e-07, "loss": 0.4087, "step": 12849 }, { "epoch": 0.7709845803083938, "grad_norm": 1.3036773204803467, "learning_rate": 9.195420854782094e-07, "loss": 0.3354, "step": 12850 }, { "epoch": 0.7710445791084178, "grad_norm": 1.4608113765716553, "learning_rate": 9.19082617562526e-07, "loss": 0.389, "step": 12851 }, { "epoch": 0.7711045779084418, "grad_norm": 1.2519136667251587, "learning_rate": 9.186232471163344e-07, "loss": 0.3433, "step": 12852 }, { "epoch": 0.7711645767084658, "grad_norm": 1.2621406316757202, "learning_rate": 9.181639741569822e-07, "loss": 0.3824, "step": 12853 }, { "epoch": 0.7712245755084899, "grad_norm": 1.4222512245178223, "learning_rate": 9.177047987018129e-07, "loss": 0.4021, "step": 12854 }, { "epoch": 0.7712845743085138, "grad_norm": 1.3959606885910034, "learning_rate": 9.172457207681695e-07, "loss": 0.3568, "step": 12855 }, { "epoch": 0.7713445731085379, "grad_norm": 1.3899143934249878, "learning_rate": 9.167867403733881e-07, "loss": 0.3987, "step": 12856 }, { "epoch": 0.7714045719085618, "grad_norm": 1.428907036781311, "learning_rate": 9.163278575348015e-07, "loss": 0.4049, "step": 12857 }, { "epoch": 0.7714645707085859, "grad_norm": 1.2688236236572266, "learning_rate": 9.158690722697416e-07, "loss": 0.3985, "step": 12858 }, { "epoch": 0.7715245695086098, "grad_norm": 1.450690507888794, "learning_rate": 9.154103845955325e-07, "loss": 0.3872, "step": 12859 }, { "epoch": 0.7715845683086339, "grad_norm": 1.3646955490112305, "learning_rate": 9.149517945294995e-07, "loss": 0.4007, "step": 12860 }, { "epoch": 0.7716445671086578, "grad_norm": 1.3114861249923706, "learning_rate": 9.144933020889582e-07, "loss": 0.3703, "step": 12861 }, { "epoch": 0.7717045659086819, "grad_norm": 1.3959990739822388, "learning_rate": 9.140349072912264e-07, "loss": 0.3924, "step": 12862 }, { "epoch": 0.7717645647087058, "grad_norm": 1.2877893447875977, "learning_rate": 9.135766101536132e-07, "loss": 0.3785, "step": 12863 }, { "epoch": 0.7718245635087299, "grad_norm": 1.3288480043411255, "learning_rate": 9.131184106934288e-07, "loss": 0.3971, "step": 12864 }, { "epoch": 0.7718845623087538, "grad_norm": 1.369088053703308, "learning_rate": 9.126603089279762e-07, "loss": 0.3804, "step": 12865 }, { "epoch": 0.7719445611087778, "grad_norm": 1.4095871448516846, "learning_rate": 9.122023048745551e-07, "loss": 0.4116, "step": 12866 }, { "epoch": 0.7720045599088018, "grad_norm": 1.316737174987793, "learning_rate": 9.117443985504636e-07, "loss": 0.4022, "step": 12867 }, { "epoch": 0.7720645587088258, "grad_norm": 1.3162992000579834, "learning_rate": 9.112865899729943e-07, "loss": 0.3573, "step": 12868 }, { "epoch": 0.7721245575088498, "grad_norm": 1.392945408821106, "learning_rate": 9.108288791594354e-07, "loss": 0.3577, "step": 12869 }, { "epoch": 0.7721845563088738, "grad_norm": 1.3479102849960327, "learning_rate": 9.103712661270744e-07, "loss": 0.3632, "step": 12870 }, { "epoch": 0.7722445551088978, "grad_norm": 1.198236107826233, "learning_rate": 9.099137508931913e-07, "loss": 0.3831, "step": 12871 }, { "epoch": 0.7723045539089218, "grad_norm": 1.447535753250122, "learning_rate": 9.094563334750678e-07, "loss": 0.3748, "step": 12872 }, { "epoch": 0.7723645527089458, "grad_norm": 1.392871618270874, "learning_rate": 9.089990138899737e-07, "loss": 0.3921, "step": 12873 }, { "epoch": 0.7724245515089698, "grad_norm": 1.4588011503219604, "learning_rate": 9.085417921551835e-07, "loss": 0.3892, "step": 12874 }, { "epoch": 0.7724845503089938, "grad_norm": 1.4900926351547241, "learning_rate": 9.080846682879618e-07, "loss": 0.3971, "step": 12875 }, { "epoch": 0.7725445491090178, "grad_norm": 1.1858326196670532, "learning_rate": 9.076276423055749e-07, "loss": 0.3426, "step": 12876 }, { "epoch": 0.7726045479090419, "grad_norm": 1.2241251468658447, "learning_rate": 9.071707142252806e-07, "loss": 0.3658, "step": 12877 }, { "epoch": 0.7726645467090658, "grad_norm": 1.307502269744873, "learning_rate": 9.06713884064335e-07, "loss": 0.3434, "step": 12878 }, { "epoch": 0.7727245455090899, "grad_norm": 1.1841593980789185, "learning_rate": 9.062571518399921e-07, "loss": 0.3811, "step": 12879 }, { "epoch": 0.7727845443091138, "grad_norm": 1.248215913772583, "learning_rate": 9.058005175694993e-07, "loss": 0.3461, "step": 12880 }, { "epoch": 0.7728445431091379, "grad_norm": 1.3764729499816895, "learning_rate": 9.053439812701004e-07, "loss": 0.3852, "step": 12881 }, { "epoch": 0.7729045419091618, "grad_norm": 1.3243907690048218, "learning_rate": 9.048875429590395e-07, "loss": 0.3756, "step": 12882 }, { "epoch": 0.7729645407091859, "grad_norm": 1.3961684703826904, "learning_rate": 9.044312026535531e-07, "loss": 0.3746, "step": 12883 }, { "epoch": 0.7730245395092098, "grad_norm": 1.3305270671844482, "learning_rate": 9.039749603708742e-07, "loss": 0.3599, "step": 12884 }, { "epoch": 0.7730845383092338, "grad_norm": 1.2107104063034058, "learning_rate": 9.035188161282328e-07, "loss": 0.3355, "step": 12885 }, { "epoch": 0.7731445371092578, "grad_norm": 1.3580650091171265, "learning_rate": 9.030627699428565e-07, "loss": 0.3543, "step": 12886 }, { "epoch": 0.7732045359092818, "grad_norm": 1.3134303092956543, "learning_rate": 9.026068218319693e-07, "loss": 0.3592, "step": 12887 }, { "epoch": 0.7732645347093058, "grad_norm": 1.355136752128601, "learning_rate": 9.02150971812787e-07, "loss": 0.4291, "step": 12888 }, { "epoch": 0.7733245335093298, "grad_norm": 1.1992466449737549, "learning_rate": 9.016952199025277e-07, "loss": 0.3514, "step": 12889 }, { "epoch": 0.7733845323093538, "grad_norm": 1.3267170190811157, "learning_rate": 9.012395661184011e-07, "loss": 0.4202, "step": 12890 }, { "epoch": 0.7734445311093778, "grad_norm": 1.2142934799194336, "learning_rate": 9.007840104776179e-07, "loss": 0.3422, "step": 12891 }, { "epoch": 0.7735045299094018, "grad_norm": 1.4062920808792114, "learning_rate": 9.003285529973791e-07, "loss": 0.3718, "step": 12892 }, { "epoch": 0.7735645287094258, "grad_norm": 1.2185707092285156, "learning_rate": 8.998731936948864e-07, "loss": 0.3394, "step": 12893 }, { "epoch": 0.7736245275094498, "grad_norm": 1.2514286041259766, "learning_rate": 8.994179325873381e-07, "loss": 0.3236, "step": 12894 }, { "epoch": 0.7736845263094738, "grad_norm": 1.2299867868423462, "learning_rate": 8.989627696919263e-07, "loss": 0.3845, "step": 12895 }, { "epoch": 0.7737445251094978, "grad_norm": 1.3129125833511353, "learning_rate": 8.985077050258403e-07, "loss": 0.3348, "step": 12896 }, { "epoch": 0.7738045239095218, "grad_norm": 1.3227535486221313, "learning_rate": 8.980527386062651e-07, "loss": 0.3547, "step": 12897 }, { "epoch": 0.7738645227095458, "grad_norm": 1.3232338428497314, "learning_rate": 8.975978704503844e-07, "loss": 0.3819, "step": 12898 }, { "epoch": 0.7739245215095698, "grad_norm": 1.2806943655014038, "learning_rate": 8.97143100575376e-07, "loss": 0.3869, "step": 12899 }, { "epoch": 0.7739845203095939, "grad_norm": 1.483825922012329, "learning_rate": 8.966884289984126e-07, "loss": 0.4083, "step": 12900 }, { "epoch": 0.7740445191096178, "grad_norm": 1.2654516696929932, "learning_rate": 8.962338557366677e-07, "loss": 0.3696, "step": 12901 }, { "epoch": 0.7741045179096419, "grad_norm": 1.2871190309524536, "learning_rate": 8.957793808073074e-07, "loss": 0.4106, "step": 12902 }, { "epoch": 0.7741645167096658, "grad_norm": 1.5059914588928223, "learning_rate": 8.953250042274951e-07, "loss": 0.4197, "step": 12903 }, { "epoch": 0.7742245155096898, "grad_norm": 1.509498953819275, "learning_rate": 8.948707260143893e-07, "loss": 0.3993, "step": 12904 }, { "epoch": 0.7742845143097138, "grad_norm": 1.4471412897109985, "learning_rate": 8.944165461851473e-07, "loss": 0.4294, "step": 12905 }, { "epoch": 0.7743445131097378, "grad_norm": 1.3532798290252686, "learning_rate": 8.939624647569236e-07, "loss": 0.3255, "step": 12906 }, { "epoch": 0.7744045119097618, "grad_norm": 1.410019874572754, "learning_rate": 8.935084817468624e-07, "loss": 0.4112, "step": 12907 }, { "epoch": 0.7744645107097858, "grad_norm": 1.3132272958755493, "learning_rate": 8.930545971721118e-07, "loss": 0.3433, "step": 12908 }, { "epoch": 0.7745245095098098, "grad_norm": 1.3123456239700317, "learning_rate": 8.926008110498106e-07, "loss": 0.3365, "step": 12909 }, { "epoch": 0.7745845083098338, "grad_norm": 1.3077601194381714, "learning_rate": 8.921471233970986e-07, "loss": 0.3819, "step": 12910 }, { "epoch": 0.7746445071098578, "grad_norm": 1.5065537691116333, "learning_rate": 8.916935342311085e-07, "loss": 0.377, "step": 12911 }, { "epoch": 0.7747045059098818, "grad_norm": 1.221079707145691, "learning_rate": 8.912400435689693e-07, "loss": 0.4194, "step": 12912 }, { "epoch": 0.7747645047099058, "grad_norm": 1.4724434614181519, "learning_rate": 8.90786651427809e-07, "loss": 0.3818, "step": 12913 }, { "epoch": 0.7748245035099298, "grad_norm": 1.2676838636398315, "learning_rate": 8.903333578247493e-07, "loss": 0.3516, "step": 12914 }, { "epoch": 0.7748845023099538, "grad_norm": 1.3955849409103394, "learning_rate": 8.898801627769089e-07, "loss": 0.3905, "step": 12915 }, { "epoch": 0.7749445011099778, "grad_norm": 1.3091418743133545, "learning_rate": 8.894270663014023e-07, "loss": 0.3551, "step": 12916 }, { "epoch": 0.7750044999100018, "grad_norm": 1.3875234127044678, "learning_rate": 8.889740684153418e-07, "loss": 0.4072, "step": 12917 }, { "epoch": 0.7750644987100258, "grad_norm": 1.265021562576294, "learning_rate": 8.885211691358368e-07, "loss": 0.3322, "step": 12918 }, { "epoch": 0.7751244975100497, "grad_norm": 1.4751781225204468, "learning_rate": 8.880683684799871e-07, "loss": 0.3971, "step": 12919 }, { "epoch": 0.7751844963100738, "grad_norm": 1.5718183517456055, "learning_rate": 8.876156664648964e-07, "loss": 0.3988, "step": 12920 }, { "epoch": 0.7752444951100979, "grad_norm": 1.4248182773590088, "learning_rate": 8.87163063107659e-07, "loss": 0.3838, "step": 12921 }, { "epoch": 0.7753044939101218, "grad_norm": 1.3577568531036377, "learning_rate": 8.867105584253699e-07, "loss": 0.4155, "step": 12922 }, { "epoch": 0.7753644927101458, "grad_norm": 1.2133177518844604, "learning_rate": 8.862581524351165e-07, "loss": 0.3585, "step": 12923 }, { "epoch": 0.7754244915101698, "grad_norm": 1.4162970781326294, "learning_rate": 8.858058451539839e-07, "loss": 0.3879, "step": 12924 }, { "epoch": 0.7754844903101938, "grad_norm": 1.3830657005310059, "learning_rate": 8.853536365990549e-07, "loss": 0.3506, "step": 12925 }, { "epoch": 0.7755444891102178, "grad_norm": 1.2766337394714355, "learning_rate": 8.849015267874067e-07, "loss": 0.3819, "step": 12926 }, { "epoch": 0.7756044879102418, "grad_norm": 1.3596656322479248, "learning_rate": 8.844495157361127e-07, "loss": 0.4266, "step": 12927 }, { "epoch": 0.7756644867102658, "grad_norm": 1.4638265371322632, "learning_rate": 8.839976034622453e-07, "loss": 0.3931, "step": 12928 }, { "epoch": 0.7757244855102898, "grad_norm": 1.4291623830795288, "learning_rate": 8.835457899828696e-07, "loss": 0.4178, "step": 12929 }, { "epoch": 0.7757844843103138, "grad_norm": 1.3231350183486938, "learning_rate": 8.830940753150488e-07, "loss": 0.3993, "step": 12930 }, { "epoch": 0.7758444831103378, "grad_norm": 1.4155937433242798, "learning_rate": 8.826424594758415e-07, "loss": 0.3512, "step": 12931 }, { "epoch": 0.7759044819103618, "grad_norm": 1.3852168321609497, "learning_rate": 8.821909424823048e-07, "loss": 0.3934, "step": 12932 }, { "epoch": 0.7759644807103858, "grad_norm": 1.1851756572723389, "learning_rate": 8.817395243514894e-07, "loss": 0.3401, "step": 12933 }, { "epoch": 0.7760244795104098, "grad_norm": 1.546251654624939, "learning_rate": 8.812882051004423e-07, "loss": 0.3639, "step": 12934 }, { "epoch": 0.7760844783104338, "grad_norm": 1.2507396936416626, "learning_rate": 8.808369847462099e-07, "loss": 0.3689, "step": 12935 }, { "epoch": 0.7761444771104578, "grad_norm": 1.3775686025619507, "learning_rate": 8.803858633058307e-07, "loss": 0.3712, "step": 12936 }, { "epoch": 0.7762044759104818, "grad_norm": 1.282509446144104, "learning_rate": 8.799348407963446e-07, "loss": 0.3913, "step": 12937 }, { "epoch": 0.7762644747105057, "grad_norm": 1.4876055717468262, "learning_rate": 8.794839172347803e-07, "loss": 0.3652, "step": 12938 }, { "epoch": 0.7763244735105298, "grad_norm": 1.3490643501281738, "learning_rate": 8.790330926381698e-07, "loss": 0.3979, "step": 12939 }, { "epoch": 0.7763844723105537, "grad_norm": 1.373842477798462, "learning_rate": 8.785823670235393e-07, "loss": 0.3777, "step": 12940 }, { "epoch": 0.7764444711105778, "grad_norm": 1.3069369792938232, "learning_rate": 8.781317404079093e-07, "loss": 0.3594, "step": 12941 }, { "epoch": 0.7765044699106017, "grad_norm": 1.433976173400879, "learning_rate": 8.776812128082988e-07, "loss": 0.3783, "step": 12942 }, { "epoch": 0.7765644687106258, "grad_norm": 1.252100944519043, "learning_rate": 8.772307842417201e-07, "loss": 0.379, "step": 12943 }, { "epoch": 0.7766244675106498, "grad_norm": 1.444839358329773, "learning_rate": 8.767804547251866e-07, "loss": 0.3981, "step": 12944 }, { "epoch": 0.7766844663106738, "grad_norm": 1.4289519786834717, "learning_rate": 8.76330224275704e-07, "loss": 0.3755, "step": 12945 }, { "epoch": 0.7767444651106978, "grad_norm": 1.3837624788284302, "learning_rate": 8.758800929102744e-07, "loss": 0.3885, "step": 12946 }, { "epoch": 0.7768044639107218, "grad_norm": 1.28825044631958, "learning_rate": 8.754300606458988e-07, "loss": 0.4323, "step": 12947 }, { "epoch": 0.7768644627107458, "grad_norm": 1.350875735282898, "learning_rate": 8.749801274995728e-07, "loss": 0.3543, "step": 12948 }, { "epoch": 0.7769244615107698, "grad_norm": 1.2613649368286133, "learning_rate": 8.745302934882879e-07, "loss": 0.3497, "step": 12949 }, { "epoch": 0.7769844603107938, "grad_norm": 1.2805534601211548, "learning_rate": 8.740805586290307e-07, "loss": 0.3963, "step": 12950 }, { "epoch": 0.7770444591108178, "grad_norm": 1.3723989725112915, "learning_rate": 8.736309229387877e-07, "loss": 0.384, "step": 12951 }, { "epoch": 0.7771044579108418, "grad_norm": 1.3570841550827026, "learning_rate": 8.731813864345396e-07, "loss": 0.3886, "step": 12952 }, { "epoch": 0.7771644567108658, "grad_norm": 1.3554178476333618, "learning_rate": 8.727319491332631e-07, "loss": 0.3911, "step": 12953 }, { "epoch": 0.7772244555108898, "grad_norm": 1.3544095754623413, "learning_rate": 8.722826110519309e-07, "loss": 0.4038, "step": 12954 }, { "epoch": 0.7772844543109138, "grad_norm": 1.2830803394317627, "learning_rate": 8.718333722075117e-07, "loss": 0.4005, "step": 12955 }, { "epoch": 0.7773444531109378, "grad_norm": 1.44031822681427, "learning_rate": 8.713842326169729e-07, "loss": 0.3838, "step": 12956 }, { "epoch": 0.7774044519109617, "grad_norm": 1.4678329229354858, "learning_rate": 8.709351922972762e-07, "loss": 0.3385, "step": 12957 }, { "epoch": 0.7774644507109858, "grad_norm": 1.5390994548797607, "learning_rate": 8.704862512653777e-07, "loss": 0.409, "step": 12958 }, { "epoch": 0.7775244495110097, "grad_norm": 1.233145833015442, "learning_rate": 8.700374095382349e-07, "loss": 0.3307, "step": 12959 }, { "epoch": 0.7775844483110338, "grad_norm": 1.2991670370101929, "learning_rate": 8.695886671327967e-07, "loss": 0.3686, "step": 12960 }, { "epoch": 0.7776444471110577, "grad_norm": 1.2200781106948853, "learning_rate": 8.691400240660108e-07, "loss": 0.3289, "step": 12961 }, { "epoch": 0.7777044459110818, "grad_norm": 1.3402283191680908, "learning_rate": 8.686914803548189e-07, "loss": 0.3864, "step": 12962 }, { "epoch": 0.7777644447111057, "grad_norm": 1.247165560722351, "learning_rate": 8.682430360161612e-07, "loss": 0.3346, "step": 12963 }, { "epoch": 0.7778244435111298, "grad_norm": 1.2071752548217773, "learning_rate": 8.677946910669763e-07, "loss": 0.3889, "step": 12964 }, { "epoch": 0.7778844423111537, "grad_norm": 1.3230098485946655, "learning_rate": 8.673464455241913e-07, "loss": 0.4089, "step": 12965 }, { "epoch": 0.7779444411111778, "grad_norm": 1.4399954080581665, "learning_rate": 8.668982994047382e-07, "loss": 0.355, "step": 12966 }, { "epoch": 0.7780044399112018, "grad_norm": 1.3725889921188354, "learning_rate": 8.664502527255383e-07, "loss": 0.3743, "step": 12967 }, { "epoch": 0.7780644387112258, "grad_norm": 1.3638801574707031, "learning_rate": 8.660023055035154e-07, "loss": 0.3704, "step": 12968 }, { "epoch": 0.7781244375112498, "grad_norm": 1.2754158973693848, "learning_rate": 8.655544577555846e-07, "loss": 0.3692, "step": 12969 }, { "epoch": 0.7781844363112738, "grad_norm": 1.3961620330810547, "learning_rate": 8.651067094986585e-07, "loss": 0.389, "step": 12970 }, { "epoch": 0.7782444351112978, "grad_norm": 1.3175915479660034, "learning_rate": 8.646590607496485e-07, "loss": 0.3517, "step": 12971 }, { "epoch": 0.7783044339113218, "grad_norm": 1.4136896133422852, "learning_rate": 8.642115115254594e-07, "loss": 0.4232, "step": 12972 }, { "epoch": 0.7783644327113458, "grad_norm": 1.4501383304595947, "learning_rate": 8.637640618429927e-07, "loss": 0.3797, "step": 12973 }, { "epoch": 0.7784244315113698, "grad_norm": 1.316245436668396, "learning_rate": 8.633167117191458e-07, "loss": 0.3776, "step": 12974 }, { "epoch": 0.7784844303113938, "grad_norm": 1.3026466369628906, "learning_rate": 8.628694611708147e-07, "loss": 0.3463, "step": 12975 }, { "epoch": 0.7785444291114177, "grad_norm": 1.3094897270202637, "learning_rate": 8.624223102148896e-07, "loss": 0.3364, "step": 12976 }, { "epoch": 0.7786044279114418, "grad_norm": 1.280299425125122, "learning_rate": 8.619752588682562e-07, "loss": 0.3743, "step": 12977 }, { "epoch": 0.7786644267114657, "grad_norm": 1.3733327388763428, "learning_rate": 8.615283071477991e-07, "loss": 0.3717, "step": 12978 }, { "epoch": 0.7787244255114898, "grad_norm": 1.1785763502120972, "learning_rate": 8.610814550703972e-07, "loss": 0.3432, "step": 12979 }, { "epoch": 0.7787844243115137, "grad_norm": 1.2848418951034546, "learning_rate": 8.606347026529249e-07, "loss": 0.3398, "step": 12980 }, { "epoch": 0.7788444231115378, "grad_norm": 1.2307106256484985, "learning_rate": 8.601880499122559e-07, "loss": 0.3584, "step": 12981 }, { "epoch": 0.7789044219115617, "grad_norm": 1.2943297624588013, "learning_rate": 8.597414968652563e-07, "loss": 0.3938, "step": 12982 }, { "epoch": 0.7789644207115858, "grad_norm": 1.357437014579773, "learning_rate": 8.592950435287933e-07, "loss": 0.3741, "step": 12983 }, { "epoch": 0.7790244195116097, "grad_norm": 1.353209137916565, "learning_rate": 8.588486899197237e-07, "loss": 0.4024, "step": 12984 }, { "epoch": 0.7790844183116338, "grad_norm": 1.5181468725204468, "learning_rate": 8.584024360549066e-07, "loss": 0.3743, "step": 12985 }, { "epoch": 0.7791444171116577, "grad_norm": 1.3771153688430786, "learning_rate": 8.579562819511938e-07, "loss": 0.3798, "step": 12986 }, { "epoch": 0.7792044159116818, "grad_norm": 1.2093687057495117, "learning_rate": 8.575102276254361e-07, "loss": 0.3496, "step": 12987 }, { "epoch": 0.7792644147117057, "grad_norm": 1.3590121269226074, "learning_rate": 8.570642730944775e-07, "loss": 0.3402, "step": 12988 }, { "epoch": 0.7793244135117298, "grad_norm": 1.210932731628418, "learning_rate": 8.566184183751598e-07, "loss": 0.3649, "step": 12989 }, { "epoch": 0.7793844123117538, "grad_norm": 1.4226053953170776, "learning_rate": 8.561726634843215e-07, "loss": 0.3655, "step": 12990 }, { "epoch": 0.7794444111117778, "grad_norm": 1.331453561782837, "learning_rate": 8.557270084387969e-07, "loss": 0.354, "step": 12991 }, { "epoch": 0.7795044099118018, "grad_norm": 1.2777858972549438, "learning_rate": 8.552814532554149e-07, "loss": 0.3451, "step": 12992 }, { "epoch": 0.7795644087118258, "grad_norm": 1.2071497440338135, "learning_rate": 8.54835997951004e-07, "loss": 0.3104, "step": 12993 }, { "epoch": 0.7796244075118498, "grad_norm": 1.3877592086791992, "learning_rate": 8.543906425423861e-07, "loss": 0.3962, "step": 12994 }, { "epoch": 0.7796844063118737, "grad_norm": 1.3867477178573608, "learning_rate": 8.539453870463804e-07, "loss": 0.3683, "step": 12995 }, { "epoch": 0.7797444051118978, "grad_norm": 1.3772737979888916, "learning_rate": 8.535002314798012e-07, "loss": 0.3781, "step": 12996 }, { "epoch": 0.7798044039119217, "grad_norm": 1.4191237688064575, "learning_rate": 8.530551758594613e-07, "loss": 0.4103, "step": 12997 }, { "epoch": 0.7798644027119458, "grad_norm": 1.5068234205245972, "learning_rate": 8.526102202021673e-07, "loss": 0.4453, "step": 12998 }, { "epoch": 0.7799244015119697, "grad_norm": 1.2758300304412842, "learning_rate": 8.52165364524725e-07, "loss": 0.3832, "step": 12999 }, { "epoch": 0.7799844003119938, "grad_norm": 1.382976770401001, "learning_rate": 8.517206088439331e-07, "loss": 0.359, "step": 13000 }, { "epoch": 0.7800443991120177, "grad_norm": 1.1984596252441406, "learning_rate": 8.512759531765871e-07, "loss": 0.3462, "step": 13001 }, { "epoch": 0.7801043979120418, "grad_norm": 1.4426352977752686, "learning_rate": 8.508313975394822e-07, "loss": 0.3917, "step": 13002 }, { "epoch": 0.7801643967120657, "grad_norm": 1.1973000764846802, "learning_rate": 8.503869419494056e-07, "loss": 0.3137, "step": 13003 }, { "epoch": 0.7802243955120898, "grad_norm": 1.1933919191360474, "learning_rate": 8.499425864231412e-07, "loss": 0.3701, "step": 13004 }, { "epoch": 0.7802843943121137, "grad_norm": 1.2724741697311401, "learning_rate": 8.494983309774734e-07, "loss": 0.3655, "step": 13005 }, { "epoch": 0.7803443931121378, "grad_norm": 1.3451335430145264, "learning_rate": 8.490541756291778e-07, "loss": 0.3286, "step": 13006 }, { "epoch": 0.7804043919121617, "grad_norm": 1.2915042638778687, "learning_rate": 8.48610120395028e-07, "loss": 0.3733, "step": 13007 }, { "epoch": 0.7804643907121858, "grad_norm": 1.16936194896698, "learning_rate": 8.481661652917937e-07, "loss": 0.3481, "step": 13008 }, { "epoch": 0.7805243895122097, "grad_norm": 1.3544524908065796, "learning_rate": 8.47722310336241e-07, "loss": 0.4041, "step": 13009 }, { "epoch": 0.7805843883122338, "grad_norm": 1.3133214712142944, "learning_rate": 8.472785555451354e-07, "loss": 0.3779, "step": 13010 }, { "epoch": 0.7806443871122578, "grad_norm": 1.1637520790100098, "learning_rate": 8.468349009352306e-07, "loss": 0.3288, "step": 13011 }, { "epoch": 0.7807043859122818, "grad_norm": 1.3561710119247437, "learning_rate": 8.463913465232854e-07, "loss": 0.3701, "step": 13012 }, { "epoch": 0.7807643847123058, "grad_norm": 1.3682708740234375, "learning_rate": 8.459478923260478e-07, "loss": 0.3611, "step": 13013 }, { "epoch": 0.7808243835123297, "grad_norm": 1.2352490425109863, "learning_rate": 8.455045383602685e-07, "loss": 0.3168, "step": 13014 }, { "epoch": 0.7808843823123538, "grad_norm": 1.2109613418579102, "learning_rate": 8.450612846426871e-07, "loss": 0.3187, "step": 13015 }, { "epoch": 0.7809443811123777, "grad_norm": 1.425033450126648, "learning_rate": 8.446181311900449e-07, "loss": 0.3679, "step": 13016 }, { "epoch": 0.7810043799124018, "grad_norm": 1.2646358013153076, "learning_rate": 8.44175078019079e-07, "loss": 0.3462, "step": 13017 }, { "epoch": 0.7810643787124257, "grad_norm": 1.2421544790267944, "learning_rate": 8.437321251465207e-07, "loss": 0.3399, "step": 13018 }, { "epoch": 0.7811243775124498, "grad_norm": 1.3678957223892212, "learning_rate": 8.432892725890976e-07, "loss": 0.3676, "step": 13019 }, { "epoch": 0.7811843763124737, "grad_norm": 1.236520767211914, "learning_rate": 8.428465203635342e-07, "loss": 0.363, "step": 13020 }, { "epoch": 0.7812443751124978, "grad_norm": 1.31830632686615, "learning_rate": 8.424038684865528e-07, "loss": 0.3566, "step": 13021 }, { "epoch": 0.7813043739125217, "grad_norm": 1.260790467262268, "learning_rate": 8.419613169748694e-07, "loss": 0.364, "step": 13022 }, { "epoch": 0.7813643727125458, "grad_norm": 1.23410964012146, "learning_rate": 8.415188658451958e-07, "loss": 0.3887, "step": 13023 }, { "epoch": 0.7814243715125697, "grad_norm": 1.3673419952392578, "learning_rate": 8.410765151142436e-07, "loss": 0.3569, "step": 13024 }, { "epoch": 0.7814843703125938, "grad_norm": 1.2809438705444336, "learning_rate": 8.406342647987174e-07, "loss": 0.399, "step": 13025 }, { "epoch": 0.7815443691126177, "grad_norm": 1.3331561088562012, "learning_rate": 8.401921149153192e-07, "loss": 0.3701, "step": 13026 }, { "epoch": 0.7816043679126418, "grad_norm": 1.2987786531448364, "learning_rate": 8.397500654807456e-07, "loss": 0.3899, "step": 13027 }, { "epoch": 0.7816643667126657, "grad_norm": 1.404889702796936, "learning_rate": 8.393081165116917e-07, "loss": 0.4067, "step": 13028 }, { "epoch": 0.7817243655126898, "grad_norm": 1.3268964290618896, "learning_rate": 8.388662680248506e-07, "loss": 0.3669, "step": 13029 }, { "epoch": 0.7817843643127137, "grad_norm": 1.2982063293457031, "learning_rate": 8.384245200369044e-07, "loss": 0.3967, "step": 13030 }, { "epoch": 0.7818443631127378, "grad_norm": 1.3521842956542969, "learning_rate": 8.379828725645388e-07, "loss": 0.3467, "step": 13031 }, { "epoch": 0.7819043619127617, "grad_norm": 1.4382917881011963, "learning_rate": 8.375413256244309e-07, "loss": 0.347, "step": 13032 }, { "epoch": 0.7819643607127857, "grad_norm": 1.3670014142990112, "learning_rate": 8.370998792332582e-07, "loss": 0.391, "step": 13033 }, { "epoch": 0.7820243595128098, "grad_norm": 1.347015619277954, "learning_rate": 8.366585334076909e-07, "loss": 0.3134, "step": 13034 }, { "epoch": 0.7820843583128337, "grad_norm": 1.3082764148712158, "learning_rate": 8.362172881643956e-07, "loss": 0.3305, "step": 13035 }, { "epoch": 0.7821443571128578, "grad_norm": 1.434064269065857, "learning_rate": 8.357761435200383e-07, "loss": 0.404, "step": 13036 }, { "epoch": 0.7822043559128817, "grad_norm": 1.350813865661621, "learning_rate": 8.353350994912774e-07, "loss": 0.4239, "step": 13037 }, { "epoch": 0.7822643547129058, "grad_norm": 1.437636137008667, "learning_rate": 8.3489415609477e-07, "loss": 0.4116, "step": 13038 }, { "epoch": 0.7823243535129297, "grad_norm": 1.3215100765228271, "learning_rate": 8.344533133471672e-07, "loss": 0.3717, "step": 13039 }, { "epoch": 0.7823843523129538, "grad_norm": 1.2450973987579346, "learning_rate": 8.340125712651194e-07, "loss": 0.3676, "step": 13040 }, { "epoch": 0.7824443511129777, "grad_norm": 1.5136855840682983, "learning_rate": 8.335719298652707e-07, "loss": 0.3549, "step": 13041 }, { "epoch": 0.7825043499130018, "grad_norm": 1.4386489391326904, "learning_rate": 8.331313891642606e-07, "loss": 0.3905, "step": 13042 }, { "epoch": 0.7825643487130257, "grad_norm": 1.3963837623596191, "learning_rate": 8.326909491787292e-07, "loss": 0.3579, "step": 13043 }, { "epoch": 0.7826243475130498, "grad_norm": 1.2934600114822388, "learning_rate": 8.322506099253071e-07, "loss": 0.3743, "step": 13044 }, { "epoch": 0.7826843463130737, "grad_norm": 1.375584363937378, "learning_rate": 8.318103714206268e-07, "loss": 0.3554, "step": 13045 }, { "epoch": 0.7827443451130978, "grad_norm": 1.2600948810577393, "learning_rate": 8.313702336813118e-07, "loss": 0.3908, "step": 13046 }, { "epoch": 0.7828043439131217, "grad_norm": 1.3552072048187256, "learning_rate": 8.309301967239841e-07, "loss": 0.36, "step": 13047 }, { "epoch": 0.7828643427131458, "grad_norm": 1.375577449798584, "learning_rate": 8.304902605652635e-07, "loss": 0.3951, "step": 13048 }, { "epoch": 0.7829243415131697, "grad_norm": 1.4431387186050415, "learning_rate": 8.300504252217638e-07, "loss": 0.3538, "step": 13049 }, { "epoch": 0.7829843403131937, "grad_norm": 1.461739182472229, "learning_rate": 8.296106907100941e-07, "loss": 0.3905, "step": 13050 }, { "epoch": 0.7830443391132177, "grad_norm": 1.5514870882034302, "learning_rate": 8.291710570468632e-07, "loss": 0.4039, "step": 13051 }, { "epoch": 0.7831043379132417, "grad_norm": 1.4727678298950195, "learning_rate": 8.287315242486735e-07, "loss": 0.3849, "step": 13052 }, { "epoch": 0.7831643367132657, "grad_norm": 1.5511786937713623, "learning_rate": 8.282920923321239e-07, "loss": 0.3817, "step": 13053 }, { "epoch": 0.7832243355132897, "grad_norm": 1.2391719818115234, "learning_rate": 8.278527613138086e-07, "loss": 0.4027, "step": 13054 }, { "epoch": 0.7832843343133137, "grad_norm": 1.2983571290969849, "learning_rate": 8.274135312103208e-07, "loss": 0.3892, "step": 13055 }, { "epoch": 0.7833443331133377, "grad_norm": 1.3372502326965332, "learning_rate": 8.269744020382481e-07, "loss": 0.348, "step": 13056 }, { "epoch": 0.7834043319133618, "grad_norm": 1.2390899658203125, "learning_rate": 8.265353738141729e-07, "loss": 0.3674, "step": 13057 }, { "epoch": 0.7834643307133857, "grad_norm": 1.3984956741333008, "learning_rate": 8.260964465546773e-07, "loss": 0.3634, "step": 13058 }, { "epoch": 0.7835243295134098, "grad_norm": 1.2737884521484375, "learning_rate": 8.25657620276336e-07, "loss": 0.404, "step": 13059 }, { "epoch": 0.7835843283134337, "grad_norm": 1.2745379209518433, "learning_rate": 8.252188949957234e-07, "loss": 0.3593, "step": 13060 }, { "epoch": 0.7836443271134578, "grad_norm": 1.3245015144348145, "learning_rate": 8.247802707294053e-07, "loss": 0.3413, "step": 13061 }, { "epoch": 0.7837043259134817, "grad_norm": 1.4651179313659668, "learning_rate": 8.243417474939482e-07, "loss": 0.4069, "step": 13062 }, { "epoch": 0.7837643247135058, "grad_norm": 1.4629123210906982, "learning_rate": 8.239033253059137e-07, "loss": 0.3782, "step": 13063 }, { "epoch": 0.7838243235135297, "grad_norm": 1.2156111001968384, "learning_rate": 8.234650041818586e-07, "loss": 0.3269, "step": 13064 }, { "epoch": 0.7838843223135538, "grad_norm": 1.407307744026184, "learning_rate": 8.230267841383358e-07, "loss": 0.3841, "step": 13065 }, { "epoch": 0.7839443211135777, "grad_norm": 1.3111865520477295, "learning_rate": 8.225886651918943e-07, "loss": 0.4056, "step": 13066 }, { "epoch": 0.7840043199136018, "grad_norm": 1.2792999744415283, "learning_rate": 8.221506473590817e-07, "loss": 0.3761, "step": 13067 }, { "epoch": 0.7840643187136257, "grad_norm": 1.4455671310424805, "learning_rate": 8.21712730656439e-07, "loss": 0.374, "step": 13068 }, { "epoch": 0.7841243175136497, "grad_norm": 1.303188681602478, "learning_rate": 8.212749151005029e-07, "loss": 0.3206, "step": 13069 }, { "epoch": 0.7841843163136737, "grad_norm": 1.4490901231765747, "learning_rate": 8.208372007078102e-07, "loss": 0.3839, "step": 13070 }, { "epoch": 0.7842443151136977, "grad_norm": 1.3740034103393555, "learning_rate": 8.203995874948901e-07, "loss": 0.3609, "step": 13071 }, { "epoch": 0.7843043139137217, "grad_norm": 1.2475312948226929, "learning_rate": 8.199620754782693e-07, "loss": 0.3703, "step": 13072 }, { "epoch": 0.7843643127137457, "grad_norm": 1.3409022092819214, "learning_rate": 8.195246646744697e-07, "loss": 0.3667, "step": 13073 }, { "epoch": 0.7844243115137697, "grad_norm": 1.310485601425171, "learning_rate": 8.190873551000114e-07, "loss": 0.3952, "step": 13074 }, { "epoch": 0.7844843103137937, "grad_norm": 1.3185206651687622, "learning_rate": 8.186501467714116e-07, "loss": 0.3614, "step": 13075 }, { "epoch": 0.7845443091138177, "grad_norm": 1.3689420223236084, "learning_rate": 8.182130397051772e-07, "loss": 0.4034, "step": 13076 }, { "epoch": 0.7846043079138417, "grad_norm": 1.2516849040985107, "learning_rate": 8.177760339178195e-07, "loss": 0.3424, "step": 13077 }, { "epoch": 0.7846643067138658, "grad_norm": 1.3235262632369995, "learning_rate": 8.173391294258396e-07, "loss": 0.3082, "step": 13078 }, { "epoch": 0.7847243055138897, "grad_norm": 1.257352590560913, "learning_rate": 8.169023262457396e-07, "loss": 0.4026, "step": 13079 }, { "epoch": 0.7847843043139138, "grad_norm": 1.4128772020339966, "learning_rate": 8.164656243940146e-07, "loss": 0.3649, "step": 13080 }, { "epoch": 0.7848443031139377, "grad_norm": 1.3957467079162598, "learning_rate": 8.160290238871559e-07, "loss": 0.3707, "step": 13081 }, { "epoch": 0.7849043019139618, "grad_norm": 1.2912260293960571, "learning_rate": 8.155925247416535e-07, "loss": 0.3255, "step": 13082 }, { "epoch": 0.7849643007139857, "grad_norm": 1.3131266832351685, "learning_rate": 8.151561269739918e-07, "loss": 0.4041, "step": 13083 }, { "epoch": 0.7850242995140098, "grad_norm": 1.410682201385498, "learning_rate": 8.147198306006506e-07, "loss": 0.367, "step": 13084 }, { "epoch": 0.7850842983140337, "grad_norm": 1.4066182374954224, "learning_rate": 8.142836356381066e-07, "loss": 0.3867, "step": 13085 }, { "epoch": 0.7851442971140578, "grad_norm": 1.3228561878204346, "learning_rate": 8.138475421028346e-07, "loss": 0.3753, "step": 13086 }, { "epoch": 0.7852042959140817, "grad_norm": 1.3021886348724365, "learning_rate": 8.134115500113029e-07, "loss": 0.4167, "step": 13087 }, { "epoch": 0.7852642947141057, "grad_norm": 1.1965655088424683, "learning_rate": 8.129756593799756e-07, "loss": 0.3487, "step": 13088 }, { "epoch": 0.7853242935141297, "grad_norm": 1.499071478843689, "learning_rate": 8.125398702253169e-07, "loss": 0.3792, "step": 13089 }, { "epoch": 0.7853842923141537, "grad_norm": 1.487583875656128, "learning_rate": 8.12104182563782e-07, "loss": 0.367, "step": 13090 }, { "epoch": 0.7854442911141777, "grad_norm": 1.4582895040512085, "learning_rate": 8.11668596411827e-07, "loss": 0.4006, "step": 13091 }, { "epoch": 0.7855042899142017, "grad_norm": 1.353002667427063, "learning_rate": 8.112331117859015e-07, "loss": 0.3798, "step": 13092 }, { "epoch": 0.7855642887142257, "grad_norm": 1.2815382480621338, "learning_rate": 8.107977287024502e-07, "loss": 0.341, "step": 13093 }, { "epoch": 0.7856242875142497, "grad_norm": 1.226006031036377, "learning_rate": 8.103624471779176e-07, "loss": 0.3967, "step": 13094 }, { "epoch": 0.7856842863142737, "grad_norm": 1.3077894449234009, "learning_rate": 8.099272672287416e-07, "loss": 0.397, "step": 13095 }, { "epoch": 0.7857442851142977, "grad_norm": 1.4785187244415283, "learning_rate": 8.094921888713567e-07, "loss": 0.3868, "step": 13096 }, { "epoch": 0.7858042839143217, "grad_norm": 1.1860319375991821, "learning_rate": 8.090572121221927e-07, "loss": 0.3603, "step": 13097 }, { "epoch": 0.7858642827143457, "grad_norm": 1.2979321479797363, "learning_rate": 8.086223369976792e-07, "loss": 0.3726, "step": 13098 }, { "epoch": 0.7859242815143697, "grad_norm": 1.3462347984313965, "learning_rate": 8.081875635142378e-07, "loss": 0.3886, "step": 13099 }, { "epoch": 0.7859842803143937, "grad_norm": 1.2387468814849854, "learning_rate": 8.077528916882877e-07, "loss": 0.3302, "step": 13100 }, { "epoch": 0.7860442791144178, "grad_norm": 1.301631212234497, "learning_rate": 8.073183215362459e-07, "loss": 0.3356, "step": 13101 }, { "epoch": 0.7861042779144417, "grad_norm": 1.385658860206604, "learning_rate": 8.068838530745233e-07, "loss": 0.3656, "step": 13102 }, { "epoch": 0.7861642767144658, "grad_norm": 1.3837473392486572, "learning_rate": 8.064494863195263e-07, "loss": 0.3803, "step": 13103 }, { "epoch": 0.7862242755144897, "grad_norm": 1.4214177131652832, "learning_rate": 8.060152212876621e-07, "loss": 0.4235, "step": 13104 }, { "epoch": 0.7862842743145138, "grad_norm": 1.3615431785583496, "learning_rate": 8.055810579953276e-07, "loss": 0.3924, "step": 13105 }, { "epoch": 0.7863442731145377, "grad_norm": 1.2282506227493286, "learning_rate": 8.05146996458923e-07, "loss": 0.3827, "step": 13106 }, { "epoch": 0.7864042719145617, "grad_norm": 1.2380379438400269, "learning_rate": 8.047130366948369e-07, "loss": 0.3612, "step": 13107 }, { "epoch": 0.7864642707145857, "grad_norm": 1.272119402885437, "learning_rate": 8.042791787194605e-07, "loss": 0.3526, "step": 13108 }, { "epoch": 0.7865242695146097, "grad_norm": 1.3505525588989258, "learning_rate": 8.038454225491769e-07, "loss": 0.3797, "step": 13109 }, { "epoch": 0.7865842683146337, "grad_norm": 1.5264942646026611, "learning_rate": 8.034117682003687e-07, "loss": 0.3825, "step": 13110 }, { "epoch": 0.7866442671146577, "grad_norm": 1.3014531135559082, "learning_rate": 8.029782156894129e-07, "loss": 0.3427, "step": 13111 }, { "epoch": 0.7867042659146817, "grad_norm": 1.2863798141479492, "learning_rate": 8.02544765032681e-07, "loss": 0.3723, "step": 13112 }, { "epoch": 0.7867642647147057, "grad_norm": 1.384572148323059, "learning_rate": 8.021114162465448e-07, "loss": 0.4342, "step": 13113 }, { "epoch": 0.7868242635147297, "grad_norm": 1.3148428201675415, "learning_rate": 8.016781693473689e-07, "loss": 0.4213, "step": 13114 }, { "epoch": 0.7868842623147537, "grad_norm": 1.1754461526870728, "learning_rate": 8.012450243515143e-07, "loss": 0.301, "step": 13115 }, { "epoch": 0.7869442611147777, "grad_norm": 1.3467952013015747, "learning_rate": 8.008119812753403e-07, "loss": 0.3802, "step": 13116 }, { "epoch": 0.7870042599148017, "grad_norm": 1.3379664421081543, "learning_rate": 8.003790401352004e-07, "loss": 0.3978, "step": 13117 }, { "epoch": 0.7870642587148257, "grad_norm": 1.4589276313781738, "learning_rate": 7.999462009474444e-07, "loss": 0.3892, "step": 13118 }, { "epoch": 0.7871242575148497, "grad_norm": 1.3753265142440796, "learning_rate": 7.995134637284184e-07, "loss": 0.3833, "step": 13119 }, { "epoch": 0.7871842563148737, "grad_norm": 1.3677054643630981, "learning_rate": 7.990808284944663e-07, "loss": 0.3174, "step": 13120 }, { "epoch": 0.7872442551148977, "grad_norm": 1.4246907234191895, "learning_rate": 7.986482952619261e-07, "loss": 0.3928, "step": 13121 }, { "epoch": 0.7873042539149216, "grad_norm": 1.2470284700393677, "learning_rate": 7.982158640471311e-07, "loss": 0.3686, "step": 13122 }, { "epoch": 0.7873642527149457, "grad_norm": 1.3416695594787598, "learning_rate": 7.977835348664149e-07, "loss": 0.3896, "step": 13123 }, { "epoch": 0.7874242515149698, "grad_norm": 1.252021074295044, "learning_rate": 7.973513077361025e-07, "loss": 0.4177, "step": 13124 }, { "epoch": 0.7874842503149937, "grad_norm": 1.443522572517395, "learning_rate": 7.969191826725186e-07, "loss": 0.3831, "step": 13125 }, { "epoch": 0.7875442491150177, "grad_norm": 1.3034876585006714, "learning_rate": 7.96487159691982e-07, "loss": 0.3588, "step": 13126 }, { "epoch": 0.7876042479150417, "grad_norm": 1.3930999040603638, "learning_rate": 7.960552388108074e-07, "loss": 0.3611, "step": 13127 }, { "epoch": 0.7876642467150657, "grad_norm": 1.2516411542892456, "learning_rate": 7.956234200453082e-07, "loss": 0.3372, "step": 13128 }, { "epoch": 0.7877242455150897, "grad_norm": 1.4038366079330444, "learning_rate": 7.951917034117911e-07, "loss": 0.4167, "step": 13129 }, { "epoch": 0.7877842443151137, "grad_norm": 1.5194664001464844, "learning_rate": 7.947600889265607e-07, "loss": 0.4425, "step": 13130 }, { "epoch": 0.7878442431151377, "grad_norm": 1.2274240255355835, "learning_rate": 7.943285766059156e-07, "loss": 0.3579, "step": 13131 }, { "epoch": 0.7879042419151617, "grad_norm": 1.291590690612793, "learning_rate": 7.938971664661539e-07, "loss": 0.3502, "step": 13132 }, { "epoch": 0.7879642407151857, "grad_norm": 1.6275055408477783, "learning_rate": 7.934658585235672e-07, "loss": 0.3757, "step": 13133 }, { "epoch": 0.7880242395152097, "grad_norm": 1.531295657157898, "learning_rate": 7.930346527944434e-07, "loss": 0.3952, "step": 13134 }, { "epoch": 0.7880842383152337, "grad_norm": 1.2885414361953735, "learning_rate": 7.926035492950688e-07, "loss": 0.3387, "step": 13135 }, { "epoch": 0.7881442371152577, "grad_norm": 1.3686281442642212, "learning_rate": 7.921725480417222e-07, "loss": 0.3614, "step": 13136 }, { "epoch": 0.7882042359152817, "grad_norm": 1.4580358266830444, "learning_rate": 7.917416490506837e-07, "loss": 0.3815, "step": 13137 }, { "epoch": 0.7882642347153057, "grad_norm": 1.2849408388137817, "learning_rate": 7.913108523382221e-07, "loss": 0.3545, "step": 13138 }, { "epoch": 0.7883242335153297, "grad_norm": 1.423054814338684, "learning_rate": 7.908801579206092e-07, "loss": 0.3568, "step": 13139 }, { "epoch": 0.7883842323153537, "grad_norm": 1.280476689338684, "learning_rate": 7.904495658141106e-07, "loss": 0.4175, "step": 13140 }, { "epoch": 0.7884442311153776, "grad_norm": 1.2924848794937134, "learning_rate": 7.900190760349873e-07, "loss": 0.3395, "step": 13141 }, { "epoch": 0.7885042299154017, "grad_norm": 1.4131797552108765, "learning_rate": 7.895886885994973e-07, "loss": 0.3488, "step": 13142 }, { "epoch": 0.7885642287154256, "grad_norm": 1.447797417640686, "learning_rate": 7.891584035238925e-07, "loss": 0.3805, "step": 13143 }, { "epoch": 0.7886242275154497, "grad_norm": 1.3038748502731323, "learning_rate": 7.887282208244255e-07, "loss": 0.3461, "step": 13144 }, { "epoch": 0.7886842263154736, "grad_norm": 1.3594751358032227, "learning_rate": 7.88298140517341e-07, "loss": 0.4026, "step": 13145 }, { "epoch": 0.7887442251154977, "grad_norm": 1.376536250114441, "learning_rate": 7.878681626188801e-07, "loss": 0.3283, "step": 13146 }, { "epoch": 0.7888042239155217, "grad_norm": 1.3945224285125732, "learning_rate": 7.874382871452835e-07, "loss": 0.383, "step": 13147 }, { "epoch": 0.7888642227155457, "grad_norm": 1.334775686264038, "learning_rate": 7.870085141127843e-07, "loss": 0.3734, "step": 13148 }, { "epoch": 0.7889242215155697, "grad_norm": 1.266175627708435, "learning_rate": 7.865788435376129e-07, "loss": 0.3925, "step": 13149 }, { "epoch": 0.7889842203155937, "grad_norm": 1.3351318836212158, "learning_rate": 7.861492754359954e-07, "loss": 0.3519, "step": 13150 }, { "epoch": 0.7890442191156177, "grad_norm": 1.3543096780776978, "learning_rate": 7.857198098241557e-07, "loss": 0.3372, "step": 13151 }, { "epoch": 0.7891042179156417, "grad_norm": 1.5391865968704224, "learning_rate": 7.852904467183144e-07, "loss": 0.3747, "step": 13152 }, { "epoch": 0.7891642167156657, "grad_norm": 1.2594630718231201, "learning_rate": 7.848611861346828e-07, "loss": 0.4064, "step": 13153 }, { "epoch": 0.7892242155156897, "grad_norm": 1.3252339363098145, "learning_rate": 7.844320280894749e-07, "loss": 0.3914, "step": 13154 }, { "epoch": 0.7892842143157137, "grad_norm": 1.3519415855407715, "learning_rate": 7.840029725988962e-07, "loss": 0.3813, "step": 13155 }, { "epoch": 0.7893442131157377, "grad_norm": 1.5014667510986328, "learning_rate": 7.835740196791522e-07, "loss": 0.3943, "step": 13156 }, { "epoch": 0.7894042119157617, "grad_norm": 1.5271188020706177, "learning_rate": 7.831451693464415e-07, "loss": 0.4262, "step": 13157 }, { "epoch": 0.7894642107157857, "grad_norm": 1.2276872396469116, "learning_rate": 7.82716421616959e-07, "loss": 0.3804, "step": 13158 }, { "epoch": 0.7895242095158097, "grad_norm": 1.3168140649795532, "learning_rate": 7.822877765068982e-07, "loss": 0.354, "step": 13159 }, { "epoch": 0.7895842083158336, "grad_norm": 1.4125165939331055, "learning_rate": 7.818592340324461e-07, "loss": 0.3812, "step": 13160 }, { "epoch": 0.7896442071158577, "grad_norm": 1.4021601676940918, "learning_rate": 7.81430794209787e-07, "loss": 0.3487, "step": 13161 }, { "epoch": 0.7897042059158816, "grad_norm": 1.4228088855743408, "learning_rate": 7.810024570551003e-07, "loss": 0.3839, "step": 13162 }, { "epoch": 0.7897642047159057, "grad_norm": 1.3450051546096802, "learning_rate": 7.805742225845637e-07, "loss": 0.3687, "step": 13163 }, { "epoch": 0.7898242035159296, "grad_norm": 1.2492625713348389, "learning_rate": 7.801460908143493e-07, "loss": 0.3638, "step": 13164 }, { "epoch": 0.7898842023159537, "grad_norm": 1.3921196460723877, "learning_rate": 7.797180617606243e-07, "loss": 0.4233, "step": 13165 }, { "epoch": 0.7899442011159776, "grad_norm": 1.3545724153518677, "learning_rate": 7.792901354395553e-07, "loss": 0.3677, "step": 13166 }, { "epoch": 0.7900041999160017, "grad_norm": 1.4112521409988403, "learning_rate": 7.788623118673026e-07, "loss": 0.3852, "step": 13167 }, { "epoch": 0.7900641987160257, "grad_norm": 1.2013736963272095, "learning_rate": 7.784345910600221e-07, "loss": 0.3555, "step": 13168 }, { "epoch": 0.7901241975160497, "grad_norm": 1.3395317792892456, "learning_rate": 7.780069730338682e-07, "loss": 0.337, "step": 13169 }, { "epoch": 0.7901841963160737, "grad_norm": 1.2033721208572388, "learning_rate": 7.77579457804989e-07, "loss": 0.354, "step": 13170 }, { "epoch": 0.7902441951160977, "grad_norm": 1.3681763410568237, "learning_rate": 7.771520453895309e-07, "loss": 0.3945, "step": 13171 }, { "epoch": 0.7903041939161217, "grad_norm": 1.2349421977996826, "learning_rate": 7.767247358036354e-07, "loss": 0.3971, "step": 13172 }, { "epoch": 0.7903641927161457, "grad_norm": 1.5115501880645752, "learning_rate": 7.76297529063438e-07, "loss": 0.3839, "step": 13173 }, { "epoch": 0.7904241915161697, "grad_norm": 1.3375533819198608, "learning_rate": 7.758704251850748e-07, "loss": 0.3812, "step": 13174 }, { "epoch": 0.7904841903161937, "grad_norm": 1.33442223072052, "learning_rate": 7.754434241846746e-07, "loss": 0.411, "step": 13175 }, { "epoch": 0.7905441891162177, "grad_norm": 1.226405143737793, "learning_rate": 7.750165260783632e-07, "loss": 0.3505, "step": 13176 }, { "epoch": 0.7906041879162417, "grad_norm": 1.4080426692962646, "learning_rate": 7.745897308822617e-07, "loss": 0.3685, "step": 13177 }, { "epoch": 0.7906641867162657, "grad_norm": 1.3385716676712036, "learning_rate": 7.741630386124902e-07, "loss": 0.3525, "step": 13178 }, { "epoch": 0.7907241855162896, "grad_norm": 1.350109338760376, "learning_rate": 7.737364492851615e-07, "loss": 0.402, "step": 13179 }, { "epoch": 0.7907841843163137, "grad_norm": 1.3540226221084595, "learning_rate": 7.733099629163857e-07, "loss": 0.3649, "step": 13180 }, { "epoch": 0.7908441831163376, "grad_norm": 1.2619853019714355, "learning_rate": 7.728835795222703e-07, "loss": 0.4049, "step": 13181 }, { "epoch": 0.7909041819163617, "grad_norm": 1.1936668157577515, "learning_rate": 7.724572991189168e-07, "loss": 0.3365, "step": 13182 }, { "epoch": 0.7909641807163856, "grad_norm": 1.3847278356552124, "learning_rate": 7.720311217224265e-07, "loss": 0.3905, "step": 13183 }, { "epoch": 0.7910241795164097, "grad_norm": 1.2442997694015503, "learning_rate": 7.716050473488898e-07, "loss": 0.3736, "step": 13184 }, { "epoch": 0.7910841783164336, "grad_norm": 1.3341209888458252, "learning_rate": 7.711790760144001e-07, "loss": 0.3745, "step": 13185 }, { "epoch": 0.7911441771164577, "grad_norm": 1.3749934434890747, "learning_rate": 7.707532077350455e-07, "loss": 0.3969, "step": 13186 }, { "epoch": 0.7912041759164816, "grad_norm": 1.3279883861541748, "learning_rate": 7.703274425269073e-07, "loss": 0.3427, "step": 13187 }, { "epoch": 0.7912641747165057, "grad_norm": 1.3321329355239868, "learning_rate": 7.699017804060655e-07, "loss": 0.3086, "step": 13188 }, { "epoch": 0.7913241735165296, "grad_norm": 1.3848694562911987, "learning_rate": 7.694762213885943e-07, "loss": 0.3785, "step": 13189 }, { "epoch": 0.7913841723165537, "grad_norm": 1.2988332509994507, "learning_rate": 7.69050765490567e-07, "loss": 0.3704, "step": 13190 }, { "epoch": 0.7914441711165777, "grad_norm": 1.3995260000228882, "learning_rate": 7.686254127280498e-07, "loss": 0.394, "step": 13191 }, { "epoch": 0.7915041699166017, "grad_norm": 1.1862564086914062, "learning_rate": 7.682001631171059e-07, "loss": 0.3735, "step": 13192 }, { "epoch": 0.7915641687166257, "grad_norm": 1.354900598526001, "learning_rate": 7.677750166737966e-07, "loss": 0.3675, "step": 13193 }, { "epoch": 0.7916241675166497, "grad_norm": 1.390126347541809, "learning_rate": 7.673499734141771e-07, "loss": 0.3578, "step": 13194 }, { "epoch": 0.7916841663166737, "grad_norm": 1.3699407577514648, "learning_rate": 7.66925033354299e-07, "loss": 0.4066, "step": 13195 }, { "epoch": 0.7917441651166977, "grad_norm": 1.3099861145019531, "learning_rate": 7.665001965102097e-07, "loss": 0.3565, "step": 13196 }, { "epoch": 0.7918041639167217, "grad_norm": 1.3814635276794434, "learning_rate": 7.660754628979541e-07, "loss": 0.3541, "step": 13197 }, { "epoch": 0.7918641627167456, "grad_norm": 1.3613653182983398, "learning_rate": 7.656508325335744e-07, "loss": 0.3548, "step": 13198 }, { "epoch": 0.7919241615167697, "grad_norm": 1.219913363456726, "learning_rate": 7.652263054331035e-07, "loss": 0.3381, "step": 13199 }, { "epoch": 0.7919841603167936, "grad_norm": 1.3248387575149536, "learning_rate": 7.64801881612576e-07, "loss": 0.3674, "step": 13200 }, { "epoch": 0.7920441591168177, "grad_norm": 1.4887884855270386, "learning_rate": 7.643775610880193e-07, "loss": 0.3935, "step": 13201 }, { "epoch": 0.7921041579168416, "grad_norm": 1.3284841775894165, "learning_rate": 7.639533438754605e-07, "loss": 0.3624, "step": 13202 }, { "epoch": 0.7921641567168657, "grad_norm": 1.3598023653030396, "learning_rate": 7.635292299909165e-07, "loss": 0.3459, "step": 13203 }, { "epoch": 0.7922241555168896, "grad_norm": 1.537655234336853, "learning_rate": 7.631052194504061e-07, "loss": 0.3947, "step": 13204 }, { "epoch": 0.7922841543169137, "grad_norm": 1.2877253293991089, "learning_rate": 7.62681312269943e-07, "loss": 0.3743, "step": 13205 }, { "epoch": 0.7923441531169376, "grad_norm": 1.33073890209198, "learning_rate": 7.622575084655361e-07, "loss": 0.3659, "step": 13206 }, { "epoch": 0.7924041519169617, "grad_norm": 1.3214316368103027, "learning_rate": 7.618338080531895e-07, "loss": 0.3543, "step": 13207 }, { "epoch": 0.7924641507169856, "grad_norm": 1.5180909633636475, "learning_rate": 7.614102110489044e-07, "loss": 0.3961, "step": 13208 }, { "epoch": 0.7925241495170097, "grad_norm": 1.3689109086990356, "learning_rate": 7.609867174686793e-07, "loss": 0.3843, "step": 13209 }, { "epoch": 0.7925841483170336, "grad_norm": 1.2841918468475342, "learning_rate": 7.605633273285069e-07, "loss": 0.3537, "step": 13210 }, { "epoch": 0.7926441471170577, "grad_norm": 1.3040075302124023, "learning_rate": 7.601400406443762e-07, "loss": 0.3902, "step": 13211 }, { "epoch": 0.7927041459170816, "grad_norm": 1.3761993646621704, "learning_rate": 7.597168574322742e-07, "loss": 0.4251, "step": 13212 }, { "epoch": 0.7927641447171057, "grad_norm": 1.279784917831421, "learning_rate": 7.592937777081814e-07, "loss": 0.3929, "step": 13213 }, { "epoch": 0.7928241435171297, "grad_norm": 1.320015788078308, "learning_rate": 7.588708014880756e-07, "loss": 0.3406, "step": 13214 }, { "epoch": 0.7928841423171537, "grad_norm": 1.131210207939148, "learning_rate": 7.584479287879317e-07, "loss": 0.3091, "step": 13215 }, { "epoch": 0.7929441411171777, "grad_norm": 1.3882356882095337, "learning_rate": 7.580251596237183e-07, "loss": 0.3869, "step": 13216 }, { "epoch": 0.7930041399172016, "grad_norm": 1.3768291473388672, "learning_rate": 7.576024940114029e-07, "loss": 0.3561, "step": 13217 }, { "epoch": 0.7930641387172257, "grad_norm": 1.1722625494003296, "learning_rate": 7.571799319669473e-07, "loss": 0.3658, "step": 13218 }, { "epoch": 0.7931241375172496, "grad_norm": 1.4178388118743896, "learning_rate": 7.567574735063091e-07, "loss": 0.38, "step": 13219 }, { "epoch": 0.7931841363172737, "grad_norm": 1.2932465076446533, "learning_rate": 7.56335118645442e-07, "loss": 0.3267, "step": 13220 }, { "epoch": 0.7932441351172976, "grad_norm": 1.341729760169983, "learning_rate": 7.559128674002985e-07, "loss": 0.3785, "step": 13221 }, { "epoch": 0.7933041339173217, "grad_norm": 1.255505919456482, "learning_rate": 7.55490719786824e-07, "loss": 0.3823, "step": 13222 }, { "epoch": 0.7933641327173456, "grad_norm": 1.3399333953857422, "learning_rate": 7.550686758209601e-07, "loss": 0.3815, "step": 13223 }, { "epoch": 0.7934241315173697, "grad_norm": 1.3966113328933716, "learning_rate": 7.546467355186473e-07, "loss": 0.3962, "step": 13224 }, { "epoch": 0.7934841303173936, "grad_norm": 1.249202847480774, "learning_rate": 7.542248988958194e-07, "loss": 0.3848, "step": 13225 }, { "epoch": 0.7935441291174177, "grad_norm": 1.290185570716858, "learning_rate": 7.538031659684064e-07, "loss": 0.3402, "step": 13226 }, { "epoch": 0.7936041279174416, "grad_norm": 1.2789782285690308, "learning_rate": 7.533815367523371e-07, "loss": 0.3446, "step": 13227 }, { "epoch": 0.7936641267174657, "grad_norm": 1.3813412189483643, "learning_rate": 7.529600112635329e-07, "loss": 0.392, "step": 13228 }, { "epoch": 0.7937241255174896, "grad_norm": 1.354955792427063, "learning_rate": 7.525385895179154e-07, "loss": 0.3874, "step": 13229 }, { "epoch": 0.7937841243175137, "grad_norm": 1.3389023542404175, "learning_rate": 7.521172715313959e-07, "loss": 0.3768, "step": 13230 }, { "epoch": 0.7938441231175376, "grad_norm": 1.3203309774398804, "learning_rate": 7.516960573198888e-07, "loss": 0.4198, "step": 13231 }, { "epoch": 0.7939041219175617, "grad_norm": 1.317644476890564, "learning_rate": 7.512749468992994e-07, "loss": 0.3176, "step": 13232 }, { "epoch": 0.7939641207175856, "grad_norm": 1.4648526906967163, "learning_rate": 7.508539402855331e-07, "loss": 0.3958, "step": 13233 }, { "epoch": 0.7940241195176097, "grad_norm": 1.282758116722107, "learning_rate": 7.504330374944886e-07, "loss": 0.3846, "step": 13234 }, { "epoch": 0.7940841183176337, "grad_norm": 1.3183666467666626, "learning_rate": 7.5001223854206e-07, "loss": 0.3643, "step": 13235 }, { "epoch": 0.7941441171176576, "grad_norm": 1.4085179567337036, "learning_rate": 7.495915434441412e-07, "loss": 0.399, "step": 13236 }, { "epoch": 0.7942041159176817, "grad_norm": 1.2626014947891235, "learning_rate": 7.491709522166191e-07, "loss": 0.3462, "step": 13237 }, { "epoch": 0.7942641147177056, "grad_norm": 1.4147838354110718, "learning_rate": 7.487504648753765e-07, "loss": 0.3393, "step": 13238 }, { "epoch": 0.7943241135177297, "grad_norm": 1.2998543977737427, "learning_rate": 7.483300814362949e-07, "loss": 0.3636, "step": 13239 }, { "epoch": 0.7943841123177536, "grad_norm": 1.3138291835784912, "learning_rate": 7.479098019152497e-07, "loss": 0.3721, "step": 13240 }, { "epoch": 0.7944441111177777, "grad_norm": 1.189150333404541, "learning_rate": 7.474896263281128e-07, "loss": 0.3392, "step": 13241 }, { "epoch": 0.7945041099178016, "grad_norm": 1.5096193552017212, "learning_rate": 7.470695546907513e-07, "loss": 0.3648, "step": 13242 }, { "epoch": 0.7945641087178257, "grad_norm": 1.3110082149505615, "learning_rate": 7.466495870190306e-07, "loss": 0.3659, "step": 13243 }, { "epoch": 0.7946241075178496, "grad_norm": 1.2710695266723633, "learning_rate": 7.462297233288123e-07, "loss": 0.3856, "step": 13244 }, { "epoch": 0.7946841063178737, "grad_norm": 1.3950926065444946, "learning_rate": 7.458099636359496e-07, "loss": 0.3773, "step": 13245 }, { "epoch": 0.7947441051178976, "grad_norm": 1.3985483646392822, "learning_rate": 7.453903079562975e-07, "loss": 0.4077, "step": 13246 }, { "epoch": 0.7948041039179217, "grad_norm": 1.2385889291763306, "learning_rate": 7.449707563057023e-07, "loss": 0.3763, "step": 13247 }, { "epoch": 0.7948641027179456, "grad_norm": 1.1813950538635254, "learning_rate": 7.445513087000117e-07, "loss": 0.34, "step": 13248 }, { "epoch": 0.7949241015179697, "grad_norm": 1.394366979598999, "learning_rate": 7.441319651550625e-07, "loss": 0.3839, "step": 13249 }, { "epoch": 0.7949841003179936, "grad_norm": 1.3091306686401367, "learning_rate": 7.437127256866933e-07, "loss": 0.3583, "step": 13250 }, { "epoch": 0.7950440991180177, "grad_norm": 1.3018471002578735, "learning_rate": 7.432935903107378e-07, "loss": 0.3805, "step": 13251 }, { "epoch": 0.7951040979180416, "grad_norm": 1.3212190866470337, "learning_rate": 7.428745590430239e-07, "loss": 0.3486, "step": 13252 }, { "epoch": 0.7951640967180656, "grad_norm": 1.4045963287353516, "learning_rate": 7.424556318993764e-07, "loss": 0.3627, "step": 13253 }, { "epoch": 0.7952240955180896, "grad_norm": 1.4696227312088013, "learning_rate": 7.420368088956156e-07, "loss": 0.3279, "step": 13254 }, { "epoch": 0.7952840943181136, "grad_norm": 1.352160930633545, "learning_rate": 7.4161809004756e-07, "loss": 0.3606, "step": 13255 }, { "epoch": 0.7953440931181376, "grad_norm": 1.3592355251312256, "learning_rate": 7.411994753710219e-07, "loss": 0.359, "step": 13256 }, { "epoch": 0.7954040919181616, "grad_norm": 1.3266342878341675, "learning_rate": 7.407809648818098e-07, "loss": 0.3903, "step": 13257 }, { "epoch": 0.7954640907181857, "grad_norm": 1.3206918239593506, "learning_rate": 7.403625585957305e-07, "loss": 0.3865, "step": 13258 }, { "epoch": 0.7955240895182096, "grad_norm": 1.3089966773986816, "learning_rate": 7.399442565285848e-07, "loss": 0.3786, "step": 13259 }, { "epoch": 0.7955840883182337, "grad_norm": 1.3830666542053223, "learning_rate": 7.395260586961693e-07, "loss": 0.3636, "step": 13260 }, { "epoch": 0.7956440871182576, "grad_norm": 1.33157479763031, "learning_rate": 7.391079651142772e-07, "loss": 0.4399, "step": 13261 }, { "epoch": 0.7957040859182817, "grad_norm": 1.4668751955032349, "learning_rate": 7.386899757986985e-07, "loss": 0.3748, "step": 13262 }, { "epoch": 0.7957640847183056, "grad_norm": 1.4055609703063965, "learning_rate": 7.382720907652199e-07, "loss": 0.3593, "step": 13263 }, { "epoch": 0.7958240835183297, "grad_norm": 1.3417199850082397, "learning_rate": 7.378543100296222e-07, "loss": 0.3908, "step": 13264 }, { "epoch": 0.7958840823183536, "grad_norm": 1.2876968383789062, "learning_rate": 7.374366336076826e-07, "loss": 0.3908, "step": 13265 }, { "epoch": 0.7959440811183777, "grad_norm": 1.2830970287322998, "learning_rate": 7.370190615151745e-07, "loss": 0.3985, "step": 13266 }, { "epoch": 0.7960040799184016, "grad_norm": 1.4532378911972046, "learning_rate": 7.366015937678694e-07, "loss": 0.4091, "step": 13267 }, { "epoch": 0.7960640787184257, "grad_norm": 1.3374942541122437, "learning_rate": 7.361842303815318e-07, "loss": 0.3765, "step": 13268 }, { "epoch": 0.7961240775184496, "grad_norm": 1.2868964672088623, "learning_rate": 7.357669713719229e-07, "loss": 0.3503, "step": 13269 }, { "epoch": 0.7961840763184737, "grad_norm": 1.4665273427963257, "learning_rate": 7.353498167548027e-07, "loss": 0.3858, "step": 13270 }, { "epoch": 0.7962440751184976, "grad_norm": 1.2909044027328491, "learning_rate": 7.349327665459245e-07, "loss": 0.359, "step": 13271 }, { "epoch": 0.7963040739185216, "grad_norm": 1.4479695558547974, "learning_rate": 7.345158207610377e-07, "loss": 0.3662, "step": 13272 }, { "epoch": 0.7963640727185456, "grad_norm": 1.308046817779541, "learning_rate": 7.34098979415888e-07, "loss": 0.3495, "step": 13273 }, { "epoch": 0.7964240715185696, "grad_norm": 1.3341734409332275, "learning_rate": 7.336822425262185e-07, "loss": 0.3758, "step": 13274 }, { "epoch": 0.7964840703185936, "grad_norm": 1.37309730052948, "learning_rate": 7.332656101077694e-07, "loss": 0.3898, "step": 13275 }, { "epoch": 0.7965440691186176, "grad_norm": 1.3009071350097656, "learning_rate": 7.328490821762708e-07, "loss": 0.4502, "step": 13276 }, { "epoch": 0.7966040679186416, "grad_norm": 1.3574588298797607, "learning_rate": 7.324326587474565e-07, "loss": 0.4054, "step": 13277 }, { "epoch": 0.7966640667186656, "grad_norm": 1.4454619884490967, "learning_rate": 7.320163398370507e-07, "loss": 0.3636, "step": 13278 }, { "epoch": 0.7967240655186896, "grad_norm": 1.3223427534103394, "learning_rate": 7.316001254607776e-07, "loss": 0.41, "step": 13279 }, { "epoch": 0.7967840643187136, "grad_norm": 1.4269561767578125, "learning_rate": 7.31184015634355e-07, "loss": 0.4277, "step": 13280 }, { "epoch": 0.7968440631187377, "grad_norm": 1.3692660331726074, "learning_rate": 7.307680103734963e-07, "loss": 0.3127, "step": 13281 }, { "epoch": 0.7969040619187616, "grad_norm": 1.2902741432189941, "learning_rate": 7.303521096939145e-07, "loss": 0.3525, "step": 13282 }, { "epoch": 0.7969640607187857, "grad_norm": 1.2418107986450195, "learning_rate": 7.29936313611315e-07, "loss": 0.3619, "step": 13283 }, { "epoch": 0.7970240595188096, "grad_norm": 1.3987733125686646, "learning_rate": 7.295206221413996e-07, "loss": 0.3959, "step": 13284 }, { "epoch": 0.7970840583188337, "grad_norm": 1.3474160432815552, "learning_rate": 7.291050352998689e-07, "loss": 0.3695, "step": 13285 }, { "epoch": 0.7971440571188576, "grad_norm": 1.3942795991897583, "learning_rate": 7.286895531024168e-07, "loss": 0.395, "step": 13286 }, { "epoch": 0.7972040559188817, "grad_norm": 1.4240745306015015, "learning_rate": 7.282741755647343e-07, "loss": 0.3886, "step": 13287 }, { "epoch": 0.7972640547189056, "grad_norm": 1.436879277229309, "learning_rate": 7.278589027025075e-07, "loss": 0.3562, "step": 13288 }, { "epoch": 0.7973240535189297, "grad_norm": 1.3839460611343384, "learning_rate": 7.274437345314206e-07, "loss": 0.3721, "step": 13289 }, { "epoch": 0.7973840523189536, "grad_norm": 1.2886056900024414, "learning_rate": 7.270286710671525e-07, "loss": 0.3497, "step": 13290 }, { "epoch": 0.7974440511189776, "grad_norm": 1.4027111530303955, "learning_rate": 7.266137123253768e-07, "loss": 0.3518, "step": 13291 }, { "epoch": 0.7975040499190016, "grad_norm": 1.4083547592163086, "learning_rate": 7.261988583217664e-07, "loss": 0.368, "step": 13292 }, { "epoch": 0.7975640487190256, "grad_norm": 1.3132585287094116, "learning_rate": 7.257841090719871e-07, "loss": 0.3827, "step": 13293 }, { "epoch": 0.7976240475190496, "grad_norm": 1.2880675792694092, "learning_rate": 7.253694645917045e-07, "loss": 0.4203, "step": 13294 }, { "epoch": 0.7976840463190736, "grad_norm": 1.4259506464004517, "learning_rate": 7.249549248965741e-07, "loss": 0.3952, "step": 13295 }, { "epoch": 0.7977440451190976, "grad_norm": 1.3274906873703003, "learning_rate": 7.245404900022532e-07, "loss": 0.3864, "step": 13296 }, { "epoch": 0.7978040439191216, "grad_norm": 1.3079618215560913, "learning_rate": 7.24126159924394e-07, "loss": 0.3442, "step": 13297 }, { "epoch": 0.7978640427191456, "grad_norm": 1.5450254678726196, "learning_rate": 7.237119346786427e-07, "loss": 0.3693, "step": 13298 }, { "epoch": 0.7979240415191696, "grad_norm": 1.3276187181472778, "learning_rate": 7.232978142806428e-07, "loss": 0.3504, "step": 13299 }, { "epoch": 0.7979840403191936, "grad_norm": 1.380534291267395, "learning_rate": 7.228837987460333e-07, "loss": 0.3587, "step": 13300 }, { "epoch": 0.7980440391192176, "grad_norm": 1.1696771383285522, "learning_rate": 7.224698880904507e-07, "loss": 0.3809, "step": 13301 }, { "epoch": 0.7981040379192416, "grad_norm": 1.3728183507919312, "learning_rate": 7.220560823295263e-07, "loss": 0.3998, "step": 13302 }, { "epoch": 0.7981640367192656, "grad_norm": 1.462113857269287, "learning_rate": 7.216423814788863e-07, "loss": 0.3969, "step": 13303 }, { "epoch": 0.7982240355192897, "grad_norm": 1.3716120719909668, "learning_rate": 7.212287855541567e-07, "loss": 0.41, "step": 13304 }, { "epoch": 0.7982840343193136, "grad_norm": 1.325188398361206, "learning_rate": 7.208152945709552e-07, "loss": 0.3557, "step": 13305 }, { "epoch": 0.7983440331193377, "grad_norm": 1.2943886518478394, "learning_rate": 7.204019085448987e-07, "loss": 0.3971, "step": 13306 }, { "epoch": 0.7984040319193616, "grad_norm": 1.2904510498046875, "learning_rate": 7.199886274915971e-07, "loss": 0.3502, "step": 13307 }, { "epoch": 0.7984640307193857, "grad_norm": 1.3320822715759277, "learning_rate": 7.19575451426659e-07, "loss": 0.3735, "step": 13308 }, { "epoch": 0.7985240295194096, "grad_norm": 1.3214194774627686, "learning_rate": 7.1916238036569e-07, "loss": 0.4254, "step": 13309 }, { "epoch": 0.7985840283194336, "grad_norm": 1.2068508863449097, "learning_rate": 7.187494143242883e-07, "loss": 0.3568, "step": 13310 }, { "epoch": 0.7986440271194576, "grad_norm": 1.1333820819854736, "learning_rate": 7.183365533180495e-07, "loss": 0.3271, "step": 13311 }, { "epoch": 0.7987040259194816, "grad_norm": 1.4034910202026367, "learning_rate": 7.179237973625655e-07, "loss": 0.3841, "step": 13312 }, { "epoch": 0.7987640247195056, "grad_norm": 1.2617946863174438, "learning_rate": 7.17511146473425e-07, "loss": 0.3572, "step": 13313 }, { "epoch": 0.7988240235195296, "grad_norm": 1.3526084423065186, "learning_rate": 7.170986006662117e-07, "loss": 0.3675, "step": 13314 }, { "epoch": 0.7988840223195536, "grad_norm": 1.378179907798767, "learning_rate": 7.166861599565045e-07, "loss": 0.3695, "step": 13315 }, { "epoch": 0.7989440211195776, "grad_norm": 1.1940464973449707, "learning_rate": 7.162738243598811e-07, "loss": 0.3403, "step": 13316 }, { "epoch": 0.7990040199196016, "grad_norm": 1.4220861196517944, "learning_rate": 7.158615938919131e-07, "loss": 0.3548, "step": 13317 }, { "epoch": 0.7990640187196256, "grad_norm": 1.2954033613204956, "learning_rate": 7.154494685681676e-07, "loss": 0.3452, "step": 13318 }, { "epoch": 0.7991240175196496, "grad_norm": 1.299458384513855, "learning_rate": 7.150374484042087e-07, "loss": 0.4003, "step": 13319 }, { "epoch": 0.7991840163196736, "grad_norm": 1.2520792484283447, "learning_rate": 7.146255334155973e-07, "loss": 0.4035, "step": 13320 }, { "epoch": 0.7992440151196976, "grad_norm": 1.20438551902771, "learning_rate": 7.14213723617891e-07, "loss": 0.3562, "step": 13321 }, { "epoch": 0.7993040139197216, "grad_norm": 1.4278435707092285, "learning_rate": 7.138020190266383e-07, "loss": 0.3591, "step": 13322 }, { "epoch": 0.7993640127197456, "grad_norm": 1.3376249074935913, "learning_rate": 7.133904196573908e-07, "loss": 0.3955, "step": 13323 }, { "epoch": 0.7994240115197696, "grad_norm": 1.23543119430542, "learning_rate": 7.129789255256902e-07, "loss": 0.3247, "step": 13324 }, { "epoch": 0.7994840103197937, "grad_norm": 1.3969630002975464, "learning_rate": 7.125675366470788e-07, "loss": 0.3872, "step": 13325 }, { "epoch": 0.7995440091198176, "grad_norm": 1.4870386123657227, "learning_rate": 7.121562530370924e-07, "loss": 0.3907, "step": 13326 }, { "epoch": 0.7996040079198417, "grad_norm": 1.2703969478607178, "learning_rate": 7.117450747112614e-07, "loss": 0.3568, "step": 13327 }, { "epoch": 0.7996640067198656, "grad_norm": 1.2982157468795776, "learning_rate": 7.113340016851173e-07, "loss": 0.3641, "step": 13328 }, { "epoch": 0.7997240055198896, "grad_norm": 1.326983094215393, "learning_rate": 7.109230339741823e-07, "loss": 0.3564, "step": 13329 }, { "epoch": 0.7997840043199136, "grad_norm": 1.4877755641937256, "learning_rate": 7.105121715939775e-07, "loss": 0.368, "step": 13330 }, { "epoch": 0.7998440031199376, "grad_norm": 1.3994412422180176, "learning_rate": 7.101014145600181e-07, "loss": 0.4028, "step": 13331 }, { "epoch": 0.7999040019199616, "grad_norm": 1.296735167503357, "learning_rate": 7.096907628878184e-07, "loss": 0.4071, "step": 13332 }, { "epoch": 0.7999640007199856, "grad_norm": 1.5331013202667236, "learning_rate": 7.092802165928862e-07, "loss": 0.4094, "step": 13333 }, { "epoch": 0.8000239995200096, "grad_norm": 1.3881443738937378, "learning_rate": 7.088697756907248e-07, "loss": 0.3723, "step": 13334 }, { "epoch": 0.8000839983200336, "grad_norm": 1.3419885635375977, "learning_rate": 7.084594401968364e-07, "loss": 0.3771, "step": 13335 }, { "epoch": 0.8001439971200576, "grad_norm": 1.3413276672363281, "learning_rate": 7.080492101267166e-07, "loss": 0.3861, "step": 13336 }, { "epoch": 0.8002039959200816, "grad_norm": 1.2534734010696411, "learning_rate": 7.076390854958572e-07, "loss": 0.3323, "step": 13337 }, { "epoch": 0.8002639947201056, "grad_norm": 1.2859973907470703, "learning_rate": 7.072290663197485e-07, "loss": 0.3848, "step": 13338 }, { "epoch": 0.8003239935201296, "grad_norm": 1.3259458541870117, "learning_rate": 7.068191526138735e-07, "loss": 0.3883, "step": 13339 }, { "epoch": 0.8003839923201536, "grad_norm": 1.2497092485427856, "learning_rate": 7.064093443937149e-07, "loss": 0.3768, "step": 13340 }, { "epoch": 0.8004439911201776, "grad_norm": 1.3232017755508423, "learning_rate": 7.05999641674746e-07, "loss": 0.3628, "step": 13341 }, { "epoch": 0.8005039899202016, "grad_norm": 1.3811556100845337, "learning_rate": 7.055900444724423e-07, "loss": 0.3544, "step": 13342 }, { "epoch": 0.8005639887202256, "grad_norm": 1.5079290866851807, "learning_rate": 7.051805528022703e-07, "loss": 0.3738, "step": 13343 }, { "epoch": 0.8006239875202495, "grad_norm": 1.5254535675048828, "learning_rate": 7.047711666796967e-07, "loss": 0.3877, "step": 13344 }, { "epoch": 0.8006839863202736, "grad_norm": 1.1444491147994995, "learning_rate": 7.043618861201806e-07, "loss": 0.3376, "step": 13345 }, { "epoch": 0.8007439851202975, "grad_norm": 1.1838326454162598, "learning_rate": 7.039527111391789e-07, "loss": 0.2977, "step": 13346 }, { "epoch": 0.8008039839203216, "grad_norm": 1.3889812231063843, "learning_rate": 7.035436417521453e-07, "loss": 0.3727, "step": 13347 }, { "epoch": 0.8008639827203456, "grad_norm": 1.3271843194961548, "learning_rate": 7.031346779745273e-07, "loss": 0.3645, "step": 13348 }, { "epoch": 0.8009239815203696, "grad_norm": 1.3401442766189575, "learning_rate": 7.027258198217695e-07, "loss": 0.4041, "step": 13349 }, { "epoch": 0.8009839803203936, "grad_norm": 1.3747799396514893, "learning_rate": 7.023170673093138e-07, "loss": 0.3665, "step": 13350 }, { "epoch": 0.8010439791204176, "grad_norm": 1.3483535051345825, "learning_rate": 7.019084204525957e-07, "loss": 0.3664, "step": 13351 }, { "epoch": 0.8011039779204416, "grad_norm": 1.4693630933761597, "learning_rate": 7.014998792670501e-07, "loss": 0.3592, "step": 13352 }, { "epoch": 0.8011639767204656, "grad_norm": 1.2052233219146729, "learning_rate": 7.010914437681021e-07, "loss": 0.3445, "step": 13353 }, { "epoch": 0.8012239755204896, "grad_norm": 1.2872133255004883, "learning_rate": 7.006831139711797e-07, "loss": 0.348, "step": 13354 }, { "epoch": 0.8012839743205136, "grad_norm": 1.4148372411727905, "learning_rate": 7.002748898917014e-07, "loss": 0.393, "step": 13355 }, { "epoch": 0.8013439731205376, "grad_norm": 2.4279298782348633, "learning_rate": 6.998667715450858e-07, "loss": 0.3561, "step": 13356 }, { "epoch": 0.8014039719205616, "grad_norm": 1.3542087078094482, "learning_rate": 6.994587589467451e-07, "loss": 0.3513, "step": 13357 }, { "epoch": 0.8014639707205856, "grad_norm": 1.3289885520935059, "learning_rate": 6.99050852112087e-07, "loss": 0.3562, "step": 13358 }, { "epoch": 0.8015239695206096, "grad_norm": 1.3363914489746094, "learning_rate": 6.986430510565176e-07, "loss": 0.3839, "step": 13359 }, { "epoch": 0.8015839683206336, "grad_norm": 1.32133150100708, "learning_rate": 6.982353557954377e-07, "loss": 0.34, "step": 13360 }, { "epoch": 0.8016439671206576, "grad_norm": 1.2849677801132202, "learning_rate": 6.978277663442421e-07, "loss": 0.3687, "step": 13361 }, { "epoch": 0.8017039659206816, "grad_norm": 1.4359833002090454, "learning_rate": 6.974202827183266e-07, "loss": 0.382, "step": 13362 }, { "epoch": 0.8017639647207055, "grad_norm": 1.3346116542816162, "learning_rate": 6.97012904933078e-07, "loss": 0.3716, "step": 13363 }, { "epoch": 0.8018239635207296, "grad_norm": 1.2905079126358032, "learning_rate": 6.966056330038819e-07, "loss": 0.3899, "step": 13364 }, { "epoch": 0.8018839623207535, "grad_norm": 1.2168221473693848, "learning_rate": 6.961984669461181e-07, "loss": 0.3295, "step": 13365 }, { "epoch": 0.8019439611207776, "grad_norm": 1.2284388542175293, "learning_rate": 6.957914067751636e-07, "loss": 0.3547, "step": 13366 }, { "epoch": 0.8020039599208015, "grad_norm": 1.317991018295288, "learning_rate": 6.95384452506394e-07, "loss": 0.3923, "step": 13367 }, { "epoch": 0.8020639587208256, "grad_norm": 1.2352149486541748, "learning_rate": 6.949776041551739e-07, "loss": 0.3768, "step": 13368 }, { "epoch": 0.8021239575208495, "grad_norm": 1.2499293088912964, "learning_rate": 6.945708617368708e-07, "loss": 0.3253, "step": 13369 }, { "epoch": 0.8021839563208736, "grad_norm": 1.333640694618225, "learning_rate": 6.94164225266844e-07, "loss": 0.348, "step": 13370 }, { "epoch": 0.8022439551208976, "grad_norm": 1.566837191581726, "learning_rate": 6.937576947604528e-07, "loss": 0.4193, "step": 13371 }, { "epoch": 0.8023039539209216, "grad_norm": 1.3122144937515259, "learning_rate": 6.933512702330465e-07, "loss": 0.3941, "step": 13372 }, { "epoch": 0.8023639527209456, "grad_norm": 1.2206047773361206, "learning_rate": 6.929449516999754e-07, "loss": 0.3516, "step": 13373 }, { "epoch": 0.8024239515209696, "grad_norm": 1.2382917404174805, "learning_rate": 6.925387391765858e-07, "loss": 0.3953, "step": 13374 }, { "epoch": 0.8024839503209936, "grad_norm": 1.326558232307434, "learning_rate": 6.92132632678217e-07, "loss": 0.3775, "step": 13375 }, { "epoch": 0.8025439491210176, "grad_norm": 1.2954206466674805, "learning_rate": 6.917266322202061e-07, "loss": 0.3697, "step": 13376 }, { "epoch": 0.8026039479210416, "grad_norm": 1.260903239250183, "learning_rate": 6.913207378178852e-07, "loss": 0.3764, "step": 13377 }, { "epoch": 0.8026639467210656, "grad_norm": 1.3371106386184692, "learning_rate": 6.909149494865842e-07, "loss": 0.3988, "step": 13378 }, { "epoch": 0.8027239455210896, "grad_norm": 1.389039158821106, "learning_rate": 6.90509267241628e-07, "loss": 0.383, "step": 13379 }, { "epoch": 0.8027839443211136, "grad_norm": 1.3036047220230103, "learning_rate": 6.901036910983352e-07, "loss": 0.41, "step": 13380 }, { "epoch": 0.8028439431211376, "grad_norm": 1.4393327236175537, "learning_rate": 6.896982210720255e-07, "loss": 0.4381, "step": 13381 }, { "epoch": 0.8029039419211615, "grad_norm": 1.3716301918029785, "learning_rate": 6.892928571780098e-07, "loss": 0.3637, "step": 13382 }, { "epoch": 0.8029639407211856, "grad_norm": 1.1647409200668335, "learning_rate": 6.888875994315978e-07, "loss": 0.2858, "step": 13383 }, { "epoch": 0.8030239395212095, "grad_norm": 1.3190199136734009, "learning_rate": 6.884824478480927e-07, "loss": 0.3985, "step": 13384 }, { "epoch": 0.8030839383212336, "grad_norm": 1.273659110069275, "learning_rate": 6.880774024427962e-07, "loss": 0.3629, "step": 13385 }, { "epoch": 0.8031439371212575, "grad_norm": 1.1658881902694702, "learning_rate": 6.876724632310071e-07, "loss": 0.3805, "step": 13386 }, { "epoch": 0.8032039359212816, "grad_norm": 1.1937179565429688, "learning_rate": 6.872676302280142e-07, "loss": 0.3334, "step": 13387 }, { "epoch": 0.8032639347213055, "grad_norm": 1.3106337785720825, "learning_rate": 6.868629034491093e-07, "loss": 0.3715, "step": 13388 }, { "epoch": 0.8033239335213296, "grad_norm": 1.4271423816680908, "learning_rate": 6.864582829095749e-07, "loss": 0.4104, "step": 13389 }, { "epoch": 0.8033839323213535, "grad_norm": 1.394730567932129, "learning_rate": 6.860537686246936e-07, "loss": 0.4413, "step": 13390 }, { "epoch": 0.8034439311213776, "grad_norm": 1.3880106210708618, "learning_rate": 6.85649360609741e-07, "loss": 0.3763, "step": 13391 }, { "epoch": 0.8035039299214016, "grad_norm": 1.3865891695022583, "learning_rate": 6.852450588799891e-07, "loss": 0.3866, "step": 13392 }, { "epoch": 0.8035639287214256, "grad_norm": 1.2962197065353394, "learning_rate": 6.848408634507079e-07, "loss": 0.3123, "step": 13393 }, { "epoch": 0.8036239275214496, "grad_norm": 1.3392804861068726, "learning_rate": 6.844367743371618e-07, "loss": 0.4056, "step": 13394 }, { "epoch": 0.8036839263214736, "grad_norm": 1.3570436239242554, "learning_rate": 6.840327915546111e-07, "loss": 0.3822, "step": 13395 }, { "epoch": 0.8037439251214976, "grad_norm": 1.2920695543289185, "learning_rate": 6.836289151183114e-07, "loss": 0.3395, "step": 13396 }, { "epoch": 0.8038039239215216, "grad_norm": 1.3644630908966064, "learning_rate": 6.83225145043516e-07, "loss": 0.3328, "step": 13397 }, { "epoch": 0.8038639227215456, "grad_norm": 1.300100564956665, "learning_rate": 6.828214813454757e-07, "loss": 0.3436, "step": 13398 }, { "epoch": 0.8039239215215696, "grad_norm": 1.4266096353530884, "learning_rate": 6.824179240394312e-07, "loss": 0.3417, "step": 13399 }, { "epoch": 0.8039839203215936, "grad_norm": 1.457216739654541, "learning_rate": 6.820144731406259e-07, "loss": 0.3927, "step": 13400 }, { "epoch": 0.8040439191216175, "grad_norm": 1.3644648790359497, "learning_rate": 6.816111286642938e-07, "loss": 0.3773, "step": 13401 }, { "epoch": 0.8041039179216416, "grad_norm": 1.3720065355300903, "learning_rate": 6.812078906256704e-07, "loss": 0.3469, "step": 13402 }, { "epoch": 0.8041639167216655, "grad_norm": 1.3658002614974976, "learning_rate": 6.808047590399821e-07, "loss": 0.3643, "step": 13403 }, { "epoch": 0.8042239155216896, "grad_norm": 1.4035334587097168, "learning_rate": 6.80401733922453e-07, "loss": 0.3981, "step": 13404 }, { "epoch": 0.8042839143217135, "grad_norm": 1.4786945581436157, "learning_rate": 6.799988152883055e-07, "loss": 0.38, "step": 13405 }, { "epoch": 0.8043439131217376, "grad_norm": 1.3792482614517212, "learning_rate": 6.795960031527546e-07, "loss": 0.364, "step": 13406 }, { "epoch": 0.8044039119217615, "grad_norm": 1.267408013343811, "learning_rate": 6.791932975310122e-07, "loss": 0.3832, "step": 13407 }, { "epoch": 0.8044639107217856, "grad_norm": 1.2415636777877808, "learning_rate": 6.787906984382881e-07, "loss": 0.3489, "step": 13408 }, { "epoch": 0.8045239095218095, "grad_norm": 1.4668391942977905, "learning_rate": 6.783882058897861e-07, "loss": 0.4023, "step": 13409 }, { "epoch": 0.8045839083218336, "grad_norm": 1.4338489770889282, "learning_rate": 6.779858199007064e-07, "loss": 0.3831, "step": 13410 }, { "epoch": 0.8046439071218575, "grad_norm": 1.3883880376815796, "learning_rate": 6.775835404862441e-07, "loss": 0.3715, "step": 13411 }, { "epoch": 0.8047039059218816, "grad_norm": 1.3086212873458862, "learning_rate": 6.771813676615936e-07, "loss": 0.339, "step": 13412 }, { "epoch": 0.8047639047219055, "grad_norm": 1.2494637966156006, "learning_rate": 6.767793014419419e-07, "loss": 0.3533, "step": 13413 }, { "epoch": 0.8048239035219296, "grad_norm": 1.293817162513733, "learning_rate": 6.763773418424728e-07, "loss": 0.3647, "step": 13414 }, { "epoch": 0.8048839023219536, "grad_norm": 1.4005111455917358, "learning_rate": 6.759754888783681e-07, "loss": 0.4337, "step": 13415 }, { "epoch": 0.8049439011219776, "grad_norm": 1.3571341037750244, "learning_rate": 6.755737425648015e-07, "loss": 0.3665, "step": 13416 }, { "epoch": 0.8050038999220016, "grad_norm": 1.3781354427337646, "learning_rate": 6.751721029169489e-07, "loss": 0.3578, "step": 13417 }, { "epoch": 0.8050638987220256, "grad_norm": 1.564483642578125, "learning_rate": 6.747705699499743e-07, "loss": 0.3902, "step": 13418 }, { "epoch": 0.8051238975220496, "grad_norm": 1.3228130340576172, "learning_rate": 6.743691436790432e-07, "loss": 0.3941, "step": 13419 }, { "epoch": 0.8051838963220735, "grad_norm": 1.4579122066497803, "learning_rate": 6.739678241193172e-07, "loss": 0.3761, "step": 13420 }, { "epoch": 0.8052438951220976, "grad_norm": 1.3962684869766235, "learning_rate": 6.73566611285951e-07, "loss": 0.3724, "step": 13421 }, { "epoch": 0.8053038939221215, "grad_norm": 1.3536256551742554, "learning_rate": 6.731655051940967e-07, "loss": 0.4174, "step": 13422 }, { "epoch": 0.8053638927221456, "grad_norm": 1.5763953924179077, "learning_rate": 6.727645058589015e-07, "loss": 0.4024, "step": 13423 }, { "epoch": 0.8054238915221695, "grad_norm": 1.5261180400848389, "learning_rate": 6.723636132955107e-07, "loss": 0.3769, "step": 13424 }, { "epoch": 0.8054838903221936, "grad_norm": 1.6018092632293701, "learning_rate": 6.719628275190635e-07, "loss": 0.4182, "step": 13425 }, { "epoch": 0.8055438891222175, "grad_norm": 1.439759612083435, "learning_rate": 6.71562148544695e-07, "loss": 0.3671, "step": 13426 }, { "epoch": 0.8056038879222416, "grad_norm": 1.2947765588760376, "learning_rate": 6.711615763875387e-07, "loss": 0.3876, "step": 13427 }, { "epoch": 0.8056638867222655, "grad_norm": 1.1973352432250977, "learning_rate": 6.707611110627216e-07, "loss": 0.3681, "step": 13428 }, { "epoch": 0.8057238855222896, "grad_norm": 1.2796745300292969, "learning_rate": 6.703607525853675e-07, "loss": 0.3559, "step": 13429 }, { "epoch": 0.8057838843223135, "grad_norm": 1.293638825416565, "learning_rate": 6.69960500970595e-07, "loss": 0.3264, "step": 13430 }, { "epoch": 0.8058438831223376, "grad_norm": 1.3055636882781982, "learning_rate": 6.695603562335203e-07, "loss": 0.3558, "step": 13431 }, { "epoch": 0.8059038819223615, "grad_norm": 1.2931116819381714, "learning_rate": 6.691603183892569e-07, "loss": 0.3409, "step": 13432 }, { "epoch": 0.8059638807223856, "grad_norm": 1.2125344276428223, "learning_rate": 6.687603874529108e-07, "loss": 0.3909, "step": 13433 }, { "epoch": 0.8060238795224095, "grad_norm": 1.221069574356079, "learning_rate": 6.683605634395859e-07, "loss": 0.3655, "step": 13434 }, { "epoch": 0.8060838783224336, "grad_norm": 1.3153525590896606, "learning_rate": 6.679608463643805e-07, "loss": 0.3399, "step": 13435 }, { "epoch": 0.8061438771224575, "grad_norm": 1.3841400146484375, "learning_rate": 6.675612362423923e-07, "loss": 0.3658, "step": 13436 }, { "epoch": 0.8062038759224815, "grad_norm": 1.2822210788726807, "learning_rate": 6.671617330887115e-07, "loss": 0.3669, "step": 13437 }, { "epoch": 0.8062638747225056, "grad_norm": 1.2925411462783813, "learning_rate": 6.66762336918425e-07, "loss": 0.4058, "step": 13438 }, { "epoch": 0.8063238735225295, "grad_norm": 1.2398487329483032, "learning_rate": 6.663630477466176e-07, "loss": 0.3583, "step": 13439 }, { "epoch": 0.8063838723225536, "grad_norm": 1.331648826599121, "learning_rate": 6.659638655883676e-07, "loss": 0.4129, "step": 13440 }, { "epoch": 0.8064438711225775, "grad_norm": 1.4505788087844849, "learning_rate": 6.655647904587508e-07, "loss": 0.3443, "step": 13441 }, { "epoch": 0.8065038699226016, "grad_norm": 1.3844040632247925, "learning_rate": 6.651658223728374e-07, "loss": 0.4017, "step": 13442 }, { "epoch": 0.8065638687226255, "grad_norm": 1.404754877090454, "learning_rate": 6.647669613456952e-07, "loss": 0.3901, "step": 13443 }, { "epoch": 0.8066238675226496, "grad_norm": 1.2770220041275024, "learning_rate": 6.643682073923895e-07, "loss": 0.3588, "step": 13444 }, { "epoch": 0.8066838663226735, "grad_norm": 1.2869336605072021, "learning_rate": 6.639695605279759e-07, "loss": 0.3592, "step": 13445 }, { "epoch": 0.8067438651226976, "grad_norm": 1.34116792678833, "learning_rate": 6.635710207675117e-07, "loss": 0.3845, "step": 13446 }, { "epoch": 0.8068038639227215, "grad_norm": 1.407518744468689, "learning_rate": 6.631725881260465e-07, "loss": 0.4114, "step": 13447 }, { "epoch": 0.8068638627227456, "grad_norm": 1.332484483718872, "learning_rate": 6.627742626186289e-07, "loss": 0.3886, "step": 13448 }, { "epoch": 0.8069238615227695, "grad_norm": 1.3004251718521118, "learning_rate": 6.62376044260301e-07, "loss": 0.3949, "step": 13449 }, { "epoch": 0.8069838603227936, "grad_norm": 1.3408522605895996, "learning_rate": 6.619779330661008e-07, "loss": 0.3752, "step": 13450 }, { "epoch": 0.8070438591228175, "grad_norm": 1.4078731536865234, "learning_rate": 6.615799290510653e-07, "loss": 0.3432, "step": 13451 }, { "epoch": 0.8071038579228416, "grad_norm": 1.1898239850997925, "learning_rate": 6.611820322302236e-07, "loss": 0.3054, "step": 13452 }, { "epoch": 0.8071638567228655, "grad_norm": 1.182539939880371, "learning_rate": 6.607842426186034e-07, "loss": 0.3479, "step": 13453 }, { "epoch": 0.8072238555228896, "grad_norm": 1.4985010623931885, "learning_rate": 6.603865602312256e-07, "loss": 0.3458, "step": 13454 }, { "epoch": 0.8072838543229135, "grad_norm": 1.3808510303497314, "learning_rate": 6.599889850831113e-07, "loss": 0.3804, "step": 13455 }, { "epoch": 0.8073438531229375, "grad_norm": 1.3818397521972656, "learning_rate": 6.595915171892741e-07, "loss": 0.367, "step": 13456 }, { "epoch": 0.8074038519229615, "grad_norm": 1.298328161239624, "learning_rate": 6.591941565647232e-07, "loss": 0.3964, "step": 13457 }, { "epoch": 0.8074638507229855, "grad_norm": 1.4871711730957031, "learning_rate": 6.587969032244678e-07, "loss": 0.4371, "step": 13458 }, { "epoch": 0.8075238495230095, "grad_norm": 1.210470199584961, "learning_rate": 6.583997571835081e-07, "loss": 0.3233, "step": 13459 }, { "epoch": 0.8075838483230335, "grad_norm": 1.292777419090271, "learning_rate": 6.58002718456843e-07, "loss": 0.3382, "step": 13460 }, { "epoch": 0.8076438471230576, "grad_norm": 1.3920824527740479, "learning_rate": 6.576057870594676e-07, "loss": 0.404, "step": 13461 }, { "epoch": 0.8077038459230815, "grad_norm": 1.1831779479980469, "learning_rate": 6.572089630063709e-07, "loss": 0.3404, "step": 13462 }, { "epoch": 0.8077638447231056, "grad_norm": 1.2650986909866333, "learning_rate": 6.56812246312542e-07, "loss": 0.3649, "step": 13463 }, { "epoch": 0.8078238435231295, "grad_norm": 1.4012194871902466, "learning_rate": 6.56415636992959e-07, "loss": 0.3596, "step": 13464 }, { "epoch": 0.8078838423231536, "grad_norm": 1.4019287824630737, "learning_rate": 6.560191350626032e-07, "loss": 0.3788, "step": 13465 }, { "epoch": 0.8079438411231775, "grad_norm": 1.390554666519165, "learning_rate": 6.556227405364462e-07, "loss": 0.3864, "step": 13466 }, { "epoch": 0.8080038399232016, "grad_norm": 1.2513784170150757, "learning_rate": 6.552264534294603e-07, "loss": 0.4263, "step": 13467 }, { "epoch": 0.8080638387232255, "grad_norm": 1.1993894577026367, "learning_rate": 6.548302737566107e-07, "loss": 0.3511, "step": 13468 }, { "epoch": 0.8081238375232496, "grad_norm": 1.267019271850586, "learning_rate": 6.544342015328583e-07, "loss": 0.3734, "step": 13469 }, { "epoch": 0.8081838363232735, "grad_norm": 1.3431146144866943, "learning_rate": 6.540382367731623e-07, "loss": 0.3341, "step": 13470 }, { "epoch": 0.8082438351232976, "grad_norm": 1.3437557220458984, "learning_rate": 6.536423794924764e-07, "loss": 0.3726, "step": 13471 }, { "epoch": 0.8083038339233215, "grad_norm": 1.3574092388153076, "learning_rate": 6.532466297057486e-07, "loss": 0.3532, "step": 13472 }, { "epoch": 0.8083638327233456, "grad_norm": 1.288913369178772, "learning_rate": 6.528509874279269e-07, "loss": 0.3769, "step": 13473 }, { "epoch": 0.8084238315233695, "grad_norm": 1.3591866493225098, "learning_rate": 6.524554526739517e-07, "loss": 0.4085, "step": 13474 }, { "epoch": 0.8084838303233935, "grad_norm": 1.339978814125061, "learning_rate": 6.520600254587607e-07, "loss": 0.3653, "step": 13475 }, { "epoch": 0.8085438291234175, "grad_norm": 1.2651944160461426, "learning_rate": 6.516647057972862e-07, "loss": 0.3824, "step": 13476 }, { "epoch": 0.8086038279234415, "grad_norm": 1.2784137725830078, "learning_rate": 6.512694937044601e-07, "loss": 0.3827, "step": 13477 }, { "epoch": 0.8086638267234655, "grad_norm": 1.288727879524231, "learning_rate": 6.508743891952054e-07, "loss": 0.3851, "step": 13478 }, { "epoch": 0.8087238255234895, "grad_norm": 1.4582778215408325, "learning_rate": 6.504793922844453e-07, "loss": 0.3833, "step": 13479 }, { "epoch": 0.8087838243235135, "grad_norm": 1.19333016872406, "learning_rate": 6.500845029870961e-07, "loss": 0.3585, "step": 13480 }, { "epoch": 0.8088438231235375, "grad_norm": 1.4992798566818237, "learning_rate": 6.496897213180701e-07, "loss": 0.3554, "step": 13481 }, { "epoch": 0.8089038219235616, "grad_norm": 1.4366157054901123, "learning_rate": 6.492950472922785e-07, "loss": 0.4086, "step": 13482 }, { "epoch": 0.8089638207235855, "grad_norm": 1.2349249124526978, "learning_rate": 6.489004809246252e-07, "loss": 0.3801, "step": 13483 }, { "epoch": 0.8090238195236096, "grad_norm": 1.3727651834487915, "learning_rate": 6.485060222300097e-07, "loss": 0.369, "step": 13484 }, { "epoch": 0.8090838183236335, "grad_norm": 1.4312536716461182, "learning_rate": 6.481116712233319e-07, "loss": 0.3583, "step": 13485 }, { "epoch": 0.8091438171236576, "grad_norm": 1.2434042692184448, "learning_rate": 6.477174279194827e-07, "loss": 0.3483, "step": 13486 }, { "epoch": 0.8092038159236815, "grad_norm": 1.3525487184524536, "learning_rate": 6.473232923333518e-07, "loss": 0.4221, "step": 13487 }, { "epoch": 0.8092638147237056, "grad_norm": 1.2945983409881592, "learning_rate": 6.469292644798219e-07, "loss": 0.3709, "step": 13488 }, { "epoch": 0.8093238135237295, "grad_norm": 1.3438695669174194, "learning_rate": 6.465353443737753e-07, "loss": 0.4521, "step": 13489 }, { "epoch": 0.8093838123237536, "grad_norm": 1.4086933135986328, "learning_rate": 6.461415320300901e-07, "loss": 0.3327, "step": 13490 }, { "epoch": 0.8094438111237775, "grad_norm": 1.3157353401184082, "learning_rate": 6.457478274636355e-07, "loss": 0.3755, "step": 13491 }, { "epoch": 0.8095038099238016, "grad_norm": 1.4752206802368164, "learning_rate": 6.453542306892824e-07, "loss": 0.4091, "step": 13492 }, { "epoch": 0.8095638087238255, "grad_norm": 1.309601902961731, "learning_rate": 6.449607417218935e-07, "loss": 0.3493, "step": 13493 }, { "epoch": 0.8096238075238495, "grad_norm": 1.3560729026794434, "learning_rate": 6.445673605763312e-07, "loss": 0.4317, "step": 13494 }, { "epoch": 0.8096838063238735, "grad_norm": 1.3928369283676147, "learning_rate": 6.44174087267449e-07, "loss": 0.3875, "step": 13495 }, { "epoch": 0.8097438051238975, "grad_norm": 1.4039990901947021, "learning_rate": 6.437809218101004e-07, "loss": 0.3622, "step": 13496 }, { "epoch": 0.8098038039239215, "grad_norm": 1.3001329898834229, "learning_rate": 6.433878642191342e-07, "loss": 0.383, "step": 13497 }, { "epoch": 0.8098638027239455, "grad_norm": 1.2968919277191162, "learning_rate": 6.429949145093935e-07, "loss": 0.3336, "step": 13498 }, { "epoch": 0.8099238015239695, "grad_norm": 1.3967735767364502, "learning_rate": 6.426020726957188e-07, "loss": 0.3777, "step": 13499 }, { "epoch": 0.8099838003239935, "grad_norm": 1.5080344676971436, "learning_rate": 6.422093387929445e-07, "loss": 0.4243, "step": 13500 }, { "epoch": 0.8100437991240175, "grad_norm": 1.3715444803237915, "learning_rate": 6.418167128159043e-07, "loss": 0.3613, "step": 13501 }, { "epoch": 0.8101037979240415, "grad_norm": 1.318644404411316, "learning_rate": 6.414241947794249e-07, "loss": 0.3714, "step": 13502 }, { "epoch": 0.8101637967240655, "grad_norm": 1.3399487733840942, "learning_rate": 6.410317846983292e-07, "loss": 0.3761, "step": 13503 }, { "epoch": 0.8102237955240895, "grad_norm": 1.4704737663269043, "learning_rate": 6.406394825874385e-07, "loss": 0.4141, "step": 13504 }, { "epoch": 0.8102837943241136, "grad_norm": 1.4712355136871338, "learning_rate": 6.402472884615674e-07, "loss": 0.387, "step": 13505 }, { "epoch": 0.8103437931241375, "grad_norm": 1.4140375852584839, "learning_rate": 6.39855202335527e-07, "loss": 0.3744, "step": 13506 }, { "epoch": 0.8104037919241616, "grad_norm": 1.2758702039718628, "learning_rate": 6.394632242241239e-07, "loss": 0.3658, "step": 13507 }, { "epoch": 0.8104637907241855, "grad_norm": 1.2942545413970947, "learning_rate": 6.390713541421623e-07, "loss": 0.4, "step": 13508 }, { "epoch": 0.8105237895242096, "grad_norm": 1.2386566400527954, "learning_rate": 6.386795921044433e-07, "loss": 0.3835, "step": 13509 }, { "epoch": 0.8105837883242335, "grad_norm": 1.3271714448928833, "learning_rate": 6.382879381257582e-07, "loss": 0.3873, "step": 13510 }, { "epoch": 0.8106437871242576, "grad_norm": 1.5245375633239746, "learning_rate": 6.378963922209001e-07, "loss": 0.4119, "step": 13511 }, { "epoch": 0.8107037859242815, "grad_norm": 1.2623125314712524, "learning_rate": 6.375049544046553e-07, "loss": 0.3674, "step": 13512 }, { "epoch": 0.8107637847243055, "grad_norm": 1.3310418128967285, "learning_rate": 6.371136246918075e-07, "loss": 0.3827, "step": 13513 }, { "epoch": 0.8108237835243295, "grad_norm": 1.3396072387695312, "learning_rate": 6.367224030971348e-07, "loss": 0.3521, "step": 13514 }, { "epoch": 0.8108837823243535, "grad_norm": 1.3389655351638794, "learning_rate": 6.363312896354109e-07, "loss": 0.3521, "step": 13515 }, { "epoch": 0.8109437811243775, "grad_norm": 1.4298720359802246, "learning_rate": 6.359402843214084e-07, "loss": 0.4226, "step": 13516 }, { "epoch": 0.8110037799244015, "grad_norm": 1.2797045707702637, "learning_rate": 6.355493871698929e-07, "loss": 0.3147, "step": 13517 }, { "epoch": 0.8110637787244255, "grad_norm": 1.2027602195739746, "learning_rate": 6.351585981956264e-07, "loss": 0.392, "step": 13518 }, { "epoch": 0.8111237775244495, "grad_norm": 1.3714168071746826, "learning_rate": 6.347679174133664e-07, "loss": 0.3765, "step": 13519 }, { "epoch": 0.8111837763244735, "grad_norm": 1.3738727569580078, "learning_rate": 6.343773448378691e-07, "loss": 0.3627, "step": 13520 }, { "epoch": 0.8112437751244975, "grad_norm": 1.3362033367156982, "learning_rate": 6.339868804838841e-07, "loss": 0.3396, "step": 13521 }, { "epoch": 0.8113037739245215, "grad_norm": 1.2675950527191162, "learning_rate": 6.335965243661562e-07, "loss": 0.356, "step": 13522 }, { "epoch": 0.8113637727245455, "grad_norm": 1.3525655269622803, "learning_rate": 6.33206276499429e-07, "loss": 0.3717, "step": 13523 }, { "epoch": 0.8114237715245695, "grad_norm": 1.2513784170150757, "learning_rate": 6.328161368984386e-07, "loss": 0.4024, "step": 13524 }, { "epoch": 0.8114837703245935, "grad_norm": 1.430576205253601, "learning_rate": 6.324261055779207e-07, "loss": 0.4109, "step": 13525 }, { "epoch": 0.8115437691246175, "grad_norm": 1.4417150020599365, "learning_rate": 6.320361825526047e-07, "loss": 0.4164, "step": 13526 }, { "epoch": 0.8116037679246415, "grad_norm": 1.3175506591796875, "learning_rate": 6.316463678372143e-07, "loss": 0.3836, "step": 13527 }, { "epoch": 0.8116637667246656, "grad_norm": 1.2585673332214355, "learning_rate": 6.312566614464733e-07, "loss": 0.3353, "step": 13528 }, { "epoch": 0.8117237655246895, "grad_norm": 1.2560229301452637, "learning_rate": 6.308670633950985e-07, "loss": 0.3788, "step": 13529 }, { "epoch": 0.8117837643247136, "grad_norm": 1.3929033279418945, "learning_rate": 6.304775736978018e-07, "loss": 0.3671, "step": 13530 }, { "epoch": 0.8118437631247375, "grad_norm": 1.3212684392929077, "learning_rate": 6.300881923692949e-07, "loss": 0.3423, "step": 13531 }, { "epoch": 0.8119037619247615, "grad_norm": 1.3068790435791016, "learning_rate": 6.296989194242813e-07, "loss": 0.375, "step": 13532 }, { "epoch": 0.8119637607247855, "grad_norm": 1.2983022928237915, "learning_rate": 6.293097548774627e-07, "loss": 0.3628, "step": 13533 }, { "epoch": 0.8120237595248095, "grad_norm": 1.2489893436431885, "learning_rate": 6.28920698743535e-07, "loss": 0.3174, "step": 13534 }, { "epoch": 0.8120837583248335, "grad_norm": 1.3013116121292114, "learning_rate": 6.285317510371927e-07, "loss": 0.3698, "step": 13535 }, { "epoch": 0.8121437571248575, "grad_norm": 1.3109652996063232, "learning_rate": 6.281429117731236e-07, "loss": 0.3793, "step": 13536 }, { "epoch": 0.8122037559248815, "grad_norm": 1.4985219240188599, "learning_rate": 6.277541809660118e-07, "loss": 0.3865, "step": 13537 }, { "epoch": 0.8122637547249055, "grad_norm": 1.346453070640564, "learning_rate": 6.273655586305394e-07, "loss": 0.3638, "step": 13538 }, { "epoch": 0.8123237535249295, "grad_norm": 1.2155168056488037, "learning_rate": 6.269770447813816e-07, "loss": 0.3509, "step": 13539 }, { "epoch": 0.8123837523249535, "grad_norm": 1.3359870910644531, "learning_rate": 6.265886394332128e-07, "loss": 0.3995, "step": 13540 }, { "epoch": 0.8124437511249775, "grad_norm": 1.2224534749984741, "learning_rate": 6.262003426006983e-07, "loss": 0.3959, "step": 13541 }, { "epoch": 0.8125037499250015, "grad_norm": 1.244869351387024, "learning_rate": 6.258121542985038e-07, "loss": 0.3373, "step": 13542 }, { "epoch": 0.8125637487250255, "grad_norm": 1.3925143480300903, "learning_rate": 6.254240745412903e-07, "loss": 0.3737, "step": 13543 }, { "epoch": 0.8126237475250495, "grad_norm": 1.304790735244751, "learning_rate": 6.250361033437132e-07, "loss": 0.3924, "step": 13544 }, { "epoch": 0.8126837463250735, "grad_norm": 1.2857911586761475, "learning_rate": 6.24648240720424e-07, "loss": 0.3794, "step": 13545 }, { "epoch": 0.8127437451250975, "grad_norm": 1.374711513519287, "learning_rate": 6.242604866860699e-07, "loss": 0.3923, "step": 13546 }, { "epoch": 0.8128037439251214, "grad_norm": 1.3357949256896973, "learning_rate": 6.23872841255296e-07, "loss": 0.3743, "step": 13547 }, { "epoch": 0.8128637427251455, "grad_norm": 1.224523901939392, "learning_rate": 6.234853044427416e-07, "loss": 0.4017, "step": 13548 }, { "epoch": 0.8129237415251696, "grad_norm": 1.4380366802215576, "learning_rate": 6.230978762630408e-07, "loss": 0.4198, "step": 13549 }, { "epoch": 0.8129837403251935, "grad_norm": 1.2073885202407837, "learning_rate": 6.22710556730827e-07, "loss": 0.3713, "step": 13550 }, { "epoch": 0.8130437391252175, "grad_norm": 1.339468240737915, "learning_rate": 6.223233458607267e-07, "loss": 0.3814, "step": 13551 }, { "epoch": 0.8131037379252415, "grad_norm": 1.3643105030059814, "learning_rate": 6.21936243667363e-07, "loss": 0.3762, "step": 13552 }, { "epoch": 0.8131637367252655, "grad_norm": 1.354014277458191, "learning_rate": 6.215492501653538e-07, "loss": 0.3863, "step": 13553 }, { "epoch": 0.8132237355252895, "grad_norm": 1.4820232391357422, "learning_rate": 6.211623653693151e-07, "loss": 0.4232, "step": 13554 }, { "epoch": 0.8132837343253135, "grad_norm": 1.3081061840057373, "learning_rate": 6.207755892938602e-07, "loss": 0.3444, "step": 13555 }, { "epoch": 0.8133437331253375, "grad_norm": 1.3250048160552979, "learning_rate": 6.203889219535915e-07, "loss": 0.4271, "step": 13556 }, { "epoch": 0.8134037319253615, "grad_norm": 1.2820159196853638, "learning_rate": 6.200023633631146e-07, "loss": 0.3431, "step": 13557 }, { "epoch": 0.8134637307253855, "grad_norm": 1.3035000562667847, "learning_rate": 6.196159135370265e-07, "loss": 0.3724, "step": 13558 }, { "epoch": 0.8135237295254095, "grad_norm": 1.478920817375183, "learning_rate": 6.19229572489923e-07, "loss": 0.4477, "step": 13559 }, { "epoch": 0.8135837283254335, "grad_norm": 1.3970249891281128, "learning_rate": 6.188433402363938e-07, "loss": 0.3694, "step": 13560 }, { "epoch": 0.8136437271254575, "grad_norm": 1.2830747365951538, "learning_rate": 6.184572167910244e-07, "loss": 0.3518, "step": 13561 }, { "epoch": 0.8137037259254815, "grad_norm": 1.3017995357513428, "learning_rate": 6.180712021683984e-07, "loss": 0.3668, "step": 13562 }, { "epoch": 0.8137637247255055, "grad_norm": 1.2502888441085815, "learning_rate": 6.17685296383093e-07, "loss": 0.3665, "step": 13563 }, { "epoch": 0.8138237235255295, "grad_norm": 1.4098421335220337, "learning_rate": 6.172994994496826e-07, "loss": 0.3695, "step": 13564 }, { "epoch": 0.8138837223255535, "grad_norm": 1.3357815742492676, "learning_rate": 6.169138113827354e-07, "loss": 0.3955, "step": 13565 }, { "epoch": 0.8139437211255774, "grad_norm": 1.3352197408676147, "learning_rate": 6.16528232196819e-07, "loss": 0.4132, "step": 13566 }, { "epoch": 0.8140037199256015, "grad_norm": 1.2970753908157349, "learning_rate": 6.161427619064942e-07, "loss": 0.3872, "step": 13567 }, { "epoch": 0.8140637187256254, "grad_norm": 1.5940710306167603, "learning_rate": 6.157574005263177e-07, "loss": 0.3359, "step": 13568 }, { "epoch": 0.8141237175256495, "grad_norm": 1.2969427108764648, "learning_rate": 6.153721480708443e-07, "loss": 0.3271, "step": 13569 }, { "epoch": 0.8141837163256734, "grad_norm": 1.4169262647628784, "learning_rate": 6.149870045546219e-07, "loss": 0.4156, "step": 13570 }, { "epoch": 0.8142437151256975, "grad_norm": 1.4339380264282227, "learning_rate": 6.14601969992197e-07, "loss": 0.3813, "step": 13571 }, { "epoch": 0.8143037139257215, "grad_norm": 1.200927972793579, "learning_rate": 6.142170443981101e-07, "loss": 0.3735, "step": 13572 }, { "epoch": 0.8143637127257455, "grad_norm": 1.3949726819992065, "learning_rate": 6.138322277868967e-07, "loss": 0.4278, "step": 13573 }, { "epoch": 0.8144237115257695, "grad_norm": 1.2990235090255737, "learning_rate": 6.134475201730916e-07, "loss": 0.3546, "step": 13574 }, { "epoch": 0.8144837103257935, "grad_norm": 1.2917561531066895, "learning_rate": 6.130629215712227e-07, "loss": 0.3826, "step": 13575 }, { "epoch": 0.8145437091258175, "grad_norm": 1.2980707883834839, "learning_rate": 6.126784319958142e-07, "loss": 0.3829, "step": 13576 }, { "epoch": 0.8146037079258415, "grad_norm": 1.3367993831634521, "learning_rate": 6.122940514613859e-07, "loss": 0.3734, "step": 13577 }, { "epoch": 0.8146637067258655, "grad_norm": 1.2106695175170898, "learning_rate": 6.119097799824561e-07, "loss": 0.3959, "step": 13578 }, { "epoch": 0.8147237055258895, "grad_norm": 1.246248722076416, "learning_rate": 6.115256175735353e-07, "loss": 0.3517, "step": 13579 }, { "epoch": 0.8147837043259135, "grad_norm": 1.2556750774383545, "learning_rate": 6.111415642491315e-07, "loss": 0.3734, "step": 13580 }, { "epoch": 0.8148437031259375, "grad_norm": 1.3454667329788208, "learning_rate": 6.107576200237501e-07, "loss": 0.3961, "step": 13581 }, { "epoch": 0.8149037019259615, "grad_norm": 1.32659113407135, "learning_rate": 6.103737849118899e-07, "loss": 0.3684, "step": 13582 }, { "epoch": 0.8149637007259855, "grad_norm": 1.2840371131896973, "learning_rate": 6.099900589280457e-07, "loss": 0.3242, "step": 13583 }, { "epoch": 0.8150236995260095, "grad_norm": 1.405009150505066, "learning_rate": 6.096064420867111e-07, "loss": 0.3356, "step": 13584 }, { "epoch": 0.8150836983260334, "grad_norm": 1.4337711334228516, "learning_rate": 6.09222934402372e-07, "loss": 0.3948, "step": 13585 }, { "epoch": 0.8151436971260575, "grad_norm": 1.343279480934143, "learning_rate": 6.088395358895139e-07, "loss": 0.4085, "step": 13586 }, { "epoch": 0.8152036959260814, "grad_norm": 1.3726228475570679, "learning_rate": 6.084562465626128e-07, "loss": 0.3806, "step": 13587 }, { "epoch": 0.8152636947261055, "grad_norm": 1.2767390012741089, "learning_rate": 6.080730664361462e-07, "loss": 0.3649, "step": 13588 }, { "epoch": 0.8153236935261294, "grad_norm": 1.1957193613052368, "learning_rate": 6.076899955245832e-07, "loss": 0.3031, "step": 13589 }, { "epoch": 0.8153836923261535, "grad_norm": 1.2346367835998535, "learning_rate": 6.07307033842393e-07, "loss": 0.3635, "step": 13590 }, { "epoch": 0.8154436911261774, "grad_norm": 1.3911336660385132, "learning_rate": 6.069241814040367e-07, "loss": 0.4078, "step": 13591 }, { "epoch": 0.8155036899262015, "grad_norm": 1.4206246137619019, "learning_rate": 6.065414382239728e-07, "loss": 0.3885, "step": 13592 }, { "epoch": 0.8155636887262254, "grad_norm": 1.3477952480316162, "learning_rate": 6.061588043166566e-07, "loss": 0.41, "step": 13593 }, { "epoch": 0.8156236875262495, "grad_norm": 1.3747165203094482, "learning_rate": 6.057762796965385e-07, "loss": 0.3378, "step": 13594 }, { "epoch": 0.8156836863262735, "grad_norm": 1.4581925868988037, "learning_rate": 6.053938643780628e-07, "loss": 0.3741, "step": 13595 }, { "epoch": 0.8157436851262975, "grad_norm": 1.2570451498031616, "learning_rate": 6.050115583756745e-07, "loss": 0.3641, "step": 13596 }, { "epoch": 0.8158036839263215, "grad_norm": 1.3095648288726807, "learning_rate": 6.046293617038097e-07, "loss": 0.4237, "step": 13597 }, { "epoch": 0.8158636827263455, "grad_norm": 1.4124516248703003, "learning_rate": 6.042472743769029e-07, "loss": 0.3896, "step": 13598 }, { "epoch": 0.8159236815263695, "grad_norm": 1.382400631904602, "learning_rate": 6.038652964093828e-07, "loss": 0.3738, "step": 13599 }, { "epoch": 0.8159836803263935, "grad_norm": 1.3953241109848022, "learning_rate": 6.03483427815676e-07, "loss": 0.3874, "step": 13600 }, { "epoch": 0.8160436791264175, "grad_norm": 1.310125470161438, "learning_rate": 6.031016686102043e-07, "loss": 0.3686, "step": 13601 }, { "epoch": 0.8161036779264415, "grad_norm": 1.4365850687026978, "learning_rate": 6.027200188073829e-07, "loss": 0.4012, "step": 13602 }, { "epoch": 0.8161636767264655, "grad_norm": 1.3956247568130493, "learning_rate": 6.023384784216278e-07, "loss": 0.3464, "step": 13603 }, { "epoch": 0.8162236755264894, "grad_norm": 1.454594373703003, "learning_rate": 6.019570474673456e-07, "loss": 0.3894, "step": 13604 }, { "epoch": 0.8162836743265135, "grad_norm": 1.4325695037841797, "learning_rate": 6.015757259589429e-07, "loss": 0.3787, "step": 13605 }, { "epoch": 0.8163436731265374, "grad_norm": 1.2631525993347168, "learning_rate": 6.0119451391082e-07, "loss": 0.3474, "step": 13606 }, { "epoch": 0.8164036719265615, "grad_norm": 1.2710351943969727, "learning_rate": 6.008134113373725e-07, "loss": 0.3548, "step": 13607 }, { "epoch": 0.8164636707265854, "grad_norm": 1.442588210105896, "learning_rate": 6.004324182529945e-07, "loss": 0.3491, "step": 13608 }, { "epoch": 0.8165236695266095, "grad_norm": 1.3793537616729736, "learning_rate": 6.00051534672074e-07, "loss": 0.3289, "step": 13609 }, { "epoch": 0.8165836683266334, "grad_norm": 1.3100155591964722, "learning_rate": 5.996707606089949e-07, "loss": 0.3486, "step": 13610 }, { "epoch": 0.8166436671266575, "grad_norm": 1.1983195543289185, "learning_rate": 5.992900960781361e-07, "loss": 0.3148, "step": 13611 }, { "epoch": 0.8167036659266814, "grad_norm": 1.4348305463790894, "learning_rate": 5.989095410938758e-07, "loss": 0.4123, "step": 13612 }, { "epoch": 0.8167636647267055, "grad_norm": 1.3864043951034546, "learning_rate": 5.985290956705849e-07, "loss": 0.3913, "step": 13613 }, { "epoch": 0.8168236635267294, "grad_norm": 1.3191182613372803, "learning_rate": 5.981487598226301e-07, "loss": 0.3655, "step": 13614 }, { "epoch": 0.8168836623267535, "grad_norm": 1.480812668800354, "learning_rate": 5.977685335643765e-07, "loss": 0.383, "step": 13615 }, { "epoch": 0.8169436611267774, "grad_norm": 1.4882681369781494, "learning_rate": 5.973884169101823e-07, "loss": 0.3935, "step": 13616 }, { "epoch": 0.8170036599268015, "grad_norm": 1.4054356813430786, "learning_rate": 5.970084098744049e-07, "loss": 0.3616, "step": 13617 }, { "epoch": 0.8170636587268255, "grad_norm": 1.2712280750274658, "learning_rate": 5.96628512471392e-07, "loss": 0.3782, "step": 13618 }, { "epoch": 0.8171236575268495, "grad_norm": 1.3679932355880737, "learning_rate": 5.962487247154926e-07, "loss": 0.354, "step": 13619 }, { "epoch": 0.8171836563268735, "grad_norm": 1.4401870965957642, "learning_rate": 5.958690466210504e-07, "loss": 0.3296, "step": 13620 }, { "epoch": 0.8172436551268975, "grad_norm": 1.326890230178833, "learning_rate": 5.954894782024027e-07, "loss": 0.3946, "step": 13621 }, { "epoch": 0.8173036539269215, "grad_norm": 1.271492600440979, "learning_rate": 5.951100194738848e-07, "loss": 0.3805, "step": 13622 }, { "epoch": 0.8173636527269454, "grad_norm": 1.2953280210494995, "learning_rate": 5.947306704498261e-07, "loss": 0.3751, "step": 13623 }, { "epoch": 0.8174236515269695, "grad_norm": 1.4342268705368042, "learning_rate": 5.943514311445543e-07, "loss": 0.3822, "step": 13624 }, { "epoch": 0.8174836503269934, "grad_norm": 1.3956046104431152, "learning_rate": 5.939723015723906e-07, "loss": 0.3307, "step": 13625 }, { "epoch": 0.8175436491270175, "grad_norm": 1.2110569477081299, "learning_rate": 5.935932817476527e-07, "loss": 0.3723, "step": 13626 }, { "epoch": 0.8176036479270414, "grad_norm": 1.4868128299713135, "learning_rate": 5.932143716846557e-07, "loss": 0.357, "step": 13627 }, { "epoch": 0.8176636467270655, "grad_norm": 1.408266305923462, "learning_rate": 5.928355713977088e-07, "loss": 0.4004, "step": 13628 }, { "epoch": 0.8177236455270894, "grad_norm": 1.3468612432479858, "learning_rate": 5.924568809011173e-07, "loss": 0.3639, "step": 13629 }, { "epoch": 0.8177836443271135, "grad_norm": 1.257384181022644, "learning_rate": 5.920783002091816e-07, "loss": 0.3253, "step": 13630 }, { "epoch": 0.8178436431271374, "grad_norm": 1.3111755847930908, "learning_rate": 5.916998293362001e-07, "loss": 0.3423, "step": 13631 }, { "epoch": 0.8179036419271615, "grad_norm": 1.384236454963684, "learning_rate": 5.913214682964677e-07, "loss": 0.4002, "step": 13632 }, { "epoch": 0.8179636407271854, "grad_norm": 1.4359335899353027, "learning_rate": 5.909432171042699e-07, "loss": 0.3877, "step": 13633 }, { "epoch": 0.8180236395272095, "grad_norm": 1.426973581314087, "learning_rate": 5.905650757738937e-07, "loss": 0.4132, "step": 13634 }, { "epoch": 0.8180836383272334, "grad_norm": 1.3675156831741333, "learning_rate": 5.901870443196186e-07, "loss": 0.3447, "step": 13635 }, { "epoch": 0.8181436371272575, "grad_norm": 1.3394200801849365, "learning_rate": 5.898091227557228e-07, "loss": 0.3568, "step": 13636 }, { "epoch": 0.8182036359272814, "grad_norm": 1.2040481567382812, "learning_rate": 5.894313110964774e-07, "loss": 0.337, "step": 13637 }, { "epoch": 0.8182636347273055, "grad_norm": 1.3172017335891724, "learning_rate": 5.890536093561499e-07, "loss": 0.3749, "step": 13638 }, { "epoch": 0.8183236335273295, "grad_norm": 1.2041773796081543, "learning_rate": 5.886760175490064e-07, "loss": 0.3762, "step": 13639 }, { "epoch": 0.8183836323273534, "grad_norm": 1.354537010192871, "learning_rate": 5.882985356893058e-07, "loss": 0.377, "step": 13640 }, { "epoch": 0.8184436311273775, "grad_norm": 1.320278525352478, "learning_rate": 5.879211637913025e-07, "loss": 0.3643, "step": 13641 }, { "epoch": 0.8185036299274014, "grad_norm": 1.3838526010513306, "learning_rate": 5.875439018692509e-07, "loss": 0.3927, "step": 13642 }, { "epoch": 0.8185636287274255, "grad_norm": 1.331766128540039, "learning_rate": 5.871667499373966e-07, "loss": 0.3793, "step": 13643 }, { "epoch": 0.8186236275274494, "grad_norm": 1.4266724586486816, "learning_rate": 5.867897080099836e-07, "loss": 0.4222, "step": 13644 }, { "epoch": 0.8186836263274735, "grad_norm": 1.3404370546340942, "learning_rate": 5.864127761012495e-07, "loss": 0.3867, "step": 13645 }, { "epoch": 0.8187436251274974, "grad_norm": 1.3724037408828735, "learning_rate": 5.86035954225432e-07, "loss": 0.393, "step": 13646 }, { "epoch": 0.8188036239275215, "grad_norm": 1.459476351737976, "learning_rate": 5.856592423967603e-07, "loss": 0.4081, "step": 13647 }, { "epoch": 0.8188636227275454, "grad_norm": 1.3863271474838257, "learning_rate": 5.852826406294604e-07, "loss": 0.3533, "step": 13648 }, { "epoch": 0.8189236215275695, "grad_norm": 1.2827173471450806, "learning_rate": 5.849061489377567e-07, "loss": 0.3291, "step": 13649 }, { "epoch": 0.8189836203275934, "grad_norm": 1.4408246278762817, "learning_rate": 5.845297673358657e-07, "loss": 0.3733, "step": 13650 }, { "epoch": 0.8190436191276175, "grad_norm": 1.3057252168655396, "learning_rate": 5.841534958380033e-07, "loss": 0.3731, "step": 13651 }, { "epoch": 0.8191036179276414, "grad_norm": 1.4370027780532837, "learning_rate": 5.837773344583788e-07, "loss": 0.4175, "step": 13652 }, { "epoch": 0.8191636167276655, "grad_norm": 1.2930974960327148, "learning_rate": 5.834012832111973e-07, "loss": 0.3361, "step": 13653 }, { "epoch": 0.8192236155276894, "grad_norm": 1.3897429704666138, "learning_rate": 5.830253421106623e-07, "loss": 0.3889, "step": 13654 }, { "epoch": 0.8192836143277135, "grad_norm": 1.433004379272461, "learning_rate": 5.826495111709703e-07, "loss": 0.3876, "step": 13655 }, { "epoch": 0.8193436131277374, "grad_norm": 1.1742476224899292, "learning_rate": 5.822737904063146e-07, "loss": 0.3538, "step": 13656 }, { "epoch": 0.8194036119277615, "grad_norm": 1.3761177062988281, "learning_rate": 5.818981798308845e-07, "loss": 0.4255, "step": 13657 }, { "epoch": 0.8194636107277854, "grad_norm": 1.4329739809036255, "learning_rate": 5.815226794588656e-07, "loss": 0.3448, "step": 13658 }, { "epoch": 0.8195236095278094, "grad_norm": 1.6624947786331177, "learning_rate": 5.811472893044386e-07, "loss": 0.4226, "step": 13659 }, { "epoch": 0.8195836083278334, "grad_norm": 1.3617210388183594, "learning_rate": 5.807720093817798e-07, "loss": 0.3571, "step": 13660 }, { "epoch": 0.8196436071278574, "grad_norm": 1.3316354751586914, "learning_rate": 5.803968397050628e-07, "loss": 0.4138, "step": 13661 }, { "epoch": 0.8197036059278815, "grad_norm": 1.3261197805404663, "learning_rate": 5.800217802884545e-07, "loss": 0.3906, "step": 13662 }, { "epoch": 0.8197636047279054, "grad_norm": 1.4017010927200317, "learning_rate": 5.796468311461219e-07, "loss": 0.3643, "step": 13663 }, { "epoch": 0.8198236035279295, "grad_norm": 1.4481505155563354, "learning_rate": 5.79271992292222e-07, "loss": 0.385, "step": 13664 }, { "epoch": 0.8198836023279534, "grad_norm": 1.3879493474960327, "learning_rate": 5.788972637409119e-07, "loss": 0.403, "step": 13665 }, { "epoch": 0.8199436011279775, "grad_norm": 1.273842453956604, "learning_rate": 5.785226455063446e-07, "loss": 0.3528, "step": 13666 }, { "epoch": 0.8200035999280014, "grad_norm": 1.320793867111206, "learning_rate": 5.781481376026666e-07, "loss": 0.3226, "step": 13667 }, { "epoch": 0.8200635987280255, "grad_norm": 1.3365042209625244, "learning_rate": 5.777737400440214e-07, "loss": 0.3938, "step": 13668 }, { "epoch": 0.8201235975280494, "grad_norm": 1.3270113468170166, "learning_rate": 5.773994528445477e-07, "loss": 0.3935, "step": 13669 }, { "epoch": 0.8201835963280735, "grad_norm": 1.2668287754058838, "learning_rate": 5.770252760183826e-07, "loss": 0.3669, "step": 13670 }, { "epoch": 0.8202435951280974, "grad_norm": 1.2193958759307861, "learning_rate": 5.766512095796553e-07, "loss": 0.3519, "step": 13671 }, { "epoch": 0.8203035939281215, "grad_norm": 1.406312108039856, "learning_rate": 5.762772535424923e-07, "loss": 0.3794, "step": 13672 }, { "epoch": 0.8203635927281454, "grad_norm": 1.162030577659607, "learning_rate": 5.75903407921018e-07, "loss": 0.3369, "step": 13673 }, { "epoch": 0.8204235915281695, "grad_norm": 1.3579412698745728, "learning_rate": 5.755296727293495e-07, "loss": 0.3589, "step": 13674 }, { "epoch": 0.8204835903281934, "grad_norm": 1.3962397575378418, "learning_rate": 5.751560479816017e-07, "loss": 0.4019, "step": 13675 }, { "epoch": 0.8205435891282175, "grad_norm": 1.3549959659576416, "learning_rate": 5.747825336918833e-07, "loss": 0.3884, "step": 13676 }, { "epoch": 0.8206035879282414, "grad_norm": 1.3486186265945435, "learning_rate": 5.744091298743016e-07, "loss": 0.3866, "step": 13677 }, { "epoch": 0.8206635867282654, "grad_norm": 1.286005973815918, "learning_rate": 5.7403583654296e-07, "loss": 0.3767, "step": 13678 }, { "epoch": 0.8207235855282894, "grad_norm": 1.4145196676254272, "learning_rate": 5.736626537119522e-07, "loss": 0.3707, "step": 13679 }, { "epoch": 0.8207835843283134, "grad_norm": 1.3635954856872559, "learning_rate": 5.732895813953743e-07, "loss": 0.3883, "step": 13680 }, { "epoch": 0.8208435831283374, "grad_norm": 1.3787099123001099, "learning_rate": 5.729166196073142e-07, "loss": 0.3746, "step": 13681 }, { "epoch": 0.8209035819283614, "grad_norm": 1.303706169128418, "learning_rate": 5.725437683618586e-07, "loss": 0.3518, "step": 13682 }, { "epoch": 0.8209635807283854, "grad_norm": 1.4307183027267456, "learning_rate": 5.721710276730873e-07, "loss": 0.3856, "step": 13683 }, { "epoch": 0.8210235795284094, "grad_norm": 1.3673391342163086, "learning_rate": 5.717983975550763e-07, "loss": 0.4203, "step": 13684 }, { "epoch": 0.8210835783284335, "grad_norm": 1.3630255460739136, "learning_rate": 5.714258780218995e-07, "loss": 0.3594, "step": 13685 }, { "epoch": 0.8211435771284574, "grad_norm": 1.3504104614257812, "learning_rate": 5.710534690876251e-07, "loss": 0.4118, "step": 13686 }, { "epoch": 0.8212035759284815, "grad_norm": 1.2135258913040161, "learning_rate": 5.70681170766317e-07, "loss": 0.3774, "step": 13687 }, { "epoch": 0.8212635747285054, "grad_norm": 1.5081862211227417, "learning_rate": 5.703089830720341e-07, "loss": 0.3658, "step": 13688 }, { "epoch": 0.8213235735285295, "grad_norm": 1.3975492715835571, "learning_rate": 5.69936906018834e-07, "loss": 0.3957, "step": 13689 }, { "epoch": 0.8213835723285534, "grad_norm": 1.5437895059585571, "learning_rate": 5.69564939620768e-07, "loss": 0.3466, "step": 13690 }, { "epoch": 0.8214435711285775, "grad_norm": 1.5370205640792847, "learning_rate": 5.691930838918818e-07, "loss": 0.3964, "step": 13691 }, { "epoch": 0.8215035699286014, "grad_norm": 1.2585277557373047, "learning_rate": 5.688213388462212e-07, "loss": 0.3369, "step": 13692 }, { "epoch": 0.8215635687286255, "grad_norm": 1.3861154317855835, "learning_rate": 5.684497044978243e-07, "loss": 0.338, "step": 13693 }, { "epoch": 0.8216235675286494, "grad_norm": 1.4440462589263916, "learning_rate": 5.680781808607252e-07, "loss": 0.3996, "step": 13694 }, { "epoch": 0.8216835663286735, "grad_norm": 1.2440769672393799, "learning_rate": 5.67706767948956e-07, "loss": 0.3529, "step": 13695 }, { "epoch": 0.8217435651286974, "grad_norm": 1.326135516166687, "learning_rate": 5.673354657765423e-07, "loss": 0.3312, "step": 13696 }, { "epoch": 0.8218035639287214, "grad_norm": 1.4700080156326294, "learning_rate": 5.669642743575072e-07, "loss": 0.4264, "step": 13697 }, { "epoch": 0.8218635627287454, "grad_norm": 1.320534348487854, "learning_rate": 5.665931937058686e-07, "loss": 0.3859, "step": 13698 }, { "epoch": 0.8219235615287694, "grad_norm": 1.4915035963058472, "learning_rate": 5.662222238356408e-07, "loss": 0.3418, "step": 13699 }, { "epoch": 0.8219835603287934, "grad_norm": 1.1838058233261108, "learning_rate": 5.658513647608324e-07, "loss": 0.3432, "step": 13700 }, { "epoch": 0.8220435591288174, "grad_norm": 1.349193811416626, "learning_rate": 5.654806164954508e-07, "loss": 0.3415, "step": 13701 }, { "epoch": 0.8221035579288414, "grad_norm": 1.2707439661026, "learning_rate": 5.651099790534967e-07, "loss": 0.3626, "step": 13702 }, { "epoch": 0.8221635567288654, "grad_norm": 1.3904225826263428, "learning_rate": 5.647394524489664e-07, "loss": 0.4009, "step": 13703 }, { "epoch": 0.8222235555288894, "grad_norm": 1.2255419492721558, "learning_rate": 5.643690366958551e-07, "loss": 0.395, "step": 13704 }, { "epoch": 0.8222835543289134, "grad_norm": 1.2876471281051636, "learning_rate": 5.639987318081504e-07, "loss": 0.3547, "step": 13705 }, { "epoch": 0.8223435531289375, "grad_norm": 1.2669692039489746, "learning_rate": 5.636285377998365e-07, "loss": 0.4063, "step": 13706 }, { "epoch": 0.8224035519289614, "grad_norm": 1.3510574102401733, "learning_rate": 5.63258454684895e-07, "loss": 0.339, "step": 13707 }, { "epoch": 0.8224635507289855, "grad_norm": 1.3965054750442505, "learning_rate": 5.628884824773016e-07, "loss": 0.4167, "step": 13708 }, { "epoch": 0.8225235495290094, "grad_norm": 1.1555525064468384, "learning_rate": 5.625186211910305e-07, "loss": 0.3316, "step": 13709 }, { "epoch": 0.8225835483290335, "grad_norm": 1.2943083047866821, "learning_rate": 5.621488708400457e-07, "loss": 0.2888, "step": 13710 }, { "epoch": 0.8226435471290574, "grad_norm": 1.3851386308670044, "learning_rate": 5.617792314383148e-07, "loss": 0.4134, "step": 13711 }, { "epoch": 0.8227035459290815, "grad_norm": 1.3513883352279663, "learning_rate": 5.614097029997946e-07, "loss": 0.3754, "step": 13712 }, { "epoch": 0.8227635447291054, "grad_norm": 1.3115562200546265, "learning_rate": 5.610402855384424e-07, "loss": 0.3806, "step": 13713 }, { "epoch": 0.8228235435291295, "grad_norm": 1.3211907148361206, "learning_rate": 5.60670979068209e-07, "loss": 0.3913, "step": 13714 }, { "epoch": 0.8228835423291534, "grad_norm": 1.3244370222091675, "learning_rate": 5.603017836030401e-07, "loss": 0.3551, "step": 13715 }, { "epoch": 0.8229435411291774, "grad_norm": 1.4646501541137695, "learning_rate": 5.599326991568806e-07, "loss": 0.3605, "step": 13716 }, { "epoch": 0.8230035399292014, "grad_norm": 1.3259910345077515, "learning_rate": 5.595637257436684e-07, "loss": 0.3621, "step": 13717 }, { "epoch": 0.8230635387292254, "grad_norm": 1.3179582357406616, "learning_rate": 5.591948633773362e-07, "loss": 0.3683, "step": 13718 }, { "epoch": 0.8231235375292494, "grad_norm": 1.3486876487731934, "learning_rate": 5.588261120718172e-07, "loss": 0.3633, "step": 13719 }, { "epoch": 0.8231835363292734, "grad_norm": 1.2773622274398804, "learning_rate": 5.584574718410358e-07, "loss": 0.3427, "step": 13720 }, { "epoch": 0.8232435351292974, "grad_norm": 1.3626042604446411, "learning_rate": 5.580889426989139e-07, "loss": 0.3872, "step": 13721 }, { "epoch": 0.8233035339293214, "grad_norm": 1.4191101789474487, "learning_rate": 5.577205246593683e-07, "loss": 0.3917, "step": 13722 }, { "epoch": 0.8233635327293454, "grad_norm": 1.444290041923523, "learning_rate": 5.57352217736314e-07, "loss": 0.3735, "step": 13723 }, { "epoch": 0.8234235315293694, "grad_norm": 1.4764535427093506, "learning_rate": 5.569840219436611e-07, "loss": 0.3873, "step": 13724 }, { "epoch": 0.8234835303293934, "grad_norm": 1.3438165187835693, "learning_rate": 5.566159372953118e-07, "loss": 0.3725, "step": 13725 }, { "epoch": 0.8235435291294174, "grad_norm": 1.23017156124115, "learning_rate": 5.562479638051695e-07, "loss": 0.335, "step": 13726 }, { "epoch": 0.8236035279294414, "grad_norm": 1.2283737659454346, "learning_rate": 5.55880101487129e-07, "loss": 0.3655, "step": 13727 }, { "epoch": 0.8236635267294654, "grad_norm": 1.4186508655548096, "learning_rate": 5.55512350355085e-07, "loss": 0.3345, "step": 13728 }, { "epoch": 0.8237235255294895, "grad_norm": 1.4733304977416992, "learning_rate": 5.551447104229233e-07, "loss": 0.3625, "step": 13729 }, { "epoch": 0.8237835243295134, "grad_norm": 1.410172939300537, "learning_rate": 5.547771817045287e-07, "loss": 0.4077, "step": 13730 }, { "epoch": 0.8238435231295375, "grad_norm": 1.2438685894012451, "learning_rate": 5.544097642137826e-07, "loss": 0.3358, "step": 13731 }, { "epoch": 0.8239035219295614, "grad_norm": 1.4758821725845337, "learning_rate": 5.540424579645598e-07, "loss": 0.4096, "step": 13732 }, { "epoch": 0.8239635207295855, "grad_norm": 1.3563307523727417, "learning_rate": 5.536752629707316e-07, "loss": 0.3619, "step": 13733 }, { "epoch": 0.8240235195296094, "grad_norm": 1.2094770669937134, "learning_rate": 5.533081792461643e-07, "loss": 0.3278, "step": 13734 }, { "epoch": 0.8240835183296334, "grad_norm": 1.3668583631515503, "learning_rate": 5.529412068047229e-07, "loss": 0.4021, "step": 13735 }, { "epoch": 0.8241435171296574, "grad_norm": 1.3169516324996948, "learning_rate": 5.525743456602654e-07, "loss": 0.395, "step": 13736 }, { "epoch": 0.8242035159296814, "grad_norm": 1.2445000410079956, "learning_rate": 5.522075958266451e-07, "loss": 0.3508, "step": 13737 }, { "epoch": 0.8242635147297054, "grad_norm": 1.2354767322540283, "learning_rate": 5.518409573177154e-07, "loss": 0.3296, "step": 13738 }, { "epoch": 0.8243235135297294, "grad_norm": 1.3809568881988525, "learning_rate": 5.514744301473203e-07, "loss": 0.3512, "step": 13739 }, { "epoch": 0.8243835123297534, "grad_norm": 1.3911317586898804, "learning_rate": 5.511080143293029e-07, "loss": 0.3852, "step": 13740 }, { "epoch": 0.8244435111297774, "grad_norm": 1.4828282594680786, "learning_rate": 5.507417098774997e-07, "loss": 0.3835, "step": 13741 }, { "epoch": 0.8245035099298014, "grad_norm": 1.463921308517456, "learning_rate": 5.503755168057455e-07, "loss": 0.3854, "step": 13742 }, { "epoch": 0.8245635087298254, "grad_norm": 1.3166775703430176, "learning_rate": 5.500094351278701e-07, "loss": 0.3658, "step": 13743 }, { "epoch": 0.8246235075298494, "grad_norm": 1.461736798286438, "learning_rate": 5.496434648576982e-07, "loss": 0.3675, "step": 13744 }, { "epoch": 0.8246835063298734, "grad_norm": 1.3261677026748657, "learning_rate": 5.492776060090513e-07, "loss": 0.3744, "step": 13745 }, { "epoch": 0.8247435051298974, "grad_norm": 1.3104645013809204, "learning_rate": 5.489118585957445e-07, "loss": 0.3677, "step": 13746 }, { "epoch": 0.8248035039299214, "grad_norm": 1.3145709037780762, "learning_rate": 5.48546222631593e-07, "loss": 0.4114, "step": 13747 }, { "epoch": 0.8248635027299454, "grad_norm": 1.3887653350830078, "learning_rate": 5.481806981304034e-07, "loss": 0.3678, "step": 13748 }, { "epoch": 0.8249235015299694, "grad_norm": 1.3262230157852173, "learning_rate": 5.478152851059795e-07, "loss": 0.3656, "step": 13749 }, { "epoch": 0.8249835003299933, "grad_norm": 1.3099623918533325, "learning_rate": 5.474499835721231e-07, "loss": 0.3767, "step": 13750 }, { "epoch": 0.8250434991300174, "grad_norm": 1.3666354417800903, "learning_rate": 5.470847935426291e-07, "loss": 0.4236, "step": 13751 }, { "epoch": 0.8251034979300415, "grad_norm": 1.272891879081726, "learning_rate": 5.467197150312894e-07, "loss": 0.3581, "step": 13752 }, { "epoch": 0.8251634967300654, "grad_norm": 1.3385604619979858, "learning_rate": 5.463547480518894e-07, "loss": 0.3618, "step": 13753 }, { "epoch": 0.8252234955300894, "grad_norm": 1.24595308303833, "learning_rate": 5.459898926182144e-07, "loss": 0.3302, "step": 13754 }, { "epoch": 0.8252834943301134, "grad_norm": 1.3341060876846313, "learning_rate": 5.456251487440438e-07, "loss": 0.3698, "step": 13755 }, { "epoch": 0.8253434931301374, "grad_norm": 1.4033399820327759, "learning_rate": 5.4526051644315e-07, "loss": 0.4243, "step": 13756 }, { "epoch": 0.8254034919301614, "grad_norm": 1.2495495080947876, "learning_rate": 5.448959957293055e-07, "loss": 0.3955, "step": 13757 }, { "epoch": 0.8254634907301854, "grad_norm": 1.2109681367874146, "learning_rate": 5.445315866162749e-07, "loss": 0.3106, "step": 13758 }, { "epoch": 0.8255234895302094, "grad_norm": 1.2754584550857544, "learning_rate": 5.44167289117822e-07, "loss": 0.3175, "step": 13759 }, { "epoch": 0.8255834883302334, "grad_norm": 1.5568599700927734, "learning_rate": 5.43803103247704e-07, "loss": 0.3793, "step": 13760 }, { "epoch": 0.8256434871302574, "grad_norm": 1.3916692733764648, "learning_rate": 5.434390290196736e-07, "loss": 0.3776, "step": 13761 }, { "epoch": 0.8257034859302814, "grad_norm": 1.2603553533554077, "learning_rate": 5.430750664474816e-07, "loss": 0.3893, "step": 13762 }, { "epoch": 0.8257634847303054, "grad_norm": 1.3993779420852661, "learning_rate": 5.427112155448729e-07, "loss": 0.3696, "step": 13763 }, { "epoch": 0.8258234835303294, "grad_norm": 1.2133104801177979, "learning_rate": 5.423474763255871e-07, "loss": 0.3214, "step": 13764 }, { "epoch": 0.8258834823303534, "grad_norm": 1.4252047538757324, "learning_rate": 5.419838488033631e-07, "loss": 0.427, "step": 13765 }, { "epoch": 0.8259434811303774, "grad_norm": 1.4056122303009033, "learning_rate": 5.416203329919324e-07, "loss": 0.3535, "step": 13766 }, { "epoch": 0.8260034799304014, "grad_norm": 1.4538301229476929, "learning_rate": 5.412569289050232e-07, "loss": 0.4443, "step": 13767 }, { "epoch": 0.8260634787304254, "grad_norm": 1.46762216091156, "learning_rate": 5.408936365563592e-07, "loss": 0.3572, "step": 13768 }, { "epoch": 0.8261234775304493, "grad_norm": 1.4615333080291748, "learning_rate": 5.405304559596617e-07, "loss": 0.4245, "step": 13769 }, { "epoch": 0.8261834763304734, "grad_norm": 1.328285574913025, "learning_rate": 5.401673871286456e-07, "loss": 0.397, "step": 13770 }, { "epoch": 0.8262434751304973, "grad_norm": 1.3805006742477417, "learning_rate": 5.398044300770214e-07, "loss": 0.3669, "step": 13771 }, { "epoch": 0.8263034739305214, "grad_norm": 1.2037075757980347, "learning_rate": 5.394415848184981e-07, "loss": 0.314, "step": 13772 }, { "epoch": 0.8263634727305453, "grad_norm": 1.2349520921707153, "learning_rate": 5.390788513667768e-07, "loss": 0.3573, "step": 13773 }, { "epoch": 0.8264234715305694, "grad_norm": 1.1976910829544067, "learning_rate": 5.387162297355593e-07, "loss": 0.3456, "step": 13774 }, { "epoch": 0.8264834703305934, "grad_norm": 1.379610300064087, "learning_rate": 5.383537199385365e-07, "loss": 0.3823, "step": 13775 }, { "epoch": 0.8265434691306174, "grad_norm": 1.3195955753326416, "learning_rate": 5.379913219894006e-07, "loss": 0.3913, "step": 13776 }, { "epoch": 0.8266034679306414, "grad_norm": 1.3182960748672485, "learning_rate": 5.376290359018383e-07, "loss": 0.4138, "step": 13777 }, { "epoch": 0.8266634667306654, "grad_norm": 1.3433034420013428, "learning_rate": 5.372668616895308e-07, "loss": 0.3254, "step": 13778 }, { "epoch": 0.8267234655306894, "grad_norm": 1.3456155061721802, "learning_rate": 5.369047993661559e-07, "loss": 0.3247, "step": 13779 }, { "epoch": 0.8267834643307134, "grad_norm": 1.4794367551803589, "learning_rate": 5.365428489453858e-07, "loss": 0.4227, "step": 13780 }, { "epoch": 0.8268434631307374, "grad_norm": 1.4439040422439575, "learning_rate": 5.361810104408919e-07, "loss": 0.3489, "step": 13781 }, { "epoch": 0.8269034619307614, "grad_norm": 1.186416745185852, "learning_rate": 5.35819283866338e-07, "loss": 0.3611, "step": 13782 }, { "epoch": 0.8269634607307854, "grad_norm": 1.4716432094573975, "learning_rate": 5.354576692353839e-07, "loss": 0.3982, "step": 13783 }, { "epoch": 0.8270234595308094, "grad_norm": 1.3476133346557617, "learning_rate": 5.350961665616885e-07, "loss": 0.3763, "step": 13784 }, { "epoch": 0.8270834583308334, "grad_norm": 1.2405226230621338, "learning_rate": 5.347347758589025e-07, "loss": 0.3921, "step": 13785 }, { "epoch": 0.8271434571308574, "grad_norm": 1.2912085056304932, "learning_rate": 5.343734971406744e-07, "loss": 0.349, "step": 13786 }, { "epoch": 0.8272034559308814, "grad_norm": 1.2886962890625, "learning_rate": 5.340123304206472e-07, "loss": 0.3648, "step": 13787 }, { "epoch": 0.8272634547309053, "grad_norm": 1.4136545658111572, "learning_rate": 5.336512757124609e-07, "loss": 0.3713, "step": 13788 }, { "epoch": 0.8273234535309294, "grad_norm": 1.3728086948394775, "learning_rate": 5.332903330297526e-07, "loss": 0.3769, "step": 13789 }, { "epoch": 0.8273834523309533, "grad_norm": 1.2971301078796387, "learning_rate": 5.329295023861518e-07, "loss": 0.4178, "step": 13790 }, { "epoch": 0.8274434511309774, "grad_norm": 1.3481152057647705, "learning_rate": 5.325687837952861e-07, "loss": 0.3978, "step": 13791 }, { "epoch": 0.8275034499310013, "grad_norm": 1.3485661745071411, "learning_rate": 5.322081772707766e-07, "loss": 0.3444, "step": 13792 }, { "epoch": 0.8275634487310254, "grad_norm": 1.4602398872375488, "learning_rate": 5.318476828262442e-07, "loss": 0.4132, "step": 13793 }, { "epoch": 0.8276234475310493, "grad_norm": 1.4413716793060303, "learning_rate": 5.314873004753015e-07, "loss": 0.3849, "step": 13794 }, { "epoch": 0.8276834463310734, "grad_norm": 1.375101923942566, "learning_rate": 5.311270302315586e-07, "loss": 0.3862, "step": 13795 }, { "epoch": 0.8277434451310974, "grad_norm": 1.2816102504730225, "learning_rate": 5.307668721086222e-07, "loss": 0.388, "step": 13796 }, { "epoch": 0.8278034439311214, "grad_norm": 1.4231555461883545, "learning_rate": 5.304068261200933e-07, "loss": 0.3962, "step": 13797 }, { "epoch": 0.8278634427311454, "grad_norm": 1.3425374031066895, "learning_rate": 5.30046892279569e-07, "loss": 0.3975, "step": 13798 }, { "epoch": 0.8279234415311694, "grad_norm": 1.1632769107818604, "learning_rate": 5.296870706006418e-07, "loss": 0.3538, "step": 13799 }, { "epoch": 0.8279834403311934, "grad_norm": 1.2969684600830078, "learning_rate": 5.293273610969012e-07, "loss": 0.4034, "step": 13800 }, { "epoch": 0.8280434391312174, "grad_norm": 1.2463583946228027, "learning_rate": 5.289677637819337e-07, "loss": 0.3464, "step": 13801 }, { "epoch": 0.8281034379312414, "grad_norm": 1.2377032041549683, "learning_rate": 5.286082786693158e-07, "loss": 0.3286, "step": 13802 }, { "epoch": 0.8281634367312654, "grad_norm": 1.4060590267181396, "learning_rate": 5.282489057726267e-07, "loss": 0.3198, "step": 13803 }, { "epoch": 0.8282234355312894, "grad_norm": 1.4233083724975586, "learning_rate": 5.278896451054362e-07, "loss": 0.3921, "step": 13804 }, { "epoch": 0.8282834343313134, "grad_norm": 1.358197808265686, "learning_rate": 5.275304966813139e-07, "loss": 0.3745, "step": 13805 }, { "epoch": 0.8283434331313374, "grad_norm": 1.481086254119873, "learning_rate": 5.271714605138218e-07, "loss": 0.3984, "step": 13806 }, { "epoch": 0.8284034319313613, "grad_norm": 1.3899602890014648, "learning_rate": 5.268125366165191e-07, "loss": 0.4305, "step": 13807 }, { "epoch": 0.8284634307313854, "grad_norm": 1.3426716327667236, "learning_rate": 5.264537250029616e-07, "loss": 0.4484, "step": 13808 }, { "epoch": 0.8285234295314093, "grad_norm": 1.298127293586731, "learning_rate": 5.260950256866995e-07, "loss": 0.3865, "step": 13809 }, { "epoch": 0.8285834283314334, "grad_norm": 1.1447696685791016, "learning_rate": 5.25736438681279e-07, "loss": 0.3614, "step": 13810 }, { "epoch": 0.8286434271314573, "grad_norm": 1.3049399852752686, "learning_rate": 5.253779640002421e-07, "loss": 0.3322, "step": 13811 }, { "epoch": 0.8287034259314814, "grad_norm": 1.3437119722366333, "learning_rate": 5.250196016571279e-07, "loss": 0.3559, "step": 13812 }, { "epoch": 0.8287634247315053, "grad_norm": 1.470239281654358, "learning_rate": 5.246613516654692e-07, "loss": 0.4399, "step": 13813 }, { "epoch": 0.8288234235315294, "grad_norm": 1.349865198135376, "learning_rate": 5.243032140387949e-07, "loss": 0.3533, "step": 13814 }, { "epoch": 0.8288834223315533, "grad_norm": 1.396532416343689, "learning_rate": 5.239451887906317e-07, "loss": 0.4121, "step": 13815 }, { "epoch": 0.8289434211315774, "grad_norm": 1.3706269264221191, "learning_rate": 5.235872759345001e-07, "loss": 0.3516, "step": 13816 }, { "epoch": 0.8290034199316013, "grad_norm": 1.2435683012008667, "learning_rate": 5.232294754839153e-07, "loss": 0.3301, "step": 13817 }, { "epoch": 0.8290634187316254, "grad_norm": 1.2384846210479736, "learning_rate": 5.228717874523921e-07, "loss": 0.3439, "step": 13818 }, { "epoch": 0.8291234175316494, "grad_norm": 1.2082563638687134, "learning_rate": 5.22514211853437e-07, "loss": 0.33, "step": 13819 }, { "epoch": 0.8291834163316734, "grad_norm": 1.3418092727661133, "learning_rate": 5.221567487005564e-07, "loss": 0.4058, "step": 13820 }, { "epoch": 0.8292434151316974, "grad_norm": 1.26670241355896, "learning_rate": 5.217993980072469e-07, "loss": 0.3478, "step": 13821 }, { "epoch": 0.8293034139317214, "grad_norm": 1.265872836112976, "learning_rate": 5.214421597870058e-07, "loss": 0.3531, "step": 13822 }, { "epoch": 0.8293634127317454, "grad_norm": 1.4428925514221191, "learning_rate": 5.210850340533235e-07, "loss": 0.3839, "step": 13823 }, { "epoch": 0.8294234115317694, "grad_norm": 1.2476365566253662, "learning_rate": 5.207280208196887e-07, "loss": 0.367, "step": 13824 }, { "epoch": 0.8294834103317934, "grad_norm": 1.4172052145004272, "learning_rate": 5.203711200995826e-07, "loss": 0.3842, "step": 13825 }, { "epoch": 0.8295434091318173, "grad_norm": 1.2786521911621094, "learning_rate": 5.200143319064835e-07, "loss": 0.331, "step": 13826 }, { "epoch": 0.8296034079318414, "grad_norm": 1.3587127923965454, "learning_rate": 5.196576562538669e-07, "loss": 0.389, "step": 13827 }, { "epoch": 0.8296634067318653, "grad_norm": 1.4934029579162598, "learning_rate": 5.193010931552025e-07, "loss": 0.4094, "step": 13828 }, { "epoch": 0.8297234055318894, "grad_norm": 1.301713228225708, "learning_rate": 5.189446426239548e-07, "loss": 0.3565, "step": 13829 }, { "epoch": 0.8297834043319133, "grad_norm": 1.276010513305664, "learning_rate": 5.18588304673587e-07, "loss": 0.3551, "step": 13830 }, { "epoch": 0.8298434031319374, "grad_norm": 1.411747932434082, "learning_rate": 5.182320793175551e-07, "loss": 0.3803, "step": 13831 }, { "epoch": 0.8299034019319613, "grad_norm": 1.4374264478683472, "learning_rate": 5.178759665693144e-07, "loss": 0.3905, "step": 13832 }, { "epoch": 0.8299634007319854, "grad_norm": 1.422485589981079, "learning_rate": 5.1751996644231e-07, "loss": 0.3829, "step": 13833 }, { "epoch": 0.8300233995320093, "grad_norm": 1.4071277379989624, "learning_rate": 5.171640789499892e-07, "loss": 0.3724, "step": 13834 }, { "epoch": 0.8300833983320334, "grad_norm": 1.28153395652771, "learning_rate": 5.168083041057905e-07, "loss": 0.3672, "step": 13835 }, { "epoch": 0.8301433971320573, "grad_norm": 1.2522339820861816, "learning_rate": 5.164526419231518e-07, "loss": 0.3712, "step": 13836 }, { "epoch": 0.8302033959320814, "grad_norm": 1.245992660522461, "learning_rate": 5.160970924155033e-07, "loss": 0.379, "step": 13837 }, { "epoch": 0.8302633947321053, "grad_norm": 1.4651540517807007, "learning_rate": 5.157416555962726e-07, "loss": 0.379, "step": 13838 }, { "epoch": 0.8303233935321294, "grad_norm": 1.3268420696258545, "learning_rate": 5.15386331478884e-07, "loss": 0.353, "step": 13839 }, { "epoch": 0.8303833923321533, "grad_norm": 1.2400151491165161, "learning_rate": 5.150311200767561e-07, "loss": 0.3523, "step": 13840 }, { "epoch": 0.8304433911321774, "grad_norm": 1.4612454175949097, "learning_rate": 5.146760214033021e-07, "loss": 0.422, "step": 13841 }, { "epoch": 0.8305033899322014, "grad_norm": 1.3747440576553345, "learning_rate": 5.143210354719346e-07, "loss": 0.4118, "step": 13842 }, { "epoch": 0.8305633887322253, "grad_norm": 1.455648422241211, "learning_rate": 5.139661622960589e-07, "loss": 0.4284, "step": 13843 }, { "epoch": 0.8306233875322494, "grad_norm": 1.3750808238983154, "learning_rate": 5.136114018890772e-07, "loss": 0.3259, "step": 13844 }, { "epoch": 0.8306833863322733, "grad_norm": 1.367917537689209, "learning_rate": 5.132567542643855e-07, "loss": 0.3647, "step": 13845 }, { "epoch": 0.8307433851322974, "grad_norm": 1.3594452142715454, "learning_rate": 5.129022194353792e-07, "loss": 0.3904, "step": 13846 }, { "epoch": 0.8308033839323213, "grad_norm": 1.4075708389282227, "learning_rate": 5.125477974154483e-07, "loss": 0.3568, "step": 13847 }, { "epoch": 0.8308633827323454, "grad_norm": 1.4225893020629883, "learning_rate": 5.121934882179746e-07, "loss": 0.351, "step": 13848 }, { "epoch": 0.8309233815323693, "grad_norm": 1.3897300958633423, "learning_rate": 5.118392918563412e-07, "loss": 0.3393, "step": 13849 }, { "epoch": 0.8309833803323934, "grad_norm": 1.4435425996780396, "learning_rate": 5.11485208343923e-07, "loss": 0.3566, "step": 13850 }, { "epoch": 0.8310433791324173, "grad_norm": 1.3838245868682861, "learning_rate": 5.111312376940944e-07, "loss": 0.3427, "step": 13851 }, { "epoch": 0.8311033779324414, "grad_norm": 1.3718873262405396, "learning_rate": 5.107773799202201e-07, "loss": 0.3922, "step": 13852 }, { "epoch": 0.8311633767324653, "grad_norm": 1.346909999847412, "learning_rate": 5.104236350356654e-07, "loss": 0.377, "step": 13853 }, { "epoch": 0.8312233755324894, "grad_norm": 1.4159668684005737, "learning_rate": 5.100700030537902e-07, "loss": 0.3847, "step": 13854 }, { "epoch": 0.8312833743325133, "grad_norm": 1.352679967880249, "learning_rate": 5.097164839879489e-07, "loss": 0.3952, "step": 13855 }, { "epoch": 0.8313433731325374, "grad_norm": 1.289483904838562, "learning_rate": 5.093630778514923e-07, "loss": 0.3299, "step": 13856 }, { "epoch": 0.8314033719325613, "grad_norm": 1.4261304140090942, "learning_rate": 5.090097846577663e-07, "loss": 0.3924, "step": 13857 }, { "epoch": 0.8314633707325854, "grad_norm": 1.2340446710586548, "learning_rate": 5.086566044201145e-07, "loss": 0.3328, "step": 13858 }, { "epoch": 0.8315233695326093, "grad_norm": 1.4634360074996948, "learning_rate": 5.083035371518738e-07, "loss": 0.4232, "step": 13859 }, { "epoch": 0.8315833683326334, "grad_norm": 1.31428861618042, "learning_rate": 5.079505828663783e-07, "loss": 0.3487, "step": 13860 }, { "epoch": 0.8316433671326573, "grad_norm": 1.3858511447906494, "learning_rate": 5.075977415769581e-07, "loss": 0.3771, "step": 13861 }, { "epoch": 0.8317033659326813, "grad_norm": 1.401063323020935, "learning_rate": 5.072450132969378e-07, "loss": 0.4293, "step": 13862 }, { "epoch": 0.8317633647327054, "grad_norm": 1.3225734233856201, "learning_rate": 5.068923980396381e-07, "loss": 0.3263, "step": 13863 }, { "epoch": 0.8318233635327293, "grad_norm": 1.344335675239563, "learning_rate": 5.065398958183754e-07, "loss": 0.3318, "step": 13864 }, { "epoch": 0.8318833623327534, "grad_norm": 1.324896216392517, "learning_rate": 5.06187506646463e-07, "loss": 0.3885, "step": 13865 }, { "epoch": 0.8319433611327773, "grad_norm": 1.352921485900879, "learning_rate": 5.058352305372097e-07, "loss": 0.3459, "step": 13866 }, { "epoch": 0.8320033599328014, "grad_norm": 1.4484343528747559, "learning_rate": 5.054830675039172e-07, "loss": 0.3673, "step": 13867 }, { "epoch": 0.8320633587328253, "grad_norm": 1.2429900169372559, "learning_rate": 5.051310175598868e-07, "loss": 0.3607, "step": 13868 }, { "epoch": 0.8321233575328494, "grad_norm": 1.310256004333496, "learning_rate": 5.047790807184127e-07, "loss": 0.3276, "step": 13869 }, { "epoch": 0.8321833563328733, "grad_norm": 1.329538106918335, "learning_rate": 5.044272569927872e-07, "loss": 0.3632, "step": 13870 }, { "epoch": 0.8322433551328974, "grad_norm": 1.442267656326294, "learning_rate": 5.040755463962963e-07, "loss": 0.3971, "step": 13871 }, { "epoch": 0.8323033539329213, "grad_norm": 1.3300743103027344, "learning_rate": 5.037239489422222e-07, "loss": 0.3932, "step": 13872 }, { "epoch": 0.8323633527329454, "grad_norm": 1.57696533203125, "learning_rate": 5.03372464643844e-07, "loss": 0.3997, "step": 13873 }, { "epoch": 0.8324233515329693, "grad_norm": 1.402011513710022, "learning_rate": 5.030210935144353e-07, "loss": 0.3534, "step": 13874 }, { "epoch": 0.8324833503329934, "grad_norm": 1.2787914276123047, "learning_rate": 5.026698355672655e-07, "loss": 0.3194, "step": 13875 }, { "epoch": 0.8325433491330173, "grad_norm": 1.4119346141815186, "learning_rate": 5.023186908155995e-07, "loss": 0.3785, "step": 13876 }, { "epoch": 0.8326033479330414, "grad_norm": 1.2512480020523071, "learning_rate": 5.019676592726992e-07, "loss": 0.3663, "step": 13877 }, { "epoch": 0.8326633467330653, "grad_norm": 1.3969082832336426, "learning_rate": 5.016167409518233e-07, "loss": 0.358, "step": 13878 }, { "epoch": 0.8327233455330894, "grad_norm": 1.3202136754989624, "learning_rate": 5.012659358662205e-07, "loss": 0.3308, "step": 13879 }, { "epoch": 0.8327833443331133, "grad_norm": 1.352555274963379, "learning_rate": 5.00915244029142e-07, "loss": 0.3534, "step": 13880 }, { "epoch": 0.8328433431331373, "grad_norm": 1.3743550777435303, "learning_rate": 5.005646654538305e-07, "loss": 0.3264, "step": 13881 }, { "epoch": 0.8329033419331613, "grad_norm": 1.4551862478256226, "learning_rate": 5.002142001535266e-07, "loss": 0.3833, "step": 13882 }, { "epoch": 0.8329633407331853, "grad_norm": 1.3103746175765991, "learning_rate": 4.998638481414653e-07, "loss": 0.3485, "step": 13883 }, { "epoch": 0.8330233395332093, "grad_norm": 1.5312074422836304, "learning_rate": 4.995136094308774e-07, "loss": 0.3957, "step": 13884 }, { "epoch": 0.8330833383332333, "grad_norm": 1.4743014574050903, "learning_rate": 4.991634840349908e-07, "loss": 0.3895, "step": 13885 }, { "epoch": 0.8331433371332574, "grad_norm": 1.4281885623931885, "learning_rate": 4.988134719670278e-07, "loss": 0.3542, "step": 13886 }, { "epoch": 0.8332033359332813, "grad_norm": 1.2860413789749146, "learning_rate": 4.984635732402058e-07, "loss": 0.3639, "step": 13887 }, { "epoch": 0.8332633347333054, "grad_norm": 1.4087861776351929, "learning_rate": 4.981137878677402e-07, "loss": 0.3917, "step": 13888 }, { "epoch": 0.8333233335333293, "grad_norm": 1.3262029886245728, "learning_rate": 4.977641158628406e-07, "loss": 0.3852, "step": 13889 }, { "epoch": 0.8333833323333534, "grad_norm": 1.3154823780059814, "learning_rate": 4.974145572387121e-07, "loss": 0.3699, "step": 13890 }, { "epoch": 0.8334433311333773, "grad_norm": 1.4423352479934692, "learning_rate": 4.970651120085549e-07, "loss": 0.391, "step": 13891 }, { "epoch": 0.8335033299334014, "grad_norm": 1.3958921432495117, "learning_rate": 4.967157801855682e-07, "loss": 0.3564, "step": 13892 }, { "epoch": 0.8335633287334253, "grad_norm": 1.3784382343292236, "learning_rate": 4.963665617829434e-07, "loss": 0.3382, "step": 13893 }, { "epoch": 0.8336233275334494, "grad_norm": 1.1530801057815552, "learning_rate": 4.960174568138686e-07, "loss": 0.317, "step": 13894 }, { "epoch": 0.8336833263334733, "grad_norm": 1.4947059154510498, "learning_rate": 4.956684652915289e-07, "loss": 0.4123, "step": 13895 }, { "epoch": 0.8337433251334974, "grad_norm": 1.3781548738479614, "learning_rate": 4.953195872291029e-07, "loss": 0.3736, "step": 13896 }, { "epoch": 0.8338033239335213, "grad_norm": 1.383152723312378, "learning_rate": 4.949708226397681e-07, "loss": 0.4183, "step": 13897 }, { "epoch": 0.8338633227335454, "grad_norm": 1.3455760478973389, "learning_rate": 4.946221715366929e-07, "loss": 0.3821, "step": 13898 }, { "epoch": 0.8339233215335693, "grad_norm": 1.3287514448165894, "learning_rate": 4.942736339330461e-07, "loss": 0.3464, "step": 13899 }, { "epoch": 0.8339833203335933, "grad_norm": 1.2968370914459229, "learning_rate": 4.939252098419906e-07, "loss": 0.3833, "step": 13900 }, { "epoch": 0.8340433191336173, "grad_norm": 1.2680425643920898, "learning_rate": 4.935768992766845e-07, "loss": 0.3608, "step": 13901 }, { "epoch": 0.8341033179336413, "grad_norm": 1.3258936405181885, "learning_rate": 4.932287022502814e-07, "loss": 0.3716, "step": 13902 }, { "epoch": 0.8341633167336653, "grad_norm": 1.3630481958389282, "learning_rate": 4.92880618775931e-07, "loss": 0.348, "step": 13903 }, { "epoch": 0.8342233155336893, "grad_norm": 1.289496898651123, "learning_rate": 4.925326488667798e-07, "loss": 0.3845, "step": 13904 }, { "epoch": 0.8342833143337133, "grad_norm": 1.2474350929260254, "learning_rate": 4.921847925359683e-07, "loss": 0.3521, "step": 13905 }, { "epoch": 0.8343433131337373, "grad_norm": 1.502213478088379, "learning_rate": 4.918370497966332e-07, "loss": 0.373, "step": 13906 }, { "epoch": 0.8344033119337613, "grad_norm": 1.488423228263855, "learning_rate": 4.91489420661908e-07, "loss": 0.3891, "step": 13907 }, { "epoch": 0.8344633107337853, "grad_norm": 1.4280951023101807, "learning_rate": 4.911419051449209e-07, "loss": 0.3885, "step": 13908 }, { "epoch": 0.8345233095338094, "grad_norm": 1.4791829586029053, "learning_rate": 4.907945032587959e-07, "loss": 0.3911, "step": 13909 }, { "epoch": 0.8345833083338333, "grad_norm": 1.3307853937149048, "learning_rate": 4.904472150166512e-07, "loss": 0.4015, "step": 13910 }, { "epoch": 0.8346433071338574, "grad_norm": 1.3425216674804688, "learning_rate": 4.90100040431604e-07, "loss": 0.3362, "step": 13911 }, { "epoch": 0.8347033059338813, "grad_norm": 1.4823442697525024, "learning_rate": 4.897529795167669e-07, "loss": 0.3807, "step": 13912 }, { "epoch": 0.8347633047339054, "grad_norm": 1.2863975763320923, "learning_rate": 4.894060322852434e-07, "loss": 0.4021, "step": 13913 }, { "epoch": 0.8348233035339293, "grad_norm": 1.3433539867401123, "learning_rate": 4.890591987501389e-07, "loss": 0.3641, "step": 13914 }, { "epoch": 0.8348833023339534, "grad_norm": 1.506282925605774, "learning_rate": 4.887124789245496e-07, "loss": 0.3934, "step": 13915 }, { "epoch": 0.8349433011339773, "grad_norm": 1.4366869926452637, "learning_rate": 4.883658728215713e-07, "loss": 0.3785, "step": 13916 }, { "epoch": 0.8350032999340014, "grad_norm": 1.348258376121521, "learning_rate": 4.880193804542932e-07, "loss": 0.3894, "step": 13917 }, { "epoch": 0.8350632987340253, "grad_norm": 1.3975940942764282, "learning_rate": 4.876730018357998e-07, "loss": 0.3834, "step": 13918 }, { "epoch": 0.8351232975340493, "grad_norm": 1.384766697883606, "learning_rate": 4.873267369791737e-07, "loss": 0.3415, "step": 13919 }, { "epoch": 0.8351832963340733, "grad_norm": 1.2754813432693481, "learning_rate": 4.869805858974908e-07, "loss": 0.36, "step": 13920 }, { "epoch": 0.8352432951340973, "grad_norm": 1.2344586849212646, "learning_rate": 4.866345486038242e-07, "loss": 0.3543, "step": 13921 }, { "epoch": 0.8353032939341213, "grad_norm": 1.3597677946090698, "learning_rate": 4.86288625111241e-07, "loss": 0.3613, "step": 13922 }, { "epoch": 0.8353632927341453, "grad_norm": 1.2667235136032104, "learning_rate": 4.859428154328057e-07, "loss": 0.3662, "step": 13923 }, { "epoch": 0.8354232915341693, "grad_norm": 1.2750194072723389, "learning_rate": 4.8559711958158e-07, "loss": 0.4071, "step": 13924 }, { "epoch": 0.8354832903341933, "grad_norm": 1.4360092878341675, "learning_rate": 4.852515375706159e-07, "loss": 0.4137, "step": 13925 }, { "epoch": 0.8355432891342173, "grad_norm": 1.3045040369033813, "learning_rate": 4.84906069412967e-07, "loss": 0.3453, "step": 13926 }, { "epoch": 0.8356032879342413, "grad_norm": 1.2180019617080688, "learning_rate": 4.845607151216784e-07, "loss": 0.3426, "step": 13927 }, { "epoch": 0.8356632867342653, "grad_norm": 1.3568731546401978, "learning_rate": 4.84215474709794e-07, "loss": 0.3494, "step": 13928 }, { "epoch": 0.8357232855342893, "grad_norm": 1.248023271560669, "learning_rate": 4.838703481903512e-07, "loss": 0.3737, "step": 13929 }, { "epoch": 0.8357832843343133, "grad_norm": 1.2777221202850342, "learning_rate": 4.835253355763828e-07, "loss": 0.3237, "step": 13930 }, { "epoch": 0.8358432831343373, "grad_norm": 1.3649784326553345, "learning_rate": 4.831804368809207e-07, "loss": 0.3961, "step": 13931 }, { "epoch": 0.8359032819343614, "grad_norm": 1.3991996049880981, "learning_rate": 4.828356521169887e-07, "loss": 0.3536, "step": 13932 }, { "epoch": 0.8359632807343853, "grad_norm": 1.3314156532287598, "learning_rate": 4.824909812976081e-07, "loss": 0.3695, "step": 13933 }, { "epoch": 0.8360232795344094, "grad_norm": 1.270527958869934, "learning_rate": 4.821464244357943e-07, "loss": 0.378, "step": 13934 }, { "epoch": 0.8360832783344333, "grad_norm": 1.3171340227127075, "learning_rate": 4.81801981544562e-07, "loss": 0.4111, "step": 13935 }, { "epoch": 0.8361432771344574, "grad_norm": 1.4260492324829102, "learning_rate": 4.814576526369176e-07, "loss": 0.3932, "step": 13936 }, { "epoch": 0.8362032759344813, "grad_norm": 1.2417137622833252, "learning_rate": 4.811134377258646e-07, "loss": 0.3557, "step": 13937 }, { "epoch": 0.8362632747345053, "grad_norm": 1.4394723176956177, "learning_rate": 4.807693368244036e-07, "loss": 0.4058, "step": 13938 }, { "epoch": 0.8363232735345293, "grad_norm": 1.3054291009902954, "learning_rate": 4.804253499455294e-07, "loss": 0.3718, "step": 13939 }, { "epoch": 0.8363832723345533, "grad_norm": 1.3390312194824219, "learning_rate": 4.800814771022318e-07, "loss": 0.394, "step": 13940 }, { "epoch": 0.8364432711345773, "grad_norm": 1.2568775415420532, "learning_rate": 4.79737718307499e-07, "loss": 0.3948, "step": 13941 }, { "epoch": 0.8365032699346013, "grad_norm": 1.3737668991088867, "learning_rate": 4.79394073574311e-07, "loss": 0.3968, "step": 13942 }, { "epoch": 0.8365632687346253, "grad_norm": 1.214822769165039, "learning_rate": 4.790505429156492e-07, "loss": 0.3336, "step": 13943 }, { "epoch": 0.8366232675346493, "grad_norm": 1.2953381538391113, "learning_rate": 4.78707126344483e-07, "loss": 0.384, "step": 13944 }, { "epoch": 0.8366832663346733, "grad_norm": 1.4323318004608154, "learning_rate": 4.783638238737845e-07, "loss": 0.3947, "step": 13945 }, { "epoch": 0.8367432651346973, "grad_norm": 1.4055325984954834, "learning_rate": 4.780206355165172e-07, "loss": 0.3769, "step": 13946 }, { "epoch": 0.8368032639347213, "grad_norm": 1.3351231813430786, "learning_rate": 4.77677561285643e-07, "loss": 0.4394, "step": 13947 }, { "epoch": 0.8368632627347453, "grad_norm": 1.4566950798034668, "learning_rate": 4.773346011941176e-07, "loss": 0.3874, "step": 13948 }, { "epoch": 0.8369232615347693, "grad_norm": 1.4629695415496826, "learning_rate": 4.769917552548925e-07, "loss": 0.3412, "step": 13949 }, { "epoch": 0.8369832603347933, "grad_norm": 1.365686297416687, "learning_rate": 4.766490234809168e-07, "loss": 0.3784, "step": 13950 }, { "epoch": 0.8370432591348173, "grad_norm": 1.2759146690368652, "learning_rate": 4.7630640588513285e-07, "loss": 0.3636, "step": 13951 }, { "epoch": 0.8371032579348413, "grad_norm": 1.2176014184951782, "learning_rate": 4.759639024804795e-07, "loss": 0.3435, "step": 13952 }, { "epoch": 0.8371632567348654, "grad_norm": 1.3437103033065796, "learning_rate": 4.756215132798929e-07, "loss": 0.3752, "step": 13953 }, { "epoch": 0.8372232555348893, "grad_norm": 1.3020559549331665, "learning_rate": 4.752792382963026e-07, "loss": 0.3892, "step": 13954 }, { "epoch": 0.8372832543349134, "grad_norm": 1.4383962154388428, "learning_rate": 4.74937077542635e-07, "loss": 0.3868, "step": 13955 }, { "epoch": 0.8373432531349373, "grad_norm": 1.2785052061080933, "learning_rate": 4.745950310318108e-07, "loss": 0.3631, "step": 13956 }, { "epoch": 0.8374032519349613, "grad_norm": 1.339792251586914, "learning_rate": 4.742530987767494e-07, "loss": 0.3933, "step": 13957 }, { "epoch": 0.8374632507349853, "grad_norm": 1.3068047761917114, "learning_rate": 4.739112807903625e-07, "loss": 0.3974, "step": 13958 }, { "epoch": 0.8375232495350093, "grad_norm": 1.3305248022079468, "learning_rate": 4.735695770855605e-07, "loss": 0.3773, "step": 13959 }, { "epoch": 0.8375832483350333, "grad_norm": 1.224021315574646, "learning_rate": 4.7322798767524706e-07, "loss": 0.3382, "step": 13960 }, { "epoch": 0.8376432471350573, "grad_norm": 1.4462566375732422, "learning_rate": 4.7288651257232205e-07, "loss": 0.401, "step": 13961 }, { "epoch": 0.8377032459350813, "grad_norm": 1.5054792165756226, "learning_rate": 4.725451517896825e-07, "loss": 0.3802, "step": 13962 }, { "epoch": 0.8377632447351053, "grad_norm": 1.2550008296966553, "learning_rate": 4.7220390534021977e-07, "loss": 0.3144, "step": 13963 }, { "epoch": 0.8378232435351293, "grad_norm": 1.4988799095153809, "learning_rate": 4.7186277323682e-07, "loss": 0.4059, "step": 13964 }, { "epoch": 0.8378832423351533, "grad_norm": 1.319041132926941, "learning_rate": 4.715217554923679e-07, "loss": 0.3519, "step": 13965 }, { "epoch": 0.8379432411351773, "grad_norm": 1.3424142599105835, "learning_rate": 4.711808521197415e-07, "loss": 0.3364, "step": 13966 }, { "epoch": 0.8380032399352013, "grad_norm": 1.3871098756790161, "learning_rate": 4.7084006313181465e-07, "loss": 0.3437, "step": 13967 }, { "epoch": 0.8380632387352253, "grad_norm": 1.5113170146942139, "learning_rate": 4.704993885414575e-07, "loss": 0.3777, "step": 13968 }, { "epoch": 0.8381232375352493, "grad_norm": 1.3316189050674438, "learning_rate": 4.701588283615359e-07, "loss": 0.3989, "step": 13969 }, { "epoch": 0.8381832363352733, "grad_norm": 1.4040828943252563, "learning_rate": 4.698183826049129e-07, "loss": 0.3783, "step": 13970 }, { "epoch": 0.8382432351352973, "grad_norm": 1.2628962993621826, "learning_rate": 4.6947805128444263e-07, "loss": 0.3636, "step": 13971 }, { "epoch": 0.8383032339353212, "grad_norm": 1.3471425771713257, "learning_rate": 4.6913783441298013e-07, "loss": 0.359, "step": 13972 }, { "epoch": 0.8383632327353453, "grad_norm": 1.4165380001068115, "learning_rate": 4.687977320033721e-07, "loss": 0.3859, "step": 13973 }, { "epoch": 0.8384232315353692, "grad_norm": 1.3093750476837158, "learning_rate": 4.684577440684653e-07, "loss": 0.372, "step": 13974 }, { "epoch": 0.8384832303353933, "grad_norm": 1.2973889112472534, "learning_rate": 4.6811787062109653e-07, "loss": 0.3628, "step": 13975 }, { "epoch": 0.8385432291354173, "grad_norm": 1.210537314414978, "learning_rate": 4.677781116741024e-07, "loss": 0.3891, "step": 13976 }, { "epoch": 0.8386032279354413, "grad_norm": 1.346970796585083, "learning_rate": 4.6743846724031485e-07, "loss": 0.3727, "step": 13977 }, { "epoch": 0.8386632267354653, "grad_norm": 1.3205004930496216, "learning_rate": 4.670989373325601e-07, "loss": 0.3387, "step": 13978 }, { "epoch": 0.8387232255354893, "grad_norm": 1.3339500427246094, "learning_rate": 4.6675952196366053e-07, "loss": 0.407, "step": 13979 }, { "epoch": 0.8387832243355133, "grad_norm": 1.2966715097427368, "learning_rate": 4.6642022114643413e-07, "loss": 0.3841, "step": 13980 }, { "epoch": 0.8388432231355373, "grad_norm": 1.4430897235870361, "learning_rate": 4.6608103489369535e-07, "loss": 0.3887, "step": 13981 }, { "epoch": 0.8389032219355613, "grad_norm": 1.226125717163086, "learning_rate": 4.6574196321825335e-07, "loss": 0.3478, "step": 13982 }, { "epoch": 0.8389632207355853, "grad_norm": 1.272547721862793, "learning_rate": 4.654030061329129e-07, "loss": 0.4008, "step": 13983 }, { "epoch": 0.8390232195356093, "grad_norm": 1.253653883934021, "learning_rate": 4.6506416365047575e-07, "loss": 0.3746, "step": 13984 }, { "epoch": 0.8390832183356333, "grad_norm": 1.3535710573196411, "learning_rate": 4.6472543578373816e-07, "loss": 0.3768, "step": 13985 }, { "epoch": 0.8391432171356573, "grad_norm": 1.3581498861312866, "learning_rate": 4.643868225454921e-07, "loss": 0.4216, "step": 13986 }, { "epoch": 0.8392032159356813, "grad_norm": 1.3292391300201416, "learning_rate": 4.6404832394852484e-07, "loss": 0.3903, "step": 13987 }, { "epoch": 0.8392632147357053, "grad_norm": 1.324472427368164, "learning_rate": 4.637099400056207e-07, "loss": 0.378, "step": 13988 }, { "epoch": 0.8393232135357293, "grad_norm": 1.4737941026687622, "learning_rate": 4.6337167072956035e-07, "loss": 0.4331, "step": 13989 }, { "epoch": 0.8393832123357533, "grad_norm": 1.4088586568832397, "learning_rate": 4.630335161331156e-07, "loss": 0.3879, "step": 13990 }, { "epoch": 0.8394432111357772, "grad_norm": 1.371077537536621, "learning_rate": 4.626954762290594e-07, "loss": 0.3797, "step": 13991 }, { "epoch": 0.8395032099358013, "grad_norm": 1.3682868480682373, "learning_rate": 4.623575510301561e-07, "loss": 0.3971, "step": 13992 }, { "epoch": 0.8395632087358252, "grad_norm": 1.274811029434204, "learning_rate": 4.620197405491693e-07, "loss": 0.3423, "step": 13993 }, { "epoch": 0.8396232075358493, "grad_norm": 1.2711979150772095, "learning_rate": 4.6168204479885614e-07, "loss": 0.3303, "step": 13994 }, { "epoch": 0.8396832063358732, "grad_norm": 1.343748927116394, "learning_rate": 4.613444637919689e-07, "loss": 0.3987, "step": 13995 }, { "epoch": 0.8397432051358973, "grad_norm": 1.32541024684906, "learning_rate": 4.61006997541258e-07, "loss": 0.3521, "step": 13996 }, { "epoch": 0.8398032039359212, "grad_norm": 1.370526671409607, "learning_rate": 4.6066964605946727e-07, "loss": 0.3925, "step": 13997 }, { "epoch": 0.8398632027359453, "grad_norm": 1.2942386865615845, "learning_rate": 4.6033240935933657e-07, "loss": 0.3542, "step": 13998 }, { "epoch": 0.8399232015359693, "grad_norm": 1.418542504310608, "learning_rate": 4.5999528745360117e-07, "loss": 0.4063, "step": 13999 }, { "epoch": 0.8399832003359933, "grad_norm": 1.2742023468017578, "learning_rate": 4.596582803549948e-07, "loss": 0.3661, "step": 14000 }, { "epoch": 0.8400431991360173, "grad_norm": 1.3699321746826172, "learning_rate": 4.5932138807624304e-07, "loss": 0.405, "step": 14001 }, { "epoch": 0.8401031979360413, "grad_norm": 1.3759825229644775, "learning_rate": 4.589846106300687e-07, "loss": 0.4061, "step": 14002 }, { "epoch": 0.8401631967360653, "grad_norm": 1.4835857152938843, "learning_rate": 4.58647948029191e-07, "loss": 0.3951, "step": 14003 }, { "epoch": 0.8402231955360893, "grad_norm": 1.5379167795181274, "learning_rate": 4.5831140028632353e-07, "loss": 0.3813, "step": 14004 }, { "epoch": 0.8402831943361133, "grad_norm": 1.3467204570770264, "learning_rate": 4.579749674141775e-07, "loss": 0.3708, "step": 14005 }, { "epoch": 0.8403431931361373, "grad_norm": 1.2923264503479004, "learning_rate": 4.5763864942545733e-07, "loss": 0.3577, "step": 14006 }, { "epoch": 0.8404031919361613, "grad_norm": 1.3587640523910522, "learning_rate": 4.573024463328638e-07, "loss": 0.3652, "step": 14007 }, { "epoch": 0.8404631907361853, "grad_norm": 1.45751953125, "learning_rate": 4.5696635814909486e-07, "loss": 0.4046, "step": 14008 }, { "epoch": 0.8405231895362093, "grad_norm": 1.4099239110946655, "learning_rate": 4.566303848868426e-07, "loss": 0.328, "step": 14009 }, { "epoch": 0.8405831883362332, "grad_norm": 1.4187651872634888, "learning_rate": 4.562945265587945e-07, "loss": 0.3795, "step": 14010 }, { "epoch": 0.8406431871362573, "grad_norm": 1.3802874088287354, "learning_rate": 4.5595878317763567e-07, "loss": 0.43, "step": 14011 }, { "epoch": 0.8407031859362812, "grad_norm": 1.2818784713745117, "learning_rate": 4.556231547560454e-07, "loss": 0.3815, "step": 14012 }, { "epoch": 0.8407631847363053, "grad_norm": 1.3571724891662598, "learning_rate": 4.55287641306698e-07, "loss": 0.3239, "step": 14013 }, { "epoch": 0.8408231835363292, "grad_norm": 1.2717242240905762, "learning_rate": 4.549522428422642e-07, "loss": 0.3508, "step": 14014 }, { "epoch": 0.8408831823363533, "grad_norm": 1.3505655527114868, "learning_rate": 4.5461695937541126e-07, "loss": 0.3965, "step": 14015 }, { "epoch": 0.8409431811363772, "grad_norm": 1.4443819522857666, "learning_rate": 4.5428179091880173e-07, "loss": 0.3752, "step": 14016 }, { "epoch": 0.8410031799364013, "grad_norm": 1.306976079940796, "learning_rate": 4.5394673748509125e-07, "loss": 0.3703, "step": 14017 }, { "epoch": 0.8410631787364252, "grad_norm": 1.3797407150268555, "learning_rate": 4.5361179908693614e-07, "loss": 0.4159, "step": 14018 }, { "epoch": 0.8411231775364493, "grad_norm": 1.3810298442840576, "learning_rate": 4.5327697573698275e-07, "loss": 0.4017, "step": 14019 }, { "epoch": 0.8411831763364732, "grad_norm": 1.5407721996307373, "learning_rate": 4.529422674478789e-07, "loss": 0.4146, "step": 14020 }, { "epoch": 0.8412431751364973, "grad_norm": 1.3884718418121338, "learning_rate": 4.526076742322619e-07, "loss": 0.3824, "step": 14021 }, { "epoch": 0.8413031739365213, "grad_norm": 1.4088906049728394, "learning_rate": 4.5227319610276904e-07, "loss": 0.3656, "step": 14022 }, { "epoch": 0.8413631727365453, "grad_norm": 1.452877163887024, "learning_rate": 4.519388330720327e-07, "loss": 0.3697, "step": 14023 }, { "epoch": 0.8414231715365693, "grad_norm": 1.2716330289840698, "learning_rate": 4.5160458515268e-07, "loss": 0.3643, "step": 14024 }, { "epoch": 0.8414831703365933, "grad_norm": 1.416445255279541, "learning_rate": 4.512704523573333e-07, "loss": 0.3938, "step": 14025 }, { "epoch": 0.8415431691366173, "grad_norm": 1.4462376832962036, "learning_rate": 4.5093643469861104e-07, "loss": 0.4018, "step": 14026 }, { "epoch": 0.8416031679366412, "grad_norm": 1.3054966926574707, "learning_rate": 4.50602532189129e-07, "loss": 0.3944, "step": 14027 }, { "epoch": 0.8416631667366653, "grad_norm": 1.3499581813812256, "learning_rate": 4.5026874484149644e-07, "loss": 0.4065, "step": 14028 }, { "epoch": 0.8417231655366892, "grad_norm": 1.2944339513778687, "learning_rate": 4.4993507266831803e-07, "loss": 0.3077, "step": 14029 }, { "epoch": 0.8417831643367133, "grad_norm": 1.486268162727356, "learning_rate": 4.4960151568219625e-07, "loss": 0.3609, "step": 14030 }, { "epoch": 0.8418431631367372, "grad_norm": 1.3745999336242676, "learning_rate": 4.492680738957279e-07, "loss": 0.3513, "step": 14031 }, { "epoch": 0.8419031619367613, "grad_norm": 1.2728191614151, "learning_rate": 4.4893474732150533e-07, "loss": 0.4129, "step": 14032 }, { "epoch": 0.8419631607367852, "grad_norm": 1.3637416362762451, "learning_rate": 4.486015359721161e-07, "loss": 0.3752, "step": 14033 }, { "epoch": 0.8420231595368093, "grad_norm": 1.3367412090301514, "learning_rate": 4.482684398601443e-07, "loss": 0.3815, "step": 14034 }, { "epoch": 0.8420831583368332, "grad_norm": 1.4094797372817993, "learning_rate": 4.479354589981717e-07, "loss": 0.4097, "step": 14035 }, { "epoch": 0.8421431571368573, "grad_norm": 1.45371413230896, "learning_rate": 4.4760259339877003e-07, "loss": 0.3843, "step": 14036 }, { "epoch": 0.8422031559368812, "grad_norm": 1.269095540046692, "learning_rate": 4.4726984307451277e-07, "loss": 0.3944, "step": 14037 }, { "epoch": 0.8422631547369053, "grad_norm": 1.3134304285049438, "learning_rate": 4.469372080379643e-07, "loss": 0.3581, "step": 14038 }, { "epoch": 0.8423231535369292, "grad_norm": 1.2758744955062866, "learning_rate": 4.466046883016893e-07, "loss": 0.372, "step": 14039 }, { "epoch": 0.8423831523369533, "grad_norm": 1.271321415901184, "learning_rate": 4.462722838782423e-07, "loss": 0.3479, "step": 14040 }, { "epoch": 0.8424431511369772, "grad_norm": 1.388180136680603, "learning_rate": 4.4593999478017866e-07, "loss": 0.3747, "step": 14041 }, { "epoch": 0.8425031499370013, "grad_norm": 1.1302107572555542, "learning_rate": 4.4560782102004786e-07, "loss": 0.3561, "step": 14042 }, { "epoch": 0.8425631487370253, "grad_norm": 1.4297600984573364, "learning_rate": 4.4527576261039365e-07, "loss": 0.416, "step": 14043 }, { "epoch": 0.8426231475370493, "grad_norm": 1.4715397357940674, "learning_rate": 4.4494381956375696e-07, "loss": 0.4148, "step": 14044 }, { "epoch": 0.8426831463370733, "grad_norm": 1.3960232734680176, "learning_rate": 4.4461199189267254e-07, "loss": 0.4062, "step": 14045 }, { "epoch": 0.8427431451370972, "grad_norm": 1.3043361902236938, "learning_rate": 4.442802796096736e-07, "loss": 0.3485, "step": 14046 }, { "epoch": 0.8428031439371213, "grad_norm": 1.371111273765564, "learning_rate": 4.439486827272863e-07, "loss": 0.3658, "step": 14047 }, { "epoch": 0.8428631427371452, "grad_norm": 1.3222687244415283, "learning_rate": 4.436172012580338e-07, "loss": 0.3793, "step": 14048 }, { "epoch": 0.8429231415371693, "grad_norm": 1.3870892524719238, "learning_rate": 4.4328583521443494e-07, "loss": 0.3878, "step": 14049 }, { "epoch": 0.8429831403371932, "grad_norm": 1.5507813692092896, "learning_rate": 4.429545846090034e-07, "loss": 0.3789, "step": 14050 }, { "epoch": 0.8430431391372173, "grad_norm": 1.446016788482666, "learning_rate": 4.4262344945424984e-07, "loss": 0.3853, "step": 14051 }, { "epoch": 0.8431031379372412, "grad_norm": 1.484959363937378, "learning_rate": 4.422924297626796e-07, "loss": 0.4028, "step": 14052 }, { "epoch": 0.8431631367372653, "grad_norm": 1.3692207336425781, "learning_rate": 4.4196152554679227e-07, "loss": 0.3895, "step": 14053 }, { "epoch": 0.8432231355372892, "grad_norm": 1.3265172243118286, "learning_rate": 4.4163073681908667e-07, "loss": 0.3863, "step": 14054 }, { "epoch": 0.8432831343373133, "grad_norm": 1.2093552350997925, "learning_rate": 4.413000635920539e-07, "loss": 0.3629, "step": 14055 }, { "epoch": 0.8433431331373372, "grad_norm": 1.4549217224121094, "learning_rate": 4.409695058781828e-07, "loss": 0.3708, "step": 14056 }, { "epoch": 0.8434031319373613, "grad_norm": 1.2867255210876465, "learning_rate": 4.4063906368995545e-07, "loss": 0.3437, "step": 14057 }, { "epoch": 0.8434631307373852, "grad_norm": 1.4348649978637695, "learning_rate": 4.403087370398533e-07, "loss": 0.378, "step": 14058 }, { "epoch": 0.8435231295374093, "grad_norm": 1.3346529006958008, "learning_rate": 4.3997852594035003e-07, "loss": 0.3868, "step": 14059 }, { "epoch": 0.8435831283374332, "grad_norm": 1.2188998460769653, "learning_rate": 4.396484304039155e-07, "loss": 0.3147, "step": 14060 }, { "epoch": 0.8436431271374573, "grad_norm": 1.1943880319595337, "learning_rate": 4.3931845044301775e-07, "loss": 0.3296, "step": 14061 }, { "epoch": 0.8437031259374812, "grad_norm": 1.3651269674301147, "learning_rate": 4.3898858607011796e-07, "loss": 0.3782, "step": 14062 }, { "epoch": 0.8437631247375053, "grad_norm": 1.3888754844665527, "learning_rate": 4.3865883729767205e-07, "loss": 0.3761, "step": 14063 }, { "epoch": 0.8438231235375292, "grad_norm": 1.2447115182876587, "learning_rate": 4.383292041381355e-07, "loss": 0.351, "step": 14064 }, { "epoch": 0.8438831223375532, "grad_norm": 1.320665717124939, "learning_rate": 4.3799968660395525e-07, "loss": 0.3258, "step": 14065 }, { "epoch": 0.8439431211375773, "grad_norm": 1.3411486148834229, "learning_rate": 4.3767028470757787e-07, "loss": 0.4015, "step": 14066 }, { "epoch": 0.8440031199376012, "grad_norm": 1.2924267053604126, "learning_rate": 4.3734099846144046e-07, "loss": 0.3905, "step": 14067 }, { "epoch": 0.8440631187376253, "grad_norm": 1.2963684797286987, "learning_rate": 4.3701182787798076e-07, "loss": 0.329, "step": 14068 }, { "epoch": 0.8441231175376492, "grad_norm": 1.287751317024231, "learning_rate": 4.3668277296962854e-07, "loss": 0.3656, "step": 14069 }, { "epoch": 0.8441831163376733, "grad_norm": 1.198594570159912, "learning_rate": 4.363538337488125e-07, "loss": 0.3333, "step": 14070 }, { "epoch": 0.8442431151376972, "grad_norm": 1.331178069114685, "learning_rate": 4.3602501022795427e-07, "loss": 0.3698, "step": 14071 }, { "epoch": 0.8443031139377213, "grad_norm": 1.5606611967086792, "learning_rate": 4.356963024194709e-07, "loss": 0.3537, "step": 14072 }, { "epoch": 0.8443631127377452, "grad_norm": 1.2229523658752441, "learning_rate": 4.3536771033577817e-07, "loss": 0.3317, "step": 14073 }, { "epoch": 0.8444231115377693, "grad_norm": 1.2471128702163696, "learning_rate": 4.3503923398928465e-07, "loss": 0.351, "step": 14074 }, { "epoch": 0.8444831103377932, "grad_norm": 1.4120994806289673, "learning_rate": 4.347108733923945e-07, "loss": 0.3657, "step": 14075 }, { "epoch": 0.8445431091378173, "grad_norm": 1.244033932685852, "learning_rate": 4.343826285575099e-07, "loss": 0.3734, "step": 14076 }, { "epoch": 0.8446031079378412, "grad_norm": 1.2360984086990356, "learning_rate": 4.3405449949702656e-07, "loss": 0.3226, "step": 14077 }, { "epoch": 0.8446631067378653, "grad_norm": 1.301377296447754, "learning_rate": 4.337264862233366e-07, "loss": 0.3639, "step": 14078 }, { "epoch": 0.8447231055378892, "grad_norm": 1.2993029356002808, "learning_rate": 4.333985887488263e-07, "loss": 0.3992, "step": 14079 }, { "epoch": 0.8447831043379133, "grad_norm": 1.3760820627212524, "learning_rate": 4.3307080708588005e-07, "loss": 0.4453, "step": 14080 }, { "epoch": 0.8448431031379372, "grad_norm": 1.2903603315353394, "learning_rate": 4.3274314124687803e-07, "loss": 0.3783, "step": 14081 }, { "epoch": 0.8449031019379613, "grad_norm": 1.2375835180282593, "learning_rate": 4.3241559124419166e-07, "loss": 0.3921, "step": 14082 }, { "epoch": 0.8449631007379852, "grad_norm": 1.2198387384414673, "learning_rate": 4.3208815709019295e-07, "loss": 0.3614, "step": 14083 }, { "epoch": 0.8450230995380092, "grad_norm": 1.3095041513442993, "learning_rate": 4.317608387972468e-07, "loss": 0.368, "step": 14084 }, { "epoch": 0.8450830983380332, "grad_norm": 1.3054559230804443, "learning_rate": 4.314336363777155e-07, "loss": 0.3445, "step": 14085 }, { "epoch": 0.8451430971380572, "grad_norm": 1.3071709871292114, "learning_rate": 4.3110654984395576e-07, "loss": 0.4044, "step": 14086 }, { "epoch": 0.8452030959380812, "grad_norm": 1.5182007551193237, "learning_rate": 4.3077957920831855e-07, "loss": 0.4245, "step": 14087 }, { "epoch": 0.8452630947381052, "grad_norm": 1.4255213737487793, "learning_rate": 4.3045272448315445e-07, "loss": 0.4063, "step": 14088 }, { "epoch": 0.8453230935381293, "grad_norm": 1.2600905895233154, "learning_rate": 4.301259856808059e-07, "loss": 0.3516, "step": 14089 }, { "epoch": 0.8453830923381532, "grad_norm": 1.351969838142395, "learning_rate": 4.2979936281361253e-07, "loss": 0.3864, "step": 14090 }, { "epoch": 0.8454430911381773, "grad_norm": 1.3317797183990479, "learning_rate": 4.2947285589390857e-07, "loss": 0.3677, "step": 14091 }, { "epoch": 0.8455030899382012, "grad_norm": 1.4850945472717285, "learning_rate": 4.291464649340264e-07, "loss": 0.3961, "step": 14092 }, { "epoch": 0.8455630887382253, "grad_norm": 1.3579449653625488, "learning_rate": 4.2882018994629165e-07, "loss": 0.3995, "step": 14093 }, { "epoch": 0.8456230875382492, "grad_norm": 1.2994078397750854, "learning_rate": 4.284940309430254e-07, "loss": 0.3762, "step": 14094 }, { "epoch": 0.8456830863382733, "grad_norm": 1.4122416973114014, "learning_rate": 4.281679879365463e-07, "loss": 0.4221, "step": 14095 }, { "epoch": 0.8457430851382972, "grad_norm": 1.393818974494934, "learning_rate": 4.278420609391665e-07, "loss": 0.3662, "step": 14096 }, { "epoch": 0.8458030839383213, "grad_norm": 1.4199398756027222, "learning_rate": 4.275162499631965e-07, "loss": 0.4135, "step": 14097 }, { "epoch": 0.8458630827383452, "grad_norm": 1.2125788927078247, "learning_rate": 4.271905550209383e-07, "loss": 0.3237, "step": 14098 }, { "epoch": 0.8459230815383693, "grad_norm": 1.2881100177764893, "learning_rate": 4.2686497612469346e-07, "loss": 0.3454, "step": 14099 }, { "epoch": 0.8459830803383932, "grad_norm": 1.374697208404541, "learning_rate": 4.265395132867575e-07, "loss": 0.336, "step": 14100 }, { "epoch": 0.8460430791384173, "grad_norm": 1.3817564249038696, "learning_rate": 4.262141665194218e-07, "loss": 0.3553, "step": 14101 }, { "epoch": 0.8461030779384412, "grad_norm": 1.3599637746810913, "learning_rate": 4.2588893583497285e-07, "loss": 0.3673, "step": 14102 }, { "epoch": 0.8461630767384652, "grad_norm": 1.4250946044921875, "learning_rate": 4.2556382124569244e-07, "loss": 0.3686, "step": 14103 }, { "epoch": 0.8462230755384892, "grad_norm": 1.3998538255691528, "learning_rate": 4.2523882276386e-07, "loss": 0.3764, "step": 14104 }, { "epoch": 0.8462830743385132, "grad_norm": 1.3045257329940796, "learning_rate": 4.249139404017488e-07, "loss": 0.3463, "step": 14105 }, { "epoch": 0.8463430731385372, "grad_norm": 1.2264811992645264, "learning_rate": 4.245891741716274e-07, "loss": 0.3575, "step": 14106 }, { "epoch": 0.8464030719385612, "grad_norm": 1.401340126991272, "learning_rate": 4.242645240857616e-07, "loss": 0.3494, "step": 14107 }, { "epoch": 0.8464630707385852, "grad_norm": 1.3824337720870972, "learning_rate": 4.23939990156412e-07, "loss": 0.3679, "step": 14108 }, { "epoch": 0.8465230695386092, "grad_norm": 1.2001605033874512, "learning_rate": 4.2361557239583455e-07, "loss": 0.3415, "step": 14109 }, { "epoch": 0.8465830683386333, "grad_norm": 1.205075979232788, "learning_rate": 4.232912708162804e-07, "loss": 0.3176, "step": 14110 }, { "epoch": 0.8466430671386572, "grad_norm": 1.3029356002807617, "learning_rate": 4.229670854299971e-07, "loss": 0.3773, "step": 14111 }, { "epoch": 0.8467030659386813, "grad_norm": 1.3467744588851929, "learning_rate": 4.226430162492296e-07, "loss": 0.3747, "step": 14112 }, { "epoch": 0.8467630647387052, "grad_norm": 1.2240186929702759, "learning_rate": 4.2231906328621384e-07, "loss": 0.3718, "step": 14113 }, { "epoch": 0.8468230635387293, "grad_norm": 1.414108157157898, "learning_rate": 4.2199522655318566e-07, "loss": 0.3766, "step": 14114 }, { "epoch": 0.8468830623387532, "grad_norm": 1.4645107984542847, "learning_rate": 4.216715060623737e-07, "loss": 0.3918, "step": 14115 }, { "epoch": 0.8469430611387773, "grad_norm": 1.3441331386566162, "learning_rate": 4.213479018260049e-07, "loss": 0.3751, "step": 14116 }, { "epoch": 0.8470030599388012, "grad_norm": 1.3157588243484497, "learning_rate": 4.210244138562993e-07, "loss": 0.3499, "step": 14117 }, { "epoch": 0.8470630587388253, "grad_norm": 1.2464005947113037, "learning_rate": 4.2070104216547336e-07, "loss": 0.3393, "step": 14118 }, { "epoch": 0.8471230575388492, "grad_norm": 1.3300018310546875, "learning_rate": 4.2037778676574057e-07, "loss": 0.378, "step": 14119 }, { "epoch": 0.8471830563388733, "grad_norm": 1.4614640474319458, "learning_rate": 4.20054647669308e-07, "loss": 0.3697, "step": 14120 }, { "epoch": 0.8472430551388972, "grad_norm": 1.3067597150802612, "learning_rate": 4.1973162488837824e-07, "loss": 0.3673, "step": 14121 }, { "epoch": 0.8473030539389212, "grad_norm": 1.3207334280014038, "learning_rate": 4.194087184351522e-07, "loss": 0.3596, "step": 14122 }, { "epoch": 0.8473630527389452, "grad_norm": 1.426119327545166, "learning_rate": 4.190859283218239e-07, "loss": 0.3683, "step": 14123 }, { "epoch": 0.8474230515389692, "grad_norm": 1.3355861902236938, "learning_rate": 4.1876325456058337e-07, "loss": 0.3581, "step": 14124 }, { "epoch": 0.8474830503389932, "grad_norm": 1.3622483015060425, "learning_rate": 4.184406971636157e-07, "loss": 0.346, "step": 14125 }, { "epoch": 0.8475430491390172, "grad_norm": 1.3790889978408813, "learning_rate": 4.181182561431043e-07, "loss": 0.4117, "step": 14126 }, { "epoch": 0.8476030479390412, "grad_norm": 1.2626471519470215, "learning_rate": 4.1779593151122536e-07, "loss": 0.3552, "step": 14127 }, { "epoch": 0.8476630467390652, "grad_norm": 1.427584171295166, "learning_rate": 4.174737232801506e-07, "loss": 0.3866, "step": 14128 }, { "epoch": 0.8477230455390892, "grad_norm": 1.3920522928237915, "learning_rate": 4.171516314620503e-07, "loss": 0.4424, "step": 14129 }, { "epoch": 0.8477830443391132, "grad_norm": 1.4705926179885864, "learning_rate": 4.1682965606908663e-07, "loss": 0.3575, "step": 14130 }, { "epoch": 0.8478430431391372, "grad_norm": 1.257088303565979, "learning_rate": 4.1650779711342085e-07, "loss": 0.3871, "step": 14131 }, { "epoch": 0.8479030419391612, "grad_norm": 1.3596097230911255, "learning_rate": 4.161860546072073e-07, "loss": 0.391, "step": 14132 }, { "epoch": 0.8479630407391853, "grad_norm": 1.2414504289627075, "learning_rate": 4.1586442856259567e-07, "loss": 0.3464, "step": 14133 }, { "epoch": 0.8480230395392092, "grad_norm": 1.3344887495040894, "learning_rate": 4.155429189917341e-07, "loss": 0.3705, "step": 14134 }, { "epoch": 0.8480830383392333, "grad_norm": 1.3830316066741943, "learning_rate": 4.152215259067637e-07, "loss": 0.4605, "step": 14135 }, { "epoch": 0.8481430371392572, "grad_norm": 1.1973357200622559, "learning_rate": 4.1490024931982206e-07, "loss": 0.3366, "step": 14136 }, { "epoch": 0.8482030359392813, "grad_norm": 1.2724286317825317, "learning_rate": 4.1457908924304173e-07, "loss": 0.4005, "step": 14137 }, { "epoch": 0.8482630347393052, "grad_norm": 1.37894606590271, "learning_rate": 4.1425804568855267e-07, "loss": 0.3199, "step": 14138 }, { "epoch": 0.8483230335393293, "grad_norm": 1.338544249534607, "learning_rate": 4.1393711866847844e-07, "loss": 0.3669, "step": 14139 }, { "epoch": 0.8483830323393532, "grad_norm": 1.502461314201355, "learning_rate": 4.136163081949388e-07, "loss": 0.3511, "step": 14140 }, { "epoch": 0.8484430311393772, "grad_norm": 1.3410155773162842, "learning_rate": 4.132956142800503e-07, "loss": 0.3243, "step": 14141 }, { "epoch": 0.8485030299394012, "grad_norm": 1.2670131921768188, "learning_rate": 4.129750369359224e-07, "loss": 0.3601, "step": 14142 }, { "epoch": 0.8485630287394252, "grad_norm": 1.4037744998931885, "learning_rate": 4.126545761746646e-07, "loss": 0.4023, "step": 14143 }, { "epoch": 0.8486230275394492, "grad_norm": 1.3435206413269043, "learning_rate": 4.123342320083761e-07, "loss": 0.3773, "step": 14144 }, { "epoch": 0.8486830263394732, "grad_norm": 1.4853113889694214, "learning_rate": 4.1201400444915644e-07, "loss": 0.4239, "step": 14145 }, { "epoch": 0.8487430251394972, "grad_norm": 1.3279165029525757, "learning_rate": 4.1169389350909967e-07, "loss": 0.3682, "step": 14146 }, { "epoch": 0.8488030239395212, "grad_norm": 1.3220192193984985, "learning_rate": 4.1137389920029414e-07, "loss": 0.3573, "step": 14147 }, { "epoch": 0.8488630227395452, "grad_norm": 1.3557655811309814, "learning_rate": 4.110540215348246e-07, "loss": 0.3751, "step": 14148 }, { "epoch": 0.8489230215395692, "grad_norm": 1.3344473838806152, "learning_rate": 4.107342605247708e-07, "loss": 0.3475, "step": 14149 }, { "epoch": 0.8489830203395932, "grad_norm": 1.4345160722732544, "learning_rate": 4.1041461618221007e-07, "loss": 0.4078, "step": 14150 }, { "epoch": 0.8490430191396172, "grad_norm": 1.3441710472106934, "learning_rate": 4.1009508851921327e-07, "loss": 0.3616, "step": 14151 }, { "epoch": 0.8491030179396412, "grad_norm": 1.3758618831634521, "learning_rate": 4.097756775478465e-07, "loss": 0.3698, "step": 14152 }, { "epoch": 0.8491630167396652, "grad_norm": 1.1902977228164673, "learning_rate": 4.094563832801743e-07, "loss": 0.3511, "step": 14153 }, { "epoch": 0.8492230155396892, "grad_norm": 1.4351097345352173, "learning_rate": 4.091372057282539e-07, "loss": 0.3956, "step": 14154 }, { "epoch": 0.8492830143397132, "grad_norm": 1.2885160446166992, "learning_rate": 4.0881814490413935e-07, "loss": 0.3062, "step": 14155 }, { "epoch": 0.8493430131397373, "grad_norm": 1.3576858043670654, "learning_rate": 4.0849920081987934e-07, "loss": 0.3811, "step": 14156 }, { "epoch": 0.8494030119397612, "grad_norm": 1.4449467658996582, "learning_rate": 4.0818037348751973e-07, "loss": 0.4069, "step": 14157 }, { "epoch": 0.8494630107397853, "grad_norm": 1.2838164567947388, "learning_rate": 4.078616629191026e-07, "loss": 0.3596, "step": 14158 }, { "epoch": 0.8495230095398092, "grad_norm": 1.374880075454712, "learning_rate": 4.07543069126661e-07, "loss": 0.3545, "step": 14159 }, { "epoch": 0.8495830083398332, "grad_norm": 1.4726277589797974, "learning_rate": 4.0722459212222923e-07, "loss": 0.3804, "step": 14160 }, { "epoch": 0.8496430071398572, "grad_norm": 1.4639651775360107, "learning_rate": 4.069062319178334e-07, "loss": 0.3652, "step": 14161 }, { "epoch": 0.8497030059398812, "grad_norm": 1.4049488306045532, "learning_rate": 4.0658798852549797e-07, "loss": 0.3642, "step": 14162 }, { "epoch": 0.8497630047399052, "grad_norm": 1.454708218574524, "learning_rate": 4.062698619572402e-07, "loss": 0.3858, "step": 14163 }, { "epoch": 0.8498230035399292, "grad_norm": 1.3955801725387573, "learning_rate": 4.0595185222507455e-07, "loss": 0.4084, "step": 14164 }, { "epoch": 0.8498830023399532, "grad_norm": 1.5087943077087402, "learning_rate": 4.056339593410115e-07, "loss": 0.3984, "step": 14165 }, { "epoch": 0.8499430011399772, "grad_norm": 1.323291301727295, "learning_rate": 4.0531618331705587e-07, "loss": 0.372, "step": 14166 }, { "epoch": 0.8500029999400012, "grad_norm": 1.3416111469268799, "learning_rate": 4.049985241652084e-07, "loss": 0.4157, "step": 14167 }, { "epoch": 0.8500629987400252, "grad_norm": 1.4451948404312134, "learning_rate": 4.0468098189746535e-07, "loss": 0.3382, "step": 14168 }, { "epoch": 0.8501229975400492, "grad_norm": 1.3694884777069092, "learning_rate": 4.043635565258202e-07, "loss": 0.3307, "step": 14169 }, { "epoch": 0.8501829963400732, "grad_norm": 1.2952256202697754, "learning_rate": 4.0404624806226006e-07, "loss": 0.3935, "step": 14170 }, { "epoch": 0.8502429951400972, "grad_norm": 1.369219422340393, "learning_rate": 4.037290565187669e-07, "loss": 0.3822, "step": 14171 }, { "epoch": 0.8503029939401212, "grad_norm": 1.4211472272872925, "learning_rate": 4.0341198190732165e-07, "loss": 0.3811, "step": 14172 }, { "epoch": 0.8503629927401452, "grad_norm": 1.4151856899261475, "learning_rate": 4.030950242398976e-07, "loss": 0.3466, "step": 14173 }, { "epoch": 0.8504229915401692, "grad_norm": 1.3473151922225952, "learning_rate": 4.027781835284644e-07, "loss": 0.4101, "step": 14174 }, { "epoch": 0.8504829903401931, "grad_norm": 1.3271182775497437, "learning_rate": 4.024614597849889e-07, "loss": 0.3674, "step": 14175 }, { "epoch": 0.8505429891402172, "grad_norm": 1.210867166519165, "learning_rate": 4.0214485302143105e-07, "loss": 0.3417, "step": 14176 }, { "epoch": 0.8506029879402411, "grad_norm": 1.337480902671814, "learning_rate": 4.018283632497493e-07, "loss": 0.3922, "step": 14177 }, { "epoch": 0.8506629867402652, "grad_norm": 1.2606347799301147, "learning_rate": 4.015119904818948e-07, "loss": 0.37, "step": 14178 }, { "epoch": 0.8507229855402892, "grad_norm": 1.4010323286056519, "learning_rate": 4.0119573472981587e-07, "loss": 0.3476, "step": 14179 }, { "epoch": 0.8507829843403132, "grad_norm": 1.259669303894043, "learning_rate": 4.008795960054548e-07, "loss": 0.3326, "step": 14180 }, { "epoch": 0.8508429831403372, "grad_norm": 1.4361449480056763, "learning_rate": 4.005635743207529e-07, "loss": 0.3959, "step": 14181 }, { "epoch": 0.8509029819403612, "grad_norm": 1.2637805938720703, "learning_rate": 4.002476696876438e-07, "loss": 0.353, "step": 14182 }, { "epoch": 0.8509629807403852, "grad_norm": 1.379593014717102, "learning_rate": 3.9993188211805723e-07, "loss": 0.4019, "step": 14183 }, { "epoch": 0.8510229795404092, "grad_norm": 1.3709139823913574, "learning_rate": 3.9961621162391986e-07, "loss": 0.3854, "step": 14184 }, { "epoch": 0.8510829783404332, "grad_norm": 1.2343426942825317, "learning_rate": 3.993006582171532e-07, "loss": 0.3882, "step": 14185 }, { "epoch": 0.8511429771404572, "grad_norm": 1.4384028911590576, "learning_rate": 3.989852219096731e-07, "loss": 0.3574, "step": 14186 }, { "epoch": 0.8512029759404812, "grad_norm": 1.4719117879867554, "learning_rate": 3.9866990271339357e-07, "loss": 0.3493, "step": 14187 }, { "epoch": 0.8512629747405052, "grad_norm": 1.4833811521530151, "learning_rate": 3.98354700640222e-07, "loss": 0.4007, "step": 14188 }, { "epoch": 0.8513229735405292, "grad_norm": 1.3814595937728882, "learning_rate": 3.980396157020631e-07, "loss": 0.3755, "step": 14189 }, { "epoch": 0.8513829723405532, "grad_norm": 1.546352744102478, "learning_rate": 3.9772464791081454e-07, "loss": 0.3766, "step": 14190 }, { "epoch": 0.8514429711405772, "grad_norm": 1.4846060276031494, "learning_rate": 3.974097972783724e-07, "loss": 0.3726, "step": 14191 }, { "epoch": 0.8515029699406012, "grad_norm": 1.4786560535430908, "learning_rate": 3.970950638166262e-07, "loss": 0.385, "step": 14192 }, { "epoch": 0.8515629687406252, "grad_norm": 1.3724946975708008, "learning_rate": 3.967804475374633e-07, "loss": 0.3347, "step": 14193 }, { "epoch": 0.8516229675406491, "grad_norm": 1.4065614938735962, "learning_rate": 3.964659484527647e-07, "loss": 0.4044, "step": 14194 }, { "epoch": 0.8516829663406732, "grad_norm": 1.3565033674240112, "learning_rate": 3.961515665744071e-07, "loss": 0.3663, "step": 14195 }, { "epoch": 0.8517429651406971, "grad_norm": 1.4529361724853516, "learning_rate": 3.9583730191426416e-07, "loss": 0.3803, "step": 14196 }, { "epoch": 0.8518029639407212, "grad_norm": 1.2909640073776245, "learning_rate": 3.955231544842042e-07, "loss": 0.3412, "step": 14197 }, { "epoch": 0.8518629627407451, "grad_norm": 1.2793188095092773, "learning_rate": 3.952091242960894e-07, "loss": 0.3699, "step": 14198 }, { "epoch": 0.8519229615407692, "grad_norm": 1.2068723440170288, "learning_rate": 3.94895211361782e-07, "loss": 0.3509, "step": 14199 }, { "epoch": 0.8519829603407932, "grad_norm": 1.3041247129440308, "learning_rate": 3.945814156931349e-07, "loss": 0.3629, "step": 14200 }, { "epoch": 0.8520429591408172, "grad_norm": 1.3169108629226685, "learning_rate": 3.94267737302e-07, "loss": 0.3498, "step": 14201 }, { "epoch": 0.8521029579408412, "grad_norm": 1.2211415767669678, "learning_rate": 3.9395417620022214e-07, "loss": 0.3491, "step": 14202 }, { "epoch": 0.8521629567408652, "grad_norm": 1.3842674493789673, "learning_rate": 3.9364073239964373e-07, "loss": 0.3754, "step": 14203 }, { "epoch": 0.8522229555408892, "grad_norm": 1.4995051622390747, "learning_rate": 3.93327405912104e-07, "loss": 0.399, "step": 14204 }, { "epoch": 0.8522829543409132, "grad_norm": 1.2828809022903442, "learning_rate": 3.9301419674943265e-07, "loss": 0.3713, "step": 14205 }, { "epoch": 0.8523429531409372, "grad_norm": 1.4030874967575073, "learning_rate": 3.927011049234605e-07, "loss": 0.3454, "step": 14206 }, { "epoch": 0.8524029519409612, "grad_norm": 1.3380355834960938, "learning_rate": 3.9238813044600994e-07, "loss": 0.3178, "step": 14207 }, { "epoch": 0.8524629507409852, "grad_norm": 1.1733967065811157, "learning_rate": 3.920752733289031e-07, "loss": 0.331, "step": 14208 }, { "epoch": 0.8525229495410092, "grad_norm": 1.3024964332580566, "learning_rate": 3.91762533583952e-07, "loss": 0.3532, "step": 14209 }, { "epoch": 0.8525829483410332, "grad_norm": 1.2691177129745483, "learning_rate": 3.91449911222969e-07, "loss": 0.3851, "step": 14210 }, { "epoch": 0.8526429471410572, "grad_norm": 1.294112205505371, "learning_rate": 3.9113740625776066e-07, "loss": 0.372, "step": 14211 }, { "epoch": 0.8527029459410812, "grad_norm": 1.3642816543579102, "learning_rate": 3.9082501870012884e-07, "loss": 0.3677, "step": 14212 }, { "epoch": 0.8527629447411051, "grad_norm": 1.3724206686019897, "learning_rate": 3.905127485618708e-07, "loss": 0.3775, "step": 14213 }, { "epoch": 0.8528229435411292, "grad_norm": 1.3312065601348877, "learning_rate": 3.902005958547784e-07, "loss": 0.382, "step": 14214 }, { "epoch": 0.8528829423411531, "grad_norm": 1.333289623260498, "learning_rate": 3.8988856059064213e-07, "loss": 0.3208, "step": 14215 }, { "epoch": 0.8529429411411772, "grad_norm": 1.3247419595718384, "learning_rate": 3.895766427812453e-07, "loss": 0.4235, "step": 14216 }, { "epoch": 0.8530029399412011, "grad_norm": 1.4709150791168213, "learning_rate": 3.8926484243836726e-07, "loss": 0.3921, "step": 14217 }, { "epoch": 0.8530629387412252, "grad_norm": 1.2997301816940308, "learning_rate": 3.889531595737839e-07, "loss": 0.3877, "step": 14218 }, { "epoch": 0.8531229375412491, "grad_norm": 1.4887878894805908, "learning_rate": 3.8864159419926596e-07, "loss": 0.3981, "step": 14219 }, { "epoch": 0.8531829363412732, "grad_norm": 1.2872170209884644, "learning_rate": 3.883301463265796e-07, "loss": 0.3713, "step": 14220 }, { "epoch": 0.8532429351412971, "grad_norm": 1.3201899528503418, "learning_rate": 3.8801881596748637e-07, "loss": 0.3585, "step": 14221 }, { "epoch": 0.8533029339413212, "grad_norm": 1.6883429288864136, "learning_rate": 3.877076031337442e-07, "loss": 0.3961, "step": 14222 }, { "epoch": 0.8533629327413452, "grad_norm": 1.2767422199249268, "learning_rate": 3.8739650783710717e-07, "loss": 0.3326, "step": 14223 }, { "epoch": 0.8534229315413692, "grad_norm": 1.3118149042129517, "learning_rate": 3.870855300893228e-07, "loss": 0.3471, "step": 14224 }, { "epoch": 0.8534829303413932, "grad_norm": 1.353139042854309, "learning_rate": 3.8677466990213584e-07, "loss": 0.3844, "step": 14225 }, { "epoch": 0.8535429291414172, "grad_norm": 1.293502688407898, "learning_rate": 3.8646392728728487e-07, "loss": 0.3404, "step": 14226 }, { "epoch": 0.8536029279414412, "grad_norm": 1.3210023641586304, "learning_rate": 3.8615330225650684e-07, "loss": 0.3416, "step": 14227 }, { "epoch": 0.8536629267414652, "grad_norm": 1.313427209854126, "learning_rate": 3.8584279482153185e-07, "loss": 0.3836, "step": 14228 }, { "epoch": 0.8537229255414892, "grad_norm": 1.3103424310684204, "learning_rate": 3.855324049940855e-07, "loss": 0.3738, "step": 14229 }, { "epoch": 0.8537829243415131, "grad_norm": 1.363951563835144, "learning_rate": 3.852221327858921e-07, "loss": 0.3368, "step": 14230 }, { "epoch": 0.8538429231415372, "grad_norm": 1.3662153482437134, "learning_rate": 3.8491197820866725e-07, "loss": 0.3745, "step": 14231 }, { "epoch": 0.8539029219415611, "grad_norm": 1.4284600019454956, "learning_rate": 3.8460194127412494e-07, "loss": 0.4199, "step": 14232 }, { "epoch": 0.8539629207415852, "grad_norm": 1.4438061714172363, "learning_rate": 3.842920219939731e-07, "loss": 0.3674, "step": 14233 }, { "epoch": 0.8540229195416091, "grad_norm": 1.4135152101516724, "learning_rate": 3.8398222037991607e-07, "loss": 0.3838, "step": 14234 }, { "epoch": 0.8540829183416332, "grad_norm": 1.447409749031067, "learning_rate": 3.836725364436555e-07, "loss": 0.3533, "step": 14235 }, { "epoch": 0.8541429171416571, "grad_norm": 1.3722172975540161, "learning_rate": 3.8336297019688406e-07, "loss": 0.4087, "step": 14236 }, { "epoch": 0.8542029159416812, "grad_norm": 1.3152788877487183, "learning_rate": 3.8305352165129453e-07, "loss": 0.3335, "step": 14237 }, { "epoch": 0.8542629147417051, "grad_norm": 1.3457175493240356, "learning_rate": 3.827441908185721e-07, "loss": 0.3709, "step": 14238 }, { "epoch": 0.8543229135417292, "grad_norm": 1.329890251159668, "learning_rate": 3.8243497771039993e-07, "loss": 0.4367, "step": 14239 }, { "epoch": 0.8543829123417531, "grad_norm": 1.3009990453720093, "learning_rate": 3.8212588233845503e-07, "loss": 0.3988, "step": 14240 }, { "epoch": 0.8544429111417772, "grad_norm": 1.3432528972625732, "learning_rate": 3.8181690471440993e-07, "loss": 0.4097, "step": 14241 }, { "epoch": 0.8545029099418011, "grad_norm": 1.2936228513717651, "learning_rate": 3.815080448499348e-07, "loss": 0.3979, "step": 14242 }, { "epoch": 0.8545629087418252, "grad_norm": 1.3936035633087158, "learning_rate": 3.8119930275669287e-07, "loss": 0.3949, "step": 14243 }, { "epoch": 0.8546229075418491, "grad_norm": 1.3104811906814575, "learning_rate": 3.80890678446343e-07, "loss": 0.3958, "step": 14244 }, { "epoch": 0.8546829063418732, "grad_norm": 1.3647769689559937, "learning_rate": 3.805821719305429e-07, "loss": 0.3845, "step": 14245 }, { "epoch": 0.8547429051418972, "grad_norm": 1.3472793102264404, "learning_rate": 3.8027378322094173e-07, "loss": 0.3523, "step": 14246 }, { "epoch": 0.8548029039419212, "grad_norm": 1.394262433052063, "learning_rate": 3.799655123291864e-07, "loss": 0.4025, "step": 14247 }, { "epoch": 0.8548629027419452, "grad_norm": 1.2600377798080444, "learning_rate": 3.796573592669183e-07, "loss": 0.3818, "step": 14248 }, { "epoch": 0.8549229015419691, "grad_norm": 1.2489418983459473, "learning_rate": 3.793493240457761e-07, "loss": 0.3923, "step": 14249 }, { "epoch": 0.8549829003419932, "grad_norm": 1.3483502864837646, "learning_rate": 3.790414066773919e-07, "loss": 0.3811, "step": 14250 }, { "epoch": 0.8550428991420171, "grad_norm": 1.3962881565093994, "learning_rate": 3.7873360717339464e-07, "loss": 0.4059, "step": 14251 }, { "epoch": 0.8551028979420412, "grad_norm": 1.2710731029510498, "learning_rate": 3.784259255454087e-07, "loss": 0.352, "step": 14252 }, { "epoch": 0.8551628967420651, "grad_norm": 1.3077013492584229, "learning_rate": 3.781183618050533e-07, "loss": 0.3774, "step": 14253 }, { "epoch": 0.8552228955420892, "grad_norm": 1.4257092475891113, "learning_rate": 3.778109159639458e-07, "loss": 0.3755, "step": 14254 }, { "epoch": 0.8552828943421131, "grad_norm": 1.3566759824752808, "learning_rate": 3.775035880336933e-07, "loss": 0.363, "step": 14255 }, { "epoch": 0.8553428931421372, "grad_norm": 1.4328361749649048, "learning_rate": 3.771963780259047e-07, "loss": 0.3706, "step": 14256 }, { "epoch": 0.8554028919421611, "grad_norm": 1.3225901126861572, "learning_rate": 3.768892859521821e-07, "loss": 0.3597, "step": 14257 }, { "epoch": 0.8554628907421852, "grad_norm": 1.2753934860229492, "learning_rate": 3.7658231182412226e-07, "loss": 0.3439, "step": 14258 }, { "epoch": 0.8555228895422091, "grad_norm": 1.4006352424621582, "learning_rate": 3.76275455653318e-07, "loss": 0.3479, "step": 14259 }, { "epoch": 0.8555828883422332, "grad_norm": 1.2766857147216797, "learning_rate": 3.7596871745135787e-07, "loss": 0.3615, "step": 14260 }, { "epoch": 0.8556428871422571, "grad_norm": 1.4270397424697876, "learning_rate": 3.756620972298265e-07, "loss": 0.3904, "step": 14261 }, { "epoch": 0.8557028859422812, "grad_norm": 1.375567078590393, "learning_rate": 3.753555950003036e-07, "loss": 0.38, "step": 14262 }, { "epoch": 0.8557628847423051, "grad_norm": 1.2919337749481201, "learning_rate": 3.7504921077436335e-07, "loss": 0.3453, "step": 14263 }, { "epoch": 0.8558228835423292, "grad_norm": 1.359184741973877, "learning_rate": 3.7474294456357757e-07, "loss": 0.4125, "step": 14264 }, { "epoch": 0.8558828823423531, "grad_norm": 1.2876296043395996, "learning_rate": 3.744367963795123e-07, "loss": 0.377, "step": 14265 }, { "epoch": 0.8559428811423772, "grad_norm": 1.3720654249191284, "learning_rate": 3.741307662337293e-07, "loss": 0.3475, "step": 14266 }, { "epoch": 0.8560028799424012, "grad_norm": 1.5264042615890503, "learning_rate": 3.7382485413778486e-07, "loss": 0.3893, "step": 14267 }, { "epoch": 0.8560628787424251, "grad_norm": 1.2724186182022095, "learning_rate": 3.73519060103233e-07, "loss": 0.3654, "step": 14268 }, { "epoch": 0.8561228775424492, "grad_norm": 1.3557265996932983, "learning_rate": 3.7321338414162294e-07, "loss": 0.4151, "step": 14269 }, { "epoch": 0.8561828763424731, "grad_norm": 1.3088867664337158, "learning_rate": 3.7290782626449755e-07, "loss": 0.3853, "step": 14270 }, { "epoch": 0.8562428751424972, "grad_norm": 1.3727569580078125, "learning_rate": 3.7260238648339626e-07, "loss": 0.3675, "step": 14271 }, { "epoch": 0.8563028739425211, "grad_norm": 1.403369665145874, "learning_rate": 3.7229706480985414e-07, "loss": 0.3549, "step": 14272 }, { "epoch": 0.8563628727425452, "grad_norm": 1.3216115236282349, "learning_rate": 3.7199186125540294e-07, "loss": 0.4209, "step": 14273 }, { "epoch": 0.8564228715425691, "grad_norm": 1.1947815418243408, "learning_rate": 3.7168677583156757e-07, "loss": 0.3154, "step": 14274 }, { "epoch": 0.8564828703425932, "grad_norm": 1.490080714225769, "learning_rate": 3.713818085498697e-07, "loss": 0.3897, "step": 14275 }, { "epoch": 0.8565428691426171, "grad_norm": 1.283951997756958, "learning_rate": 3.7107695942182753e-07, "loss": 0.3853, "step": 14276 }, { "epoch": 0.8566028679426412, "grad_norm": 1.3458501100540161, "learning_rate": 3.7077222845895317e-07, "loss": 0.3983, "step": 14277 }, { "epoch": 0.8566628667426651, "grad_norm": 1.2585307359695435, "learning_rate": 3.704676156727555e-07, "loss": 0.3206, "step": 14278 }, { "epoch": 0.8567228655426892, "grad_norm": 1.3536908626556396, "learning_rate": 3.7016312107473675e-07, "loss": 0.3987, "step": 14279 }, { "epoch": 0.8567828643427131, "grad_norm": 1.4135496616363525, "learning_rate": 3.698587446763973e-07, "loss": 0.3977, "step": 14280 }, { "epoch": 0.8568428631427372, "grad_norm": 1.5702400207519531, "learning_rate": 3.6955448648923373e-07, "loss": 0.3933, "step": 14281 }, { "epoch": 0.8569028619427611, "grad_norm": 1.181715726852417, "learning_rate": 3.692503465247338e-07, "loss": 0.3482, "step": 14282 }, { "epoch": 0.8569628607427852, "grad_norm": 1.3626298904418945, "learning_rate": 3.689463247943852e-07, "loss": 0.3895, "step": 14283 }, { "epoch": 0.8570228595428091, "grad_norm": 1.4216705560684204, "learning_rate": 3.686424213096679e-07, "loss": 0.357, "step": 14284 }, { "epoch": 0.8570828583428332, "grad_norm": 1.3254987001419067, "learning_rate": 3.683386360820606e-07, "loss": 0.382, "step": 14285 }, { "epoch": 0.8571428571428571, "grad_norm": 1.3372151851654053, "learning_rate": 3.6803496912303537e-07, "loss": 0.3319, "step": 14286 }, { "epoch": 0.8572028559428811, "grad_norm": 1.295183539390564, "learning_rate": 3.677314204440594e-07, "loss": 0.3869, "step": 14287 }, { "epoch": 0.8572628547429051, "grad_norm": 1.4105793237686157, "learning_rate": 3.6742799005659807e-07, "loss": 0.3819, "step": 14288 }, { "epoch": 0.8573228535429291, "grad_norm": 1.3326421976089478, "learning_rate": 3.6712467797210953e-07, "loss": 0.3233, "step": 14289 }, { "epoch": 0.8573828523429532, "grad_norm": 1.3356388807296753, "learning_rate": 3.6682148420204815e-07, "loss": 0.3303, "step": 14290 }, { "epoch": 0.8574428511429771, "grad_norm": 1.5275068283081055, "learning_rate": 3.6651840875786417e-07, "loss": 0.4211, "step": 14291 }, { "epoch": 0.8575028499430012, "grad_norm": 1.3293126821517944, "learning_rate": 3.662154516510044e-07, "loss": 0.3519, "step": 14292 }, { "epoch": 0.8575628487430251, "grad_norm": 1.2879923582077026, "learning_rate": 3.6591261289290965e-07, "loss": 0.3336, "step": 14293 }, { "epoch": 0.8576228475430492, "grad_norm": 1.4082679748535156, "learning_rate": 3.6560989249501583e-07, "loss": 0.3419, "step": 14294 }, { "epoch": 0.8576828463430731, "grad_norm": 1.7141828536987305, "learning_rate": 3.6530729046875687e-07, "loss": 0.3859, "step": 14295 }, { "epoch": 0.8577428451430972, "grad_norm": 1.4345877170562744, "learning_rate": 3.6500480682556004e-07, "loss": 0.3578, "step": 14296 }, { "epoch": 0.8578028439431211, "grad_norm": 1.3041057586669922, "learning_rate": 3.6470244157684796e-07, "loss": 0.3726, "step": 14297 }, { "epoch": 0.8578628427431452, "grad_norm": 1.3024239540100098, "learning_rate": 3.6440019473404095e-07, "loss": 0.3765, "step": 14298 }, { "epoch": 0.8579228415431691, "grad_norm": 1.3165735006332397, "learning_rate": 3.6409806630855193e-07, "loss": 0.3687, "step": 14299 }, { "epoch": 0.8579828403431932, "grad_norm": 1.3815017938613892, "learning_rate": 3.637960563117931e-07, "loss": 0.3713, "step": 14300 }, { "epoch": 0.8580428391432171, "grad_norm": 1.2859798669815063, "learning_rate": 3.6349416475516767e-07, "loss": 0.3502, "step": 14301 }, { "epoch": 0.8581028379432412, "grad_norm": 1.4061346054077148, "learning_rate": 3.6319239165007836e-07, "loss": 0.3615, "step": 14302 }, { "epoch": 0.8581628367432651, "grad_norm": 1.1786881685256958, "learning_rate": 3.6289073700792036e-07, "loss": 0.3224, "step": 14303 }, { "epoch": 0.8582228355432892, "grad_norm": 1.2515747547149658, "learning_rate": 3.6258920084008747e-07, "loss": 0.3568, "step": 14304 }, { "epoch": 0.8582828343433131, "grad_norm": 1.298451542854309, "learning_rate": 3.6228778315796614e-07, "loss": 0.3854, "step": 14305 }, { "epoch": 0.8583428331433371, "grad_norm": 1.2836577892303467, "learning_rate": 3.6198648397293955e-07, "loss": 0.363, "step": 14306 }, { "epoch": 0.8584028319433611, "grad_norm": 1.453789472579956, "learning_rate": 3.6168530329638706e-07, "loss": 0.4, "step": 14307 }, { "epoch": 0.8584628307433851, "grad_norm": 1.373976230621338, "learning_rate": 3.613842411396828e-07, "loss": 0.354, "step": 14308 }, { "epoch": 0.8585228295434091, "grad_norm": 1.3842171430587769, "learning_rate": 3.610832975141953e-07, "loss": 0.3876, "step": 14309 }, { "epoch": 0.8585828283434331, "grad_norm": 1.3634189367294312, "learning_rate": 3.607824724312918e-07, "loss": 0.3835, "step": 14310 }, { "epoch": 0.8586428271434571, "grad_norm": 1.260818362236023, "learning_rate": 3.6048176590233113e-07, "loss": 0.3344, "step": 14311 }, { "epoch": 0.8587028259434811, "grad_norm": 1.3385534286499023, "learning_rate": 3.6018117793867214e-07, "loss": 0.3862, "step": 14312 }, { "epoch": 0.8587628247435052, "grad_norm": 1.3501816987991333, "learning_rate": 3.598807085516634e-07, "loss": 0.3268, "step": 14313 }, { "epoch": 0.8588228235435291, "grad_norm": 1.4844642877578735, "learning_rate": 3.595803577526547e-07, "loss": 0.3903, "step": 14314 }, { "epoch": 0.8588828223435532, "grad_norm": 1.4360527992248535, "learning_rate": 3.5928012555298763e-07, "loss": 0.3334, "step": 14315 }, { "epoch": 0.8589428211435771, "grad_norm": 1.3351023197174072, "learning_rate": 3.5898001196400205e-07, "loss": 0.4305, "step": 14316 }, { "epoch": 0.8590028199436012, "grad_norm": 1.3479186296463013, "learning_rate": 3.586800169970307e-07, "loss": 0.3918, "step": 14317 }, { "epoch": 0.8590628187436251, "grad_norm": 1.3375496864318848, "learning_rate": 3.5838014066340233e-07, "loss": 0.3806, "step": 14318 }, { "epoch": 0.8591228175436492, "grad_norm": 1.3235303163528442, "learning_rate": 3.5808038297444396e-07, "loss": 0.3741, "step": 14319 }, { "epoch": 0.8591828163436731, "grad_norm": 1.242396593093872, "learning_rate": 3.577807439414748e-07, "loss": 0.3969, "step": 14320 }, { "epoch": 0.8592428151436972, "grad_norm": 1.3210058212280273, "learning_rate": 3.574812235758103e-07, "loss": 0.35, "step": 14321 }, { "epoch": 0.8593028139437211, "grad_norm": 1.3312619924545288, "learning_rate": 3.571818218887635e-07, "loss": 0.3708, "step": 14322 }, { "epoch": 0.8593628127437452, "grad_norm": 1.4386873245239258, "learning_rate": 3.568825388916405e-07, "loss": 0.3709, "step": 14323 }, { "epoch": 0.8594228115437691, "grad_norm": 1.2635512351989746, "learning_rate": 3.5658337459574426e-07, "loss": 0.3232, "step": 14324 }, { "epoch": 0.8594828103437931, "grad_norm": 1.2385821342468262, "learning_rate": 3.562843290123718e-07, "loss": 0.3568, "step": 14325 }, { "epoch": 0.8595428091438171, "grad_norm": 1.3750849962234497, "learning_rate": 3.5598540215281734e-07, "loss": 0.3739, "step": 14326 }, { "epoch": 0.8596028079438411, "grad_norm": 1.3305171728134155, "learning_rate": 3.556865940283716e-07, "loss": 0.396, "step": 14327 }, { "epoch": 0.8596628067438651, "grad_norm": 1.280708909034729, "learning_rate": 3.5538790465031667e-07, "loss": 0.3641, "step": 14328 }, { "epoch": 0.8597228055438891, "grad_norm": 1.3415658473968506, "learning_rate": 3.550893340299341e-07, "loss": 0.3658, "step": 14329 }, { "epoch": 0.8597828043439131, "grad_norm": 1.3405715227127075, "learning_rate": 3.5479088217849847e-07, "loss": 0.3413, "step": 14330 }, { "epoch": 0.8598428031439371, "grad_norm": 1.330110788345337, "learning_rate": 3.544925491072832e-07, "loss": 0.3287, "step": 14331 }, { "epoch": 0.8599028019439611, "grad_norm": 1.4959381818771362, "learning_rate": 3.5419433482755206e-07, "loss": 0.4002, "step": 14332 }, { "epoch": 0.8599628007439851, "grad_norm": 1.3184990882873535, "learning_rate": 3.5389623935056875e-07, "loss": 0.337, "step": 14333 }, { "epoch": 0.8600227995440091, "grad_norm": 1.3398798704147339, "learning_rate": 3.535982626875915e-07, "loss": 0.409, "step": 14334 }, { "epoch": 0.8600827983440331, "grad_norm": 1.2561109066009521, "learning_rate": 3.5330040484987273e-07, "loss": 0.3758, "step": 14335 }, { "epoch": 0.8601427971440572, "grad_norm": 1.3594551086425781, "learning_rate": 3.5300266584866145e-07, "loss": 0.3357, "step": 14336 }, { "epoch": 0.8602027959440811, "grad_norm": 1.265020728111267, "learning_rate": 3.5270504569520114e-07, "loss": 0.3668, "step": 14337 }, { "epoch": 0.8602627947441052, "grad_norm": 1.2295446395874023, "learning_rate": 3.5240754440073304e-07, "loss": 0.3811, "step": 14338 }, { "epoch": 0.8603227935441291, "grad_norm": 1.3062337636947632, "learning_rate": 3.521101619764912e-07, "loss": 0.3599, "step": 14339 }, { "epoch": 0.8603827923441532, "grad_norm": 1.322034478187561, "learning_rate": 3.518128984337064e-07, "loss": 0.3598, "step": 14340 }, { "epoch": 0.8604427911441771, "grad_norm": 1.2564318180084229, "learning_rate": 3.515157537836062e-07, "loss": 0.357, "step": 14341 }, { "epoch": 0.8605027899442012, "grad_norm": 1.4596858024597168, "learning_rate": 3.512187280374111e-07, "loss": 0.3526, "step": 14342 }, { "epoch": 0.8605627887442251, "grad_norm": 1.4726685285568237, "learning_rate": 3.509218212063391e-07, "loss": 0.3869, "step": 14343 }, { "epoch": 0.8606227875442491, "grad_norm": 1.2659059762954712, "learning_rate": 3.506250333016017e-07, "loss": 0.3272, "step": 14344 }, { "epoch": 0.8606827863442731, "grad_norm": 1.291414499282837, "learning_rate": 3.503283643344087e-07, "loss": 0.3331, "step": 14345 }, { "epoch": 0.8607427851442971, "grad_norm": 1.3180350065231323, "learning_rate": 3.5003181431596476e-07, "loss": 0.3828, "step": 14346 }, { "epoch": 0.8608027839443211, "grad_norm": 1.2080538272857666, "learning_rate": 3.497353832574663e-07, "loss": 0.3561, "step": 14347 }, { "epoch": 0.8608627827443451, "grad_norm": 1.3594731092453003, "learning_rate": 3.4943907117011095e-07, "loss": 0.3673, "step": 14348 }, { "epoch": 0.8609227815443691, "grad_norm": 1.4545410871505737, "learning_rate": 3.4914287806508715e-07, "loss": 0.3576, "step": 14349 }, { "epoch": 0.8609827803443931, "grad_norm": 1.4826991558074951, "learning_rate": 3.4884680395358196e-07, "loss": 0.4209, "step": 14350 }, { "epoch": 0.8610427791444171, "grad_norm": 1.2819428443908691, "learning_rate": 3.4855084884677676e-07, "loss": 0.367, "step": 14351 }, { "epoch": 0.8611027779444411, "grad_norm": 1.3882800340652466, "learning_rate": 3.482550127558471e-07, "loss": 0.3649, "step": 14352 }, { "epoch": 0.8611627767444651, "grad_norm": 1.258177638053894, "learning_rate": 3.479592956919672e-07, "loss": 0.4025, "step": 14353 }, { "epoch": 0.8612227755444891, "grad_norm": 1.3717153072357178, "learning_rate": 3.476636976663041e-07, "loss": 0.3639, "step": 14354 }, { "epoch": 0.8612827743445131, "grad_norm": 1.1842997074127197, "learning_rate": 3.473682186900209e-07, "loss": 0.323, "step": 14355 }, { "epoch": 0.8613427731445371, "grad_norm": 1.4553089141845703, "learning_rate": 3.470728587742759e-07, "loss": 0.3725, "step": 14356 }, { "epoch": 0.8614027719445612, "grad_norm": 1.4493589401245117, "learning_rate": 3.4677761793022447e-07, "loss": 0.3509, "step": 14357 }, { "epoch": 0.8614627707445851, "grad_norm": 1.3028994798660278, "learning_rate": 3.4648249616901803e-07, "loss": 0.3831, "step": 14358 }, { "epoch": 0.8615227695446092, "grad_norm": 1.365770936012268, "learning_rate": 3.4618749350179864e-07, "loss": 0.3809, "step": 14359 }, { "epoch": 0.8615827683446331, "grad_norm": 1.4342772960662842, "learning_rate": 3.458926099397096e-07, "loss": 0.4139, "step": 14360 }, { "epoch": 0.8616427671446572, "grad_norm": 1.2575554847717285, "learning_rate": 3.4559784549388595e-07, "loss": 0.3625, "step": 14361 }, { "epoch": 0.8617027659446811, "grad_norm": 1.1828302145004272, "learning_rate": 3.453032001754608e-07, "loss": 0.3307, "step": 14362 }, { "epoch": 0.8617627647447051, "grad_norm": 1.405005931854248, "learning_rate": 3.4500867399556123e-07, "loss": 0.4144, "step": 14363 }, { "epoch": 0.8618227635447291, "grad_norm": 1.4402061700820923, "learning_rate": 3.447142669653094e-07, "loss": 0.3608, "step": 14364 }, { "epoch": 0.8618827623447531, "grad_norm": 1.2139480113983154, "learning_rate": 3.444199790958248e-07, "loss": 0.3474, "step": 14365 }, { "epoch": 0.8619427611447771, "grad_norm": 1.4571455717086792, "learning_rate": 3.441258103982205e-07, "loss": 0.3976, "step": 14366 }, { "epoch": 0.8620027599448011, "grad_norm": 1.3024824857711792, "learning_rate": 3.4383176088360593e-07, "loss": 0.358, "step": 14367 }, { "epoch": 0.8620627587448251, "grad_norm": 1.4194421768188477, "learning_rate": 3.435378305630866e-07, "loss": 0.3719, "step": 14368 }, { "epoch": 0.8621227575448491, "grad_norm": 1.5798847675323486, "learning_rate": 3.4324401944776307e-07, "loss": 0.3838, "step": 14369 }, { "epoch": 0.8621827563448731, "grad_norm": 1.2822761535644531, "learning_rate": 3.4295032754873034e-07, "loss": 0.3757, "step": 14370 }, { "epoch": 0.8622427551448971, "grad_norm": 1.3155187368392944, "learning_rate": 3.4265675487707964e-07, "loss": 0.3656, "step": 14371 }, { "epoch": 0.8623027539449211, "grad_norm": 1.4382998943328857, "learning_rate": 3.4236330144389943e-07, "loss": 0.3631, "step": 14372 }, { "epoch": 0.8623627527449451, "grad_norm": 1.4973580837249756, "learning_rate": 3.420699672602712e-07, "loss": 0.4058, "step": 14373 }, { "epoch": 0.8624227515449691, "grad_norm": 1.3331241607666016, "learning_rate": 3.417767523372717e-07, "loss": 0.3484, "step": 14374 }, { "epoch": 0.8624827503449931, "grad_norm": 1.3425726890563965, "learning_rate": 3.4148365668597655e-07, "loss": 0.3658, "step": 14375 }, { "epoch": 0.862542749145017, "grad_norm": 1.4065450429916382, "learning_rate": 3.411906803174526e-07, "loss": 0.398, "step": 14376 }, { "epoch": 0.8626027479450411, "grad_norm": 1.1766057014465332, "learning_rate": 3.408978232427664e-07, "loss": 0.362, "step": 14377 }, { "epoch": 0.862662746745065, "grad_norm": 1.478861927986145, "learning_rate": 3.406050854729754e-07, "loss": 0.369, "step": 14378 }, { "epoch": 0.8627227455450891, "grad_norm": 1.4418247938156128, "learning_rate": 3.403124670191362e-07, "loss": 0.3661, "step": 14379 }, { "epoch": 0.8627827443451132, "grad_norm": 1.30242121219635, "learning_rate": 3.4001996789230033e-07, "loss": 0.4338, "step": 14380 }, { "epoch": 0.8628427431451371, "grad_norm": 1.2831875085830688, "learning_rate": 3.397275881035135e-07, "loss": 0.3637, "step": 14381 }, { "epoch": 0.8629027419451611, "grad_norm": 1.2229686975479126, "learning_rate": 3.394353276638172e-07, "loss": 0.3566, "step": 14382 }, { "epoch": 0.8629627407451851, "grad_norm": 1.4289045333862305, "learning_rate": 3.391431865842485e-07, "loss": 0.3977, "step": 14383 }, { "epoch": 0.8630227395452091, "grad_norm": 1.4389299154281616, "learning_rate": 3.388511648758416e-07, "loss": 0.4116, "step": 14384 }, { "epoch": 0.8630827383452331, "grad_norm": 1.3482447862625122, "learning_rate": 3.3855926254962386e-07, "loss": 0.3732, "step": 14385 }, { "epoch": 0.8631427371452571, "grad_norm": 1.4213167428970337, "learning_rate": 3.382674796166189e-07, "loss": 0.3463, "step": 14386 }, { "epoch": 0.8632027359452811, "grad_norm": 1.4056768417358398, "learning_rate": 3.3797581608784677e-07, "loss": 0.405, "step": 14387 }, { "epoch": 0.8632627347453051, "grad_norm": 1.3907074928283691, "learning_rate": 3.376842719743221e-07, "loss": 0.4055, "step": 14388 }, { "epoch": 0.8633227335453291, "grad_norm": 1.3725024461746216, "learning_rate": 3.373928472870549e-07, "loss": 0.4009, "step": 14389 }, { "epoch": 0.8633827323453531, "grad_norm": 1.2877784967422485, "learning_rate": 3.3710154203705076e-07, "loss": 0.3258, "step": 14390 }, { "epoch": 0.8634427311453771, "grad_norm": 1.4388283491134644, "learning_rate": 3.3681035623531087e-07, "loss": 0.3842, "step": 14391 }, { "epoch": 0.8635027299454011, "grad_norm": 1.4306484460830688, "learning_rate": 3.365192898928338e-07, "loss": 0.3703, "step": 14392 }, { "epoch": 0.8635627287454251, "grad_norm": 1.3243608474731445, "learning_rate": 3.36228343020609e-07, "loss": 0.3232, "step": 14393 }, { "epoch": 0.8636227275454491, "grad_norm": 1.4215178489685059, "learning_rate": 3.359375156296262e-07, "loss": 0.3691, "step": 14394 }, { "epoch": 0.863682726345473, "grad_norm": 1.3862502574920654, "learning_rate": 3.356468077308674e-07, "loss": 0.3799, "step": 14395 }, { "epoch": 0.8637427251454971, "grad_norm": 1.5520292520523071, "learning_rate": 3.3535621933531266e-07, "loss": 0.3886, "step": 14396 }, { "epoch": 0.863802723945521, "grad_norm": 1.3674869537353516, "learning_rate": 3.350657504539354e-07, "loss": 0.3586, "step": 14397 }, { "epoch": 0.8638627227455451, "grad_norm": 1.471961259841919, "learning_rate": 3.347754010977045e-07, "loss": 0.3921, "step": 14398 }, { "epoch": 0.863922721545569, "grad_norm": 1.3492122888565063, "learning_rate": 3.3448517127758665e-07, "loss": 0.3855, "step": 14399 }, { "epoch": 0.8639827203455931, "grad_norm": 1.553804636001587, "learning_rate": 3.3419506100454215e-07, "loss": 0.3721, "step": 14400 }, { "epoch": 0.864042719145617, "grad_norm": 1.4669710397720337, "learning_rate": 3.3390507028952657e-07, "loss": 0.3505, "step": 14401 }, { "epoch": 0.8641027179456411, "grad_norm": 1.3467355966567993, "learning_rate": 3.3361519914349124e-07, "loss": 0.3712, "step": 14402 }, { "epoch": 0.8641627167456651, "grad_norm": 1.329657793045044, "learning_rate": 3.333254475773839e-07, "loss": 0.3971, "step": 14403 }, { "epoch": 0.8642227155456891, "grad_norm": 1.239291787147522, "learning_rate": 3.3303581560214813e-07, "loss": 0.3233, "step": 14404 }, { "epoch": 0.8642827143457131, "grad_norm": 1.4208688735961914, "learning_rate": 3.327463032287196e-07, "loss": 0.3666, "step": 14405 }, { "epoch": 0.8643427131457371, "grad_norm": 1.4462100267410278, "learning_rate": 3.3245691046803416e-07, "loss": 0.401, "step": 14406 }, { "epoch": 0.8644027119457611, "grad_norm": 1.3806902170181274, "learning_rate": 3.3216763733101936e-07, "loss": 0.3788, "step": 14407 }, { "epoch": 0.8644627107457851, "grad_norm": 1.3168950080871582, "learning_rate": 3.3187848382860043e-07, "loss": 0.3518, "step": 14408 }, { "epoch": 0.8645227095458091, "grad_norm": 1.2658238410949707, "learning_rate": 3.315894499716976e-07, "loss": 0.3939, "step": 14409 }, { "epoch": 0.8645827083458331, "grad_norm": 1.4560037851333618, "learning_rate": 3.3130053577122524e-07, "loss": 0.3552, "step": 14410 }, { "epoch": 0.8646427071458571, "grad_norm": 1.34342622756958, "learning_rate": 3.3101174123809584e-07, "loss": 0.4094, "step": 14411 }, { "epoch": 0.8647027059458811, "grad_norm": 1.2107809782028198, "learning_rate": 3.3072306638321517e-07, "loss": 0.3899, "step": 14412 }, { "epoch": 0.8647627047459051, "grad_norm": 1.3341408967971802, "learning_rate": 3.3043451121748485e-07, "loss": 0.3539, "step": 14413 }, { "epoch": 0.864822703545929, "grad_norm": 1.421946406364441, "learning_rate": 3.301460757518021e-07, "loss": 0.3788, "step": 14414 }, { "epoch": 0.8648827023459531, "grad_norm": 1.3430887460708618, "learning_rate": 3.2985775999706073e-07, "loss": 0.3765, "step": 14415 }, { "epoch": 0.864942701145977, "grad_norm": 1.3617738485336304, "learning_rate": 3.295695639641487e-07, "loss": 0.3996, "step": 14416 }, { "epoch": 0.8650026999460011, "grad_norm": 1.3397241830825806, "learning_rate": 3.292814876639493e-07, "loss": 0.3638, "step": 14417 }, { "epoch": 0.865062698746025, "grad_norm": 1.3229538202285767, "learning_rate": 3.2899353110734277e-07, "loss": 0.3699, "step": 14418 }, { "epoch": 0.8651226975460491, "grad_norm": 1.3569101095199585, "learning_rate": 3.2870569430520337e-07, "loss": 0.348, "step": 14419 }, { "epoch": 0.865182696346073, "grad_norm": 1.411880373954773, "learning_rate": 3.2841797726840123e-07, "loss": 0.3678, "step": 14420 }, { "epoch": 0.8652426951460971, "grad_norm": 1.3895448446273804, "learning_rate": 3.2813038000780294e-07, "loss": 0.3835, "step": 14421 }, { "epoch": 0.865302693946121, "grad_norm": 1.2587672472000122, "learning_rate": 3.2784290253426846e-07, "loss": 0.3411, "step": 14422 }, { "epoch": 0.8653626927461451, "grad_norm": 1.3650161027908325, "learning_rate": 3.275555448586566e-07, "loss": 0.3308, "step": 14423 }, { "epoch": 0.8654226915461691, "grad_norm": 1.3644598722457886, "learning_rate": 3.272683069918169e-07, "loss": 0.3403, "step": 14424 }, { "epoch": 0.8654826903461931, "grad_norm": 1.3692915439605713, "learning_rate": 3.269811889445988e-07, "loss": 0.313, "step": 14425 }, { "epoch": 0.8655426891462171, "grad_norm": 1.3383301496505737, "learning_rate": 3.2669419072784443e-07, "loss": 0.3848, "step": 14426 }, { "epoch": 0.8656026879462411, "grad_norm": 1.5128579139709473, "learning_rate": 3.2640731235239356e-07, "loss": 0.4008, "step": 14427 }, { "epoch": 0.8656626867462651, "grad_norm": 1.3978521823883057, "learning_rate": 3.261205538290797e-07, "loss": 0.3871, "step": 14428 }, { "epoch": 0.8657226855462891, "grad_norm": 1.3438265323638916, "learning_rate": 3.2583391516873184e-07, "loss": 0.33, "step": 14429 }, { "epoch": 0.8657826843463131, "grad_norm": 1.3130133152008057, "learning_rate": 3.2554739638217606e-07, "loss": 0.3577, "step": 14430 }, { "epoch": 0.865842683146337, "grad_norm": 1.3873692750930786, "learning_rate": 3.252609974802323e-07, "loss": 0.3596, "step": 14431 }, { "epoch": 0.8659026819463611, "grad_norm": 1.3713194131851196, "learning_rate": 3.24974718473716e-07, "loss": 0.3999, "step": 14432 }, { "epoch": 0.865962680746385, "grad_norm": 1.3366931676864624, "learning_rate": 3.246885593734396e-07, "loss": 0.3479, "step": 14433 }, { "epoch": 0.8660226795464091, "grad_norm": 1.3585702180862427, "learning_rate": 3.244025201902099e-07, "loss": 0.3812, "step": 14434 }, { "epoch": 0.866082678346433, "grad_norm": 1.2970638275146484, "learning_rate": 3.241166009348289e-07, "loss": 0.3856, "step": 14435 }, { "epoch": 0.8661426771464571, "grad_norm": 1.4214963912963867, "learning_rate": 3.2383080161809413e-07, "loss": 0.3761, "step": 14436 }, { "epoch": 0.866202675946481, "grad_norm": 1.2858248949050903, "learning_rate": 3.2354512225079983e-07, "loss": 0.3925, "step": 14437 }, { "epoch": 0.8662626747465051, "grad_norm": 1.2914456129074097, "learning_rate": 3.232595628437341e-07, "loss": 0.3834, "step": 14438 }, { "epoch": 0.866322673546529, "grad_norm": 1.2482883930206299, "learning_rate": 3.2297412340768163e-07, "loss": 0.3495, "step": 14439 }, { "epoch": 0.8663826723465531, "grad_norm": 1.2656911611557007, "learning_rate": 3.226888039534222e-07, "loss": 0.3398, "step": 14440 }, { "epoch": 0.866442671146577, "grad_norm": 1.2991541624069214, "learning_rate": 3.224036044917304e-07, "loss": 0.384, "step": 14441 }, { "epoch": 0.8665026699466011, "grad_norm": 1.3341312408447266, "learning_rate": 3.2211852503337794e-07, "loss": 0.3355, "step": 14442 }, { "epoch": 0.866562668746625, "grad_norm": 1.323298692703247, "learning_rate": 3.2183356558913045e-07, "loss": 0.368, "step": 14443 }, { "epoch": 0.8666226675466491, "grad_norm": 1.3554142713546753, "learning_rate": 3.2154872616974837e-07, "loss": 0.3925, "step": 14444 }, { "epoch": 0.866682666346673, "grad_norm": 1.5244102478027344, "learning_rate": 3.212640067859911e-07, "loss": 0.3734, "step": 14445 }, { "epoch": 0.8667426651466971, "grad_norm": 1.7045000791549683, "learning_rate": 3.2097940744860974e-07, "loss": 0.4228, "step": 14446 }, { "epoch": 0.8668026639467211, "grad_norm": 1.2851773500442505, "learning_rate": 3.206949281683528e-07, "loss": 0.4068, "step": 14447 }, { "epoch": 0.8668626627467451, "grad_norm": 1.3766621351242065, "learning_rate": 3.2041056895596254e-07, "loss": 0.3649, "step": 14448 }, { "epoch": 0.8669226615467691, "grad_norm": 1.3893243074417114, "learning_rate": 3.201263298221792e-07, "loss": 0.3532, "step": 14449 }, { "epoch": 0.866982660346793, "grad_norm": 1.4822922945022583, "learning_rate": 3.1984221077773804e-07, "loss": 0.4001, "step": 14450 }, { "epoch": 0.8670426591468171, "grad_norm": 1.3637949228286743, "learning_rate": 3.19558211833367e-07, "loss": 0.356, "step": 14451 }, { "epoch": 0.867102657946841, "grad_norm": 1.2601267099380493, "learning_rate": 3.1927433299979223e-07, "loss": 0.3572, "step": 14452 }, { "epoch": 0.8671626567468651, "grad_norm": 1.4972689151763916, "learning_rate": 3.189905742877344e-07, "loss": 0.3921, "step": 14453 }, { "epoch": 0.867222655546889, "grad_norm": 1.4205435514450073, "learning_rate": 3.187069357079111e-07, "loss": 0.4121, "step": 14454 }, { "epoch": 0.8672826543469131, "grad_norm": 1.4540367126464844, "learning_rate": 3.1842341727103124e-07, "loss": 0.3944, "step": 14455 }, { "epoch": 0.867342653146937, "grad_norm": 1.3949158191680908, "learning_rate": 3.1814001898780403e-07, "loss": 0.4034, "step": 14456 }, { "epoch": 0.8674026519469611, "grad_norm": 1.4909353256225586, "learning_rate": 3.1785674086893245e-07, "loss": 0.3668, "step": 14457 }, { "epoch": 0.867462650746985, "grad_norm": 1.2247394323349, "learning_rate": 3.175735829251137e-07, "loss": 0.3437, "step": 14458 }, { "epoch": 0.8675226495470091, "grad_norm": 1.3501582145690918, "learning_rate": 3.1729054516704175e-07, "loss": 0.3658, "step": 14459 }, { "epoch": 0.867582648347033, "grad_norm": 1.2115567922592163, "learning_rate": 3.170076276054053e-07, "loss": 0.3525, "step": 14460 }, { "epoch": 0.8676426471470571, "grad_norm": 1.2516536712646484, "learning_rate": 3.1672483025088903e-07, "loss": 0.411, "step": 14461 }, { "epoch": 0.867702645947081, "grad_norm": 1.1897532939910889, "learning_rate": 3.1644215311417345e-07, "loss": 0.3914, "step": 14462 }, { "epoch": 0.8677626447471051, "grad_norm": 1.516841173171997, "learning_rate": 3.161595962059328e-07, "loss": 0.3862, "step": 14463 }, { "epoch": 0.867822643547129, "grad_norm": 1.3409757614135742, "learning_rate": 3.1587715953683885e-07, "loss": 0.4299, "step": 14464 }, { "epoch": 0.8678826423471531, "grad_norm": 1.378804326057434, "learning_rate": 3.1559484311755825e-07, "loss": 0.388, "step": 14465 }, { "epoch": 0.867942641147177, "grad_norm": 1.3089205026626587, "learning_rate": 3.153126469587523e-07, "loss": 0.3772, "step": 14466 }, { "epoch": 0.8680026399472011, "grad_norm": 1.3935292959213257, "learning_rate": 3.1503057107107744e-07, "loss": 0.3483, "step": 14467 }, { "epoch": 0.868062638747225, "grad_norm": 1.2065646648406982, "learning_rate": 3.147486154651873e-07, "loss": 0.3069, "step": 14468 }, { "epoch": 0.868122637547249, "grad_norm": 1.4052915573120117, "learning_rate": 3.144667801517312e-07, "loss": 0.3877, "step": 14469 }, { "epoch": 0.8681826363472731, "grad_norm": 1.4196860790252686, "learning_rate": 3.1418506514135035e-07, "loss": 0.335, "step": 14470 }, { "epoch": 0.868242635147297, "grad_norm": 1.4060684442520142, "learning_rate": 3.1390347044468564e-07, "loss": 0.3688, "step": 14471 }, { "epoch": 0.8683026339473211, "grad_norm": 1.420332908630371, "learning_rate": 3.136219960723701e-07, "loss": 0.4112, "step": 14472 }, { "epoch": 0.868362632747345, "grad_norm": 1.200270175933838, "learning_rate": 3.133406420350359e-07, "loss": 0.301, "step": 14473 }, { "epoch": 0.8684226315473691, "grad_norm": 1.4237860441207886, "learning_rate": 3.130594083433069e-07, "loss": 0.3844, "step": 14474 }, { "epoch": 0.868482630347393, "grad_norm": 1.3694919347763062, "learning_rate": 3.127782950078037e-07, "loss": 0.3314, "step": 14475 }, { "epoch": 0.8685426291474171, "grad_norm": 1.4135456085205078, "learning_rate": 3.124973020391441e-07, "loss": 0.3959, "step": 14476 }, { "epoch": 0.868602627947441, "grad_norm": 1.3555883169174194, "learning_rate": 3.1221642944793907e-07, "loss": 0.3852, "step": 14477 }, { "epoch": 0.8686626267474651, "grad_norm": 1.195759892463684, "learning_rate": 3.1193567724479566e-07, "loss": 0.3317, "step": 14478 }, { "epoch": 0.868722625547489, "grad_norm": 1.3957946300506592, "learning_rate": 3.116550454403173e-07, "loss": 0.3728, "step": 14479 }, { "epoch": 0.8687826243475131, "grad_norm": 1.2627922296524048, "learning_rate": 3.113745340451021e-07, "loss": 0.3576, "step": 14480 }, { "epoch": 0.868842623147537, "grad_norm": 1.4106640815734863, "learning_rate": 3.1109414306974317e-07, "loss": 0.3567, "step": 14481 }, { "epoch": 0.8689026219475611, "grad_norm": 1.2886308431625366, "learning_rate": 3.108138725248291e-07, "loss": 0.3538, "step": 14482 }, { "epoch": 0.868962620747585, "grad_norm": 1.3817089796066284, "learning_rate": 3.1053372242094606e-07, "loss": 0.3933, "step": 14483 }, { "epoch": 0.8690226195476091, "grad_norm": 1.4113649129867554, "learning_rate": 3.1025369276867225e-07, "loss": 0.3441, "step": 14484 }, { "epoch": 0.869082618347633, "grad_norm": 1.3044475317001343, "learning_rate": 3.099737835785848e-07, "loss": 0.3612, "step": 14485 }, { "epoch": 0.8691426171476571, "grad_norm": 1.3378931283950806, "learning_rate": 3.096939948612538e-07, "loss": 0.3475, "step": 14486 }, { "epoch": 0.869202615947681, "grad_norm": 1.3279575109481812, "learning_rate": 3.09414326627245e-07, "loss": 0.3575, "step": 14487 }, { "epoch": 0.869262614747705, "grad_norm": 1.3462748527526855, "learning_rate": 3.0913477888712157e-07, "loss": 0.3841, "step": 14488 }, { "epoch": 0.869322613547729, "grad_norm": 1.234288215637207, "learning_rate": 3.088553516514397e-07, "loss": 0.3648, "step": 14489 }, { "epoch": 0.869382612347753, "grad_norm": 1.407644510269165, "learning_rate": 3.0857604493075185e-07, "loss": 0.416, "step": 14490 }, { "epoch": 0.869442611147777, "grad_norm": 1.2577950954437256, "learning_rate": 3.0829685873560763e-07, "loss": 0.3615, "step": 14491 }, { "epoch": 0.869502609947801, "grad_norm": 1.5540657043457031, "learning_rate": 3.0801779307654914e-07, "loss": 0.3904, "step": 14492 }, { "epoch": 0.8695626087478251, "grad_norm": 1.4171812534332275, "learning_rate": 3.0773884796411654e-07, "loss": 0.3772, "step": 14493 }, { "epoch": 0.869622607547849, "grad_norm": 1.4688807725906372, "learning_rate": 3.0746002340884264e-07, "loss": 0.388, "step": 14494 }, { "epoch": 0.8696826063478731, "grad_norm": 1.436029314994812, "learning_rate": 3.071813194212595e-07, "loss": 0.399, "step": 14495 }, { "epoch": 0.869742605147897, "grad_norm": 1.3879412412643433, "learning_rate": 3.069027360118913e-07, "loss": 0.3718, "step": 14496 }, { "epoch": 0.8698026039479211, "grad_norm": 1.3926557302474976, "learning_rate": 3.066242731912584e-07, "loss": 0.3647, "step": 14497 }, { "epoch": 0.869862602747945, "grad_norm": 1.2568556070327759, "learning_rate": 3.0634593096987813e-07, "loss": 0.337, "step": 14498 }, { "epoch": 0.8699226015479691, "grad_norm": 1.2942050695419312, "learning_rate": 3.060677093582614e-07, "loss": 0.3807, "step": 14499 }, { "epoch": 0.869982600347993, "grad_norm": 1.396436333656311, "learning_rate": 3.057896083669171e-07, "loss": 0.383, "step": 14500 }, { "epoch": 0.8700425991480171, "grad_norm": 1.2750866413116455, "learning_rate": 3.0551162800634525e-07, "loss": 0.384, "step": 14501 }, { "epoch": 0.870102597948041, "grad_norm": 1.3684691190719604, "learning_rate": 3.052337682870452e-07, "loss": 0.3476, "step": 14502 }, { "epoch": 0.8701625967480651, "grad_norm": 1.3483282327651978, "learning_rate": 3.0495602921951065e-07, "loss": 0.3849, "step": 14503 }, { "epoch": 0.870222595548089, "grad_norm": 1.4335417747497559, "learning_rate": 3.0467841081423075e-07, "loss": 0.3732, "step": 14504 }, { "epoch": 0.8702825943481131, "grad_norm": 1.3872085809707642, "learning_rate": 3.04400913081689e-07, "loss": 0.3807, "step": 14505 }, { "epoch": 0.870342593148137, "grad_norm": 1.4454857110977173, "learning_rate": 3.041235360323653e-07, "loss": 0.3677, "step": 14506 }, { "epoch": 0.870402591948161, "grad_norm": 1.4310816526412964, "learning_rate": 3.0384627967673577e-07, "loss": 0.3761, "step": 14507 }, { "epoch": 0.870462590748185, "grad_norm": 1.3435859680175781, "learning_rate": 3.03569144025271e-07, "loss": 0.402, "step": 14508 }, { "epoch": 0.870522589548209, "grad_norm": 1.4212043285369873, "learning_rate": 3.0329212908843586e-07, "loss": 0.3487, "step": 14509 }, { "epoch": 0.870582588348233, "grad_norm": 1.4790270328521729, "learning_rate": 3.0301523487669307e-07, "loss": 0.4067, "step": 14510 }, { "epoch": 0.870642587148257, "grad_norm": 1.5050406455993652, "learning_rate": 3.0273846140049973e-07, "loss": 0.4054, "step": 14511 }, { "epoch": 0.870702585948281, "grad_norm": 1.4365657567977905, "learning_rate": 3.024618086703082e-07, "loss": 0.3897, "step": 14512 }, { "epoch": 0.870762584748305, "grad_norm": 1.4246479272842407, "learning_rate": 3.021852766965655e-07, "loss": 0.4119, "step": 14513 }, { "epoch": 0.8708225835483291, "grad_norm": 1.417549967765808, "learning_rate": 3.0190886548971564e-07, "loss": 0.3582, "step": 14514 }, { "epoch": 0.870882582348353, "grad_norm": 1.3038074970245361, "learning_rate": 3.0163257506019864e-07, "loss": 0.3802, "step": 14515 }, { "epoch": 0.8709425811483771, "grad_norm": 1.3211867809295654, "learning_rate": 3.0135640541844664e-07, "loss": 0.3944, "step": 14516 }, { "epoch": 0.871002579948401, "grad_norm": 1.3532390594482422, "learning_rate": 3.010803565748903e-07, "loss": 0.3514, "step": 14517 }, { "epoch": 0.8710625787484251, "grad_norm": 1.3271934986114502, "learning_rate": 3.008044285399543e-07, "loss": 0.3594, "step": 14518 }, { "epoch": 0.871122577548449, "grad_norm": 1.2931116819381714, "learning_rate": 3.005286213240604e-07, "loss": 0.3915, "step": 14519 }, { "epoch": 0.8711825763484731, "grad_norm": 1.2969304323196411, "learning_rate": 3.0025293493762355e-07, "loss": 0.3195, "step": 14520 }, { "epoch": 0.871242575148497, "grad_norm": 1.3394216299057007, "learning_rate": 2.999773693910546e-07, "loss": 0.3996, "step": 14521 }, { "epoch": 0.8713025739485211, "grad_norm": 1.2794544696807861, "learning_rate": 2.9970192469476163e-07, "loss": 0.3373, "step": 14522 }, { "epoch": 0.871362572748545, "grad_norm": 1.4019285440444946, "learning_rate": 2.9942660085914687e-07, "loss": 0.3591, "step": 14523 }, { "epoch": 0.8714225715485691, "grad_norm": 1.4364765882492065, "learning_rate": 2.9915139789460704e-07, "loss": 0.375, "step": 14524 }, { "epoch": 0.871482570348593, "grad_norm": 1.315495491027832, "learning_rate": 2.988763158115359e-07, "loss": 0.3581, "step": 14525 }, { "epoch": 0.871542569148617, "grad_norm": 1.3442294597625732, "learning_rate": 2.9860135462032207e-07, "loss": 0.3222, "step": 14526 }, { "epoch": 0.871602567948641, "grad_norm": 1.4413577318191528, "learning_rate": 2.9832651433134987e-07, "loss": 0.3249, "step": 14527 }, { "epoch": 0.871662566748665, "grad_norm": 1.3361575603485107, "learning_rate": 2.980517949549979e-07, "loss": 0.4054, "step": 14528 }, { "epoch": 0.871722565548689, "grad_norm": 1.4347275495529175, "learning_rate": 2.977771965016419e-07, "loss": 0.3671, "step": 14529 }, { "epoch": 0.871782564348713, "grad_norm": 1.2472707033157349, "learning_rate": 2.9750271898165155e-07, "loss": 0.3705, "step": 14530 }, { "epoch": 0.871842563148737, "grad_norm": 1.3280465602874756, "learning_rate": 2.9722836240539363e-07, "loss": 0.3853, "step": 14531 }, { "epoch": 0.871902561948761, "grad_norm": 1.347928524017334, "learning_rate": 2.9695412678322845e-07, "loss": 0.4072, "step": 14532 }, { "epoch": 0.871962560748785, "grad_norm": 1.3935798406600952, "learning_rate": 2.9668001212551204e-07, "loss": 0.3867, "step": 14533 }, { "epoch": 0.872022559548809, "grad_norm": 1.3114417791366577, "learning_rate": 2.964060184425984e-07, "loss": 0.3971, "step": 14534 }, { "epoch": 0.872082558348833, "grad_norm": 1.229390025138855, "learning_rate": 2.961321457448338e-07, "loss": 0.3757, "step": 14535 }, { "epoch": 0.872142557148857, "grad_norm": 1.3340778350830078, "learning_rate": 2.958583940425611e-07, "loss": 0.3418, "step": 14536 }, { "epoch": 0.8722025559488811, "grad_norm": 1.4047999382019043, "learning_rate": 2.9558476334611833e-07, "loss": 0.4149, "step": 14537 }, { "epoch": 0.872262554748905, "grad_norm": 1.3200135231018066, "learning_rate": 2.9531125366584017e-07, "loss": 0.3982, "step": 14538 }, { "epoch": 0.8723225535489291, "grad_norm": 1.3730992078781128, "learning_rate": 2.9503786501205535e-07, "loss": 0.3972, "step": 14539 }, { "epoch": 0.872382552348953, "grad_norm": 1.4462560415267944, "learning_rate": 2.947645973950885e-07, "loss": 0.3776, "step": 14540 }, { "epoch": 0.8724425511489771, "grad_norm": 1.3423871994018555, "learning_rate": 2.9449145082526013e-07, "loss": 0.3654, "step": 14541 }, { "epoch": 0.872502549949001, "grad_norm": 1.5677350759506226, "learning_rate": 2.9421842531288513e-07, "loss": 0.3722, "step": 14542 }, { "epoch": 0.8725625487490251, "grad_norm": 1.2294954061508179, "learning_rate": 2.9394552086827437e-07, "loss": 0.3451, "step": 14543 }, { "epoch": 0.872622547549049, "grad_norm": 1.479594111442566, "learning_rate": 2.936727375017353e-07, "loss": 0.3591, "step": 14544 }, { "epoch": 0.872682546349073, "grad_norm": 1.3651913404464722, "learning_rate": 2.934000752235679e-07, "loss": 0.3834, "step": 14545 }, { "epoch": 0.872742545149097, "grad_norm": 1.3174008131027222, "learning_rate": 2.9312753404407195e-07, "loss": 0.3602, "step": 14546 }, { "epoch": 0.872802543949121, "grad_norm": 1.322223424911499, "learning_rate": 2.9285511397353726e-07, "loss": 0.3656, "step": 14547 }, { "epoch": 0.872862542749145, "grad_norm": 1.2206186056137085, "learning_rate": 2.925828150222538e-07, "loss": 0.3412, "step": 14548 }, { "epoch": 0.872922541549169, "grad_norm": 1.3640621900558472, "learning_rate": 2.9231063720050417e-07, "loss": 0.3878, "step": 14549 }, { "epoch": 0.872982540349193, "grad_norm": 1.257529616355896, "learning_rate": 2.9203858051856776e-07, "loss": 0.3602, "step": 14550 }, { "epoch": 0.873042539149217, "grad_norm": 1.2662022113800049, "learning_rate": 2.917666449867189e-07, "loss": 0.3861, "step": 14551 }, { "epoch": 0.873102537949241, "grad_norm": 1.3635766506195068, "learning_rate": 2.9149483061522636e-07, "loss": 0.3223, "step": 14552 }, { "epoch": 0.873162536749265, "grad_norm": 1.2173439264297485, "learning_rate": 2.9122313741435696e-07, "loss": 0.3613, "step": 14553 }, { "epoch": 0.873222535549289, "grad_norm": 1.1356765031814575, "learning_rate": 2.9095156539437077e-07, "loss": 0.3033, "step": 14554 }, { "epoch": 0.873282534349313, "grad_norm": 1.432725429534912, "learning_rate": 2.906801145655224e-07, "loss": 0.3686, "step": 14555 }, { "epoch": 0.873342533149337, "grad_norm": 1.2937779426574707, "learning_rate": 2.904087849380656e-07, "loss": 0.3598, "step": 14556 }, { "epoch": 0.873402531949361, "grad_norm": 1.3583537340164185, "learning_rate": 2.90137576522246e-07, "loss": 0.3156, "step": 14557 }, { "epoch": 0.873462530749385, "grad_norm": 1.2333171367645264, "learning_rate": 2.8986648932830585e-07, "loss": 0.3623, "step": 14558 }, { "epoch": 0.873522529549409, "grad_norm": 1.1277087926864624, "learning_rate": 2.895955233664825e-07, "loss": 0.3141, "step": 14559 }, { "epoch": 0.8735825283494331, "grad_norm": 1.4046192169189453, "learning_rate": 2.8932467864700953e-07, "loss": 0.4233, "step": 14560 }, { "epoch": 0.873642527149457, "grad_norm": 1.4842087030410767, "learning_rate": 2.8905395518011717e-07, "loss": 0.3696, "step": 14561 }, { "epoch": 0.8737025259494811, "grad_norm": 1.320476770401001, "learning_rate": 2.887833529760265e-07, "loss": 0.3597, "step": 14562 }, { "epoch": 0.873762524749505, "grad_norm": 1.3509999513626099, "learning_rate": 2.88512872044959e-07, "loss": 0.3446, "step": 14563 }, { "epoch": 0.873822523549529, "grad_norm": 1.2758432626724243, "learning_rate": 2.8824251239712817e-07, "loss": 0.3395, "step": 14564 }, { "epoch": 0.873882522349553, "grad_norm": 1.3553085327148438, "learning_rate": 2.879722740427458e-07, "loss": 0.4509, "step": 14565 }, { "epoch": 0.873942521149577, "grad_norm": 1.3983283042907715, "learning_rate": 2.877021569920163e-07, "loss": 0.3598, "step": 14566 }, { "epoch": 0.874002519949601, "grad_norm": 1.1861956119537354, "learning_rate": 2.8743216125514065e-07, "loss": 0.3673, "step": 14567 }, { "epoch": 0.874062518749625, "grad_norm": 1.3092788457870483, "learning_rate": 2.871622868423163e-07, "loss": 0.3783, "step": 14568 }, { "epoch": 0.874122517549649, "grad_norm": 1.2516928911209106, "learning_rate": 2.868925337637349e-07, "loss": 0.3541, "step": 14569 }, { "epoch": 0.874182516349673, "grad_norm": 1.320853352546692, "learning_rate": 2.866229020295838e-07, "loss": 0.3477, "step": 14570 }, { "epoch": 0.874242515149697, "grad_norm": 1.2043377161026, "learning_rate": 2.863533916500449e-07, "loss": 0.3694, "step": 14571 }, { "epoch": 0.874302513949721, "grad_norm": 1.4028037786483765, "learning_rate": 2.860840026352973e-07, "loss": 0.4114, "step": 14572 }, { "epoch": 0.874362512749745, "grad_norm": 1.331411361694336, "learning_rate": 2.858147349955145e-07, "loss": 0.3696, "step": 14573 }, { "epoch": 0.874422511549769, "grad_norm": 1.3447902202606201, "learning_rate": 2.8554558874086515e-07, "loss": 0.3808, "step": 14574 }, { "epoch": 0.874482510349793, "grad_norm": 1.4430721998214722, "learning_rate": 2.8527656388151407e-07, "loss": 0.3861, "step": 14575 }, { "epoch": 0.874542509149817, "grad_norm": 1.4893721342086792, "learning_rate": 2.850076604276206e-07, "loss": 0.403, "step": 14576 }, { "epoch": 0.874602507949841, "grad_norm": 1.4449623823165894, "learning_rate": 2.847388783893414e-07, "loss": 0.3708, "step": 14577 }, { "epoch": 0.874662506749865, "grad_norm": 1.3427045345306396, "learning_rate": 2.844702177768246e-07, "loss": 0.3716, "step": 14578 }, { "epoch": 0.874722505549889, "grad_norm": 1.2896143198013306, "learning_rate": 2.8420167860021794e-07, "loss": 0.3372, "step": 14579 }, { "epoch": 0.874782504349913, "grad_norm": 1.3634123802185059, "learning_rate": 2.8393326086966365e-07, "loss": 0.3926, "step": 14580 }, { "epoch": 0.8748425031499371, "grad_norm": 1.4431049823760986, "learning_rate": 2.836649645952974e-07, "loss": 0.3921, "step": 14581 }, { "epoch": 0.874902501949961, "grad_norm": 1.2905981540679932, "learning_rate": 2.833967897872521e-07, "loss": 0.3742, "step": 14582 }, { "epoch": 0.874962500749985, "grad_norm": 1.479475498199463, "learning_rate": 2.831287364556542e-07, "loss": 0.3813, "step": 14583 }, { "epoch": 0.875022499550009, "grad_norm": 1.4459589719772339, "learning_rate": 2.8286080461062884e-07, "loss": 0.396, "step": 14584 }, { "epoch": 0.875082498350033, "grad_norm": 1.3825887441635132, "learning_rate": 2.8259299426229376e-07, "loss": 0.3369, "step": 14585 }, { "epoch": 0.875142497150057, "grad_norm": 1.4773030281066895, "learning_rate": 2.823253054207618e-07, "loss": 0.3881, "step": 14586 }, { "epoch": 0.875202495950081, "grad_norm": 1.4880114793777466, "learning_rate": 2.820577380961444e-07, "loss": 0.3814, "step": 14587 }, { "epoch": 0.875262494750105, "grad_norm": 1.2399380207061768, "learning_rate": 2.817902922985451e-07, "loss": 0.3193, "step": 14588 }, { "epoch": 0.875322493550129, "grad_norm": 1.234798789024353, "learning_rate": 2.815229680380642e-07, "loss": 0.3764, "step": 14589 }, { "epoch": 0.875382492350153, "grad_norm": 1.4196901321411133, "learning_rate": 2.812557653247973e-07, "loss": 0.3632, "step": 14590 }, { "epoch": 0.875442491150177, "grad_norm": 1.4430108070373535, "learning_rate": 2.8098868416883497e-07, "loss": 0.3831, "step": 14591 }, { "epoch": 0.875502489950201, "grad_norm": 1.4024351835250854, "learning_rate": 2.8072172458026595e-07, "loss": 0.3765, "step": 14592 }, { "epoch": 0.875562488750225, "grad_norm": 1.3972042798995972, "learning_rate": 2.804548865691691e-07, "loss": 0.3262, "step": 14593 }, { "epoch": 0.875622487550249, "grad_norm": 1.5348098278045654, "learning_rate": 2.801881701456234e-07, "loss": 0.4194, "step": 14594 }, { "epoch": 0.875682486350273, "grad_norm": 1.4628534317016602, "learning_rate": 2.799215753197008e-07, "loss": 0.3635, "step": 14595 }, { "epoch": 0.875742485150297, "grad_norm": 1.432434320449829, "learning_rate": 2.796551021014697e-07, "loss": 0.3956, "step": 14596 }, { "epoch": 0.875802483950321, "grad_norm": 1.2562415599822998, "learning_rate": 2.793887505009939e-07, "loss": 0.3374, "step": 14597 }, { "epoch": 0.875862482750345, "grad_norm": 1.444395661354065, "learning_rate": 2.791225205283314e-07, "loss": 0.4037, "step": 14598 }, { "epoch": 0.875922481550369, "grad_norm": 1.2784957885742188, "learning_rate": 2.788564121935374e-07, "loss": 0.3417, "step": 14599 }, { "epoch": 0.875982480350393, "grad_norm": 1.241058111190796, "learning_rate": 2.7859042550666167e-07, "loss": 0.4224, "step": 14600 }, { "epoch": 0.876042479150417, "grad_norm": 1.356711745262146, "learning_rate": 2.7832456047774786e-07, "loss": 0.3878, "step": 14601 }, { "epoch": 0.8761024779504409, "grad_norm": 1.289319396018982, "learning_rate": 2.780588171168383e-07, "loss": 0.3478, "step": 14602 }, { "epoch": 0.876162476750465, "grad_norm": 1.2038254737854004, "learning_rate": 2.7779319543396764e-07, "loss": 0.3011, "step": 14603 }, { "epoch": 0.876222475550489, "grad_norm": 1.3599941730499268, "learning_rate": 2.7752769543916824e-07, "loss": 0.433, "step": 14604 }, { "epoch": 0.876282474350513, "grad_norm": 1.3380742073059082, "learning_rate": 2.7726231714246535e-07, "loss": 0.385, "step": 14605 }, { "epoch": 0.876342473150537, "grad_norm": 1.3970975875854492, "learning_rate": 2.769970605538824e-07, "loss": 0.4047, "step": 14606 }, { "epoch": 0.876402471950561, "grad_norm": 1.3723727464675903, "learning_rate": 2.767319256834369e-07, "loss": 0.3377, "step": 14607 }, { "epoch": 0.876462470750585, "grad_norm": 1.3203679323196411, "learning_rate": 2.7646691254114056e-07, "loss": 0.3353, "step": 14608 }, { "epoch": 0.876522469550609, "grad_norm": 1.4665911197662354, "learning_rate": 2.762020211370032e-07, "loss": 0.39, "step": 14609 }, { "epoch": 0.876582468350633, "grad_norm": 1.3712118864059448, "learning_rate": 2.7593725148102715e-07, "loss": 0.3903, "step": 14610 }, { "epoch": 0.876642467150657, "grad_norm": 1.3500709533691406, "learning_rate": 2.756726035832129e-07, "loss": 0.359, "step": 14611 }, { "epoch": 0.876702465950681, "grad_norm": 1.5295761823654175, "learning_rate": 2.754080774535546e-07, "loss": 0.3728, "step": 14612 }, { "epoch": 0.876762464750705, "grad_norm": 1.4702900648117065, "learning_rate": 2.7514367310204153e-07, "loss": 0.3698, "step": 14613 }, { "epoch": 0.876822463550729, "grad_norm": 1.2393155097961426, "learning_rate": 2.7487939053866006e-07, "loss": 0.3393, "step": 14614 }, { "epoch": 0.876882462350753, "grad_norm": 1.361305832862854, "learning_rate": 2.7461522977339006e-07, "loss": 0.391, "step": 14615 }, { "epoch": 0.876942461150777, "grad_norm": 1.3804802894592285, "learning_rate": 2.7435119081620835e-07, "loss": 0.3967, "step": 14616 }, { "epoch": 0.877002459950801, "grad_norm": 1.3968539237976074, "learning_rate": 2.7408727367708573e-07, "loss": 0.3751, "step": 14617 }, { "epoch": 0.877062458750825, "grad_norm": 1.4290882349014282, "learning_rate": 2.7382347836598966e-07, "loss": 0.4037, "step": 14618 }, { "epoch": 0.8771224575508489, "grad_norm": 1.1387232542037964, "learning_rate": 2.735598048928826e-07, "loss": 0.3341, "step": 14619 }, { "epoch": 0.877182456350873, "grad_norm": 1.2480093240737915, "learning_rate": 2.7329625326772166e-07, "loss": 0.3608, "step": 14620 }, { "epoch": 0.8772424551508969, "grad_norm": 1.3861194849014282, "learning_rate": 2.730328235004608e-07, "loss": 0.3751, "step": 14621 }, { "epoch": 0.877302453950921, "grad_norm": 1.368710994720459, "learning_rate": 2.727695156010476e-07, "loss": 0.3739, "step": 14622 }, { "epoch": 0.8773624527509449, "grad_norm": 1.326059103012085, "learning_rate": 2.7250632957942773e-07, "loss": 0.334, "step": 14623 }, { "epoch": 0.877422451550969, "grad_norm": 1.3753770589828491, "learning_rate": 2.722432654455382e-07, "loss": 0.375, "step": 14624 }, { "epoch": 0.8774824503509929, "grad_norm": 1.331761360168457, "learning_rate": 2.7198032320931506e-07, "loss": 0.3696, "step": 14625 }, { "epoch": 0.877542449151017, "grad_norm": 1.3528720140457153, "learning_rate": 2.717175028806888e-07, "loss": 0.3859, "step": 14626 }, { "epoch": 0.877602447951041, "grad_norm": 1.416934609413147, "learning_rate": 2.714548044695844e-07, "loss": 0.3789, "step": 14627 }, { "epoch": 0.877662446751065, "grad_norm": 1.3190956115722656, "learning_rate": 2.7119222798592275e-07, "loss": 0.3838, "step": 14628 }, { "epoch": 0.877722445551089, "grad_norm": 1.252732515335083, "learning_rate": 2.7092977343961987e-07, "loss": 0.3247, "step": 14629 }, { "epoch": 0.877782444351113, "grad_norm": 1.5990958213806152, "learning_rate": 2.706674408405881e-07, "loss": 0.3997, "step": 14630 }, { "epoch": 0.877842443151137, "grad_norm": 1.3452470302581787, "learning_rate": 2.7040523019873453e-07, "loss": 0.3483, "step": 14631 }, { "epoch": 0.877902441951161, "grad_norm": 1.4376918077468872, "learning_rate": 2.7014314152396057e-07, "loss": 0.3599, "step": 14632 }, { "epoch": 0.877962440751185, "grad_norm": 1.498068928718567, "learning_rate": 2.6988117482616603e-07, "loss": 0.4002, "step": 14633 }, { "epoch": 0.878022439551209, "grad_norm": 1.3855923414230347, "learning_rate": 2.696193301152429e-07, "loss": 0.3684, "step": 14634 }, { "epoch": 0.878082438351233, "grad_norm": 1.3027191162109375, "learning_rate": 2.6935760740107976e-07, "loss": 0.3702, "step": 14635 }, { "epoch": 0.878142437151257, "grad_norm": 1.3992881774902344, "learning_rate": 2.6909600669356086e-07, "loss": 0.3659, "step": 14636 }, { "epoch": 0.878202435951281, "grad_norm": 1.3923993110656738, "learning_rate": 2.688345280025658e-07, "loss": 0.4238, "step": 14637 }, { "epoch": 0.8782624347513049, "grad_norm": 1.3908575773239136, "learning_rate": 2.6857317133797027e-07, "loss": 0.3813, "step": 14638 }, { "epoch": 0.878322433551329, "grad_norm": 1.3491266965866089, "learning_rate": 2.6831193670964297e-07, "loss": 0.3723, "step": 14639 }, { "epoch": 0.8783824323513529, "grad_norm": 1.332507610321045, "learning_rate": 2.680508241274507e-07, "loss": 0.3548, "step": 14640 }, { "epoch": 0.878442431151377, "grad_norm": 1.376248836517334, "learning_rate": 2.677898336012533e-07, "loss": 0.3929, "step": 14641 }, { "epoch": 0.8785024299514009, "grad_norm": 1.2261004447937012, "learning_rate": 2.675289651409089e-07, "loss": 0.3589, "step": 14642 }, { "epoch": 0.878562428751425, "grad_norm": 1.3702874183654785, "learning_rate": 2.67268218756268e-07, "loss": 0.3647, "step": 14643 }, { "epoch": 0.8786224275514489, "grad_norm": 1.3218191862106323, "learning_rate": 2.670075944571779e-07, "loss": 0.3497, "step": 14644 }, { "epoch": 0.878682426351473, "grad_norm": 1.1840473413467407, "learning_rate": 2.6674709225348214e-07, "loss": 0.3612, "step": 14645 }, { "epoch": 0.8787424251514969, "grad_norm": 1.409447431564331, "learning_rate": 2.664867121550179e-07, "loss": 0.3451, "step": 14646 }, { "epoch": 0.878802423951521, "grad_norm": 1.425148844718933, "learning_rate": 2.6622645417161853e-07, "loss": 0.3315, "step": 14647 }, { "epoch": 0.8788624227515449, "grad_norm": 1.3525469303131104, "learning_rate": 2.659663183131124e-07, "loss": 0.3633, "step": 14648 }, { "epoch": 0.878922421551569, "grad_norm": 1.4145989418029785, "learning_rate": 2.6570630458932503e-07, "loss": 0.3693, "step": 14649 }, { "epoch": 0.878982420351593, "grad_norm": 1.437477707862854, "learning_rate": 2.6544641301007503e-07, "loss": 0.4032, "step": 14650 }, { "epoch": 0.879042419151617, "grad_norm": 1.4193302392959595, "learning_rate": 2.6518664358517674e-07, "loss": 0.3929, "step": 14651 }, { "epoch": 0.879102417951641, "grad_norm": 1.3238965272903442, "learning_rate": 2.649269963244421e-07, "loss": 0.3523, "step": 14652 }, { "epoch": 0.879162416751665, "grad_norm": 1.4484120607376099, "learning_rate": 2.6466747123767537e-07, "loss": 0.391, "step": 14653 }, { "epoch": 0.879222415551689, "grad_norm": 1.4337997436523438, "learning_rate": 2.644080683346776e-07, "loss": 0.3629, "step": 14654 }, { "epoch": 0.879282414351713, "grad_norm": 1.3764210939407349, "learning_rate": 2.6414878762524646e-07, "loss": 0.3398, "step": 14655 }, { "epoch": 0.879342413151737, "grad_norm": 1.4906388521194458, "learning_rate": 2.638896291191725e-07, "loss": 0.4032, "step": 14656 }, { "epoch": 0.8794024119517609, "grad_norm": 1.4519575834274292, "learning_rate": 2.6363059282624444e-07, "loss": 0.395, "step": 14657 }, { "epoch": 0.879462410751785, "grad_norm": 1.3093650341033936, "learning_rate": 2.6337167875624357e-07, "loss": 0.3659, "step": 14658 }, { "epoch": 0.8795224095518089, "grad_norm": 1.4457051753997803, "learning_rate": 2.631128869189488e-07, "loss": 0.3311, "step": 14659 }, { "epoch": 0.879582408351833, "grad_norm": 1.474790334701538, "learning_rate": 2.628542173241321e-07, "loss": 0.3808, "step": 14660 }, { "epoch": 0.8796424071518569, "grad_norm": 1.3636553287506104, "learning_rate": 2.625956699815639e-07, "loss": 0.338, "step": 14661 }, { "epoch": 0.879702405951881, "grad_norm": 1.3137025833129883, "learning_rate": 2.6233724490100765e-07, "loss": 0.3836, "step": 14662 }, { "epoch": 0.8797624047519049, "grad_norm": 1.3003528118133545, "learning_rate": 2.6207894209222236e-07, "loss": 0.3439, "step": 14663 }, { "epoch": 0.879822403551929, "grad_norm": 1.399059772491455, "learning_rate": 2.618207615649642e-07, "loss": 0.3852, "step": 14664 }, { "epoch": 0.8798824023519529, "grad_norm": 1.481660008430481, "learning_rate": 2.615627033289825e-07, "loss": 0.3829, "step": 14665 }, { "epoch": 0.879942401151977, "grad_norm": 1.266318440437317, "learning_rate": 2.6130476739402286e-07, "loss": 0.3687, "step": 14666 }, { "epoch": 0.8800023999520009, "grad_norm": 1.3668603897094727, "learning_rate": 2.6104695376982725e-07, "loss": 0.3356, "step": 14667 }, { "epoch": 0.880062398752025, "grad_norm": 1.292877435684204, "learning_rate": 2.607892624661308e-07, "loss": 0.3649, "step": 14668 }, { "epoch": 0.8801223975520489, "grad_norm": 1.307626485824585, "learning_rate": 2.6053169349266773e-07, "loss": 0.3574, "step": 14669 }, { "epoch": 0.880182396352073, "grad_norm": 1.4221009016036987, "learning_rate": 2.6027424685916186e-07, "loss": 0.4056, "step": 14670 }, { "epoch": 0.880242395152097, "grad_norm": 1.4029544591903687, "learning_rate": 2.600169225753384e-07, "loss": 0.3215, "step": 14671 }, { "epoch": 0.880302393952121, "grad_norm": 1.2982385158538818, "learning_rate": 2.597597206509139e-07, "loss": 0.378, "step": 14672 }, { "epoch": 0.880362392752145, "grad_norm": 1.3448294401168823, "learning_rate": 2.595026410956027e-07, "loss": 0.3868, "step": 14673 }, { "epoch": 0.880422391552169, "grad_norm": 1.6628625392913818, "learning_rate": 2.5924568391911313e-07, "loss": 0.433, "step": 14674 }, { "epoch": 0.880482390352193, "grad_norm": 1.3939526081085205, "learning_rate": 2.589888491311487e-07, "loss": 0.3685, "step": 14675 }, { "epoch": 0.8805423891522169, "grad_norm": 1.4064358472824097, "learning_rate": 2.587321367414102e-07, "loss": 0.3328, "step": 14676 }, { "epoch": 0.880602387952241, "grad_norm": 1.4097386598587036, "learning_rate": 2.584755467595912e-07, "loss": 0.3814, "step": 14677 }, { "epoch": 0.8806623867522649, "grad_norm": 1.4012534618377686, "learning_rate": 2.5821907919538243e-07, "loss": 0.4014, "step": 14678 }, { "epoch": 0.880722385552289, "grad_norm": 1.3331167697906494, "learning_rate": 2.579627340584696e-07, "loss": 0.3837, "step": 14679 }, { "epoch": 0.8807823843523129, "grad_norm": 1.4406336545944214, "learning_rate": 2.5770651135853406e-07, "loss": 0.3533, "step": 14680 }, { "epoch": 0.880842383152337, "grad_norm": 1.273320198059082, "learning_rate": 2.5745041110525144e-07, "loss": 0.3467, "step": 14681 }, { "epoch": 0.8809023819523609, "grad_norm": 1.40940523147583, "learning_rate": 2.571944333082935e-07, "loss": 0.3626, "step": 14682 }, { "epoch": 0.880962380752385, "grad_norm": 1.3195369243621826, "learning_rate": 2.569385779773272e-07, "loss": 0.3644, "step": 14683 }, { "epoch": 0.8810223795524089, "grad_norm": 1.2842069864273071, "learning_rate": 2.5668284512201694e-07, "loss": 0.3486, "step": 14684 }, { "epoch": 0.881082378352433, "grad_norm": 1.3215492963790894, "learning_rate": 2.5642723475201766e-07, "loss": 0.3978, "step": 14685 }, { "epoch": 0.8811423771524569, "grad_norm": 1.3156039714813232, "learning_rate": 2.561717468769848e-07, "loss": 0.3873, "step": 14686 }, { "epoch": 0.881202375952481, "grad_norm": 1.4362679719924927, "learning_rate": 2.5591638150656555e-07, "loss": 0.3962, "step": 14687 }, { "epoch": 0.8812623747525049, "grad_norm": 1.348569393157959, "learning_rate": 2.5566113865040595e-07, "loss": 0.353, "step": 14688 }, { "epoch": 0.881322373552529, "grad_norm": 1.3627407550811768, "learning_rate": 2.554060183181428e-07, "loss": 0.3833, "step": 14689 }, { "epoch": 0.8813823723525529, "grad_norm": 1.361124038696289, "learning_rate": 2.551510205194116e-07, "loss": 0.3895, "step": 14690 }, { "epoch": 0.881442371152577, "grad_norm": 1.3785632848739624, "learning_rate": 2.5489614526384375e-07, "loss": 0.3589, "step": 14691 }, { "epoch": 0.8815023699526009, "grad_norm": 1.4113526344299316, "learning_rate": 2.546413925610638e-07, "loss": 0.3878, "step": 14692 }, { "epoch": 0.881562368752625, "grad_norm": 1.358960509300232, "learning_rate": 2.543867624206927e-07, "loss": 0.3437, "step": 14693 }, { "epoch": 0.881622367552649, "grad_norm": 1.5278667211532593, "learning_rate": 2.541322548523461e-07, "loss": 0.4064, "step": 14694 }, { "epoch": 0.8816823663526729, "grad_norm": 1.2603665590286255, "learning_rate": 2.5387786986563627e-07, "loss": 0.3658, "step": 14695 }, { "epoch": 0.881742365152697, "grad_norm": 1.284305453300476, "learning_rate": 2.5362360747017044e-07, "loss": 0.393, "step": 14696 }, { "epoch": 0.8818023639527209, "grad_norm": 1.3164657354354858, "learning_rate": 2.5336946767554956e-07, "loss": 0.3852, "step": 14697 }, { "epoch": 0.881862362752745, "grad_norm": 1.364641547203064, "learning_rate": 2.531154504913731e-07, "loss": 0.3566, "step": 14698 }, { "epoch": 0.8819223615527689, "grad_norm": 1.3465511798858643, "learning_rate": 2.528615559272335e-07, "loss": 0.3485, "step": 14699 }, { "epoch": 0.881982360352793, "grad_norm": 1.3110727071762085, "learning_rate": 2.526077839927185e-07, "loss": 0.3513, "step": 14700 }, { "epoch": 0.8820423591528169, "grad_norm": 1.4520848989486694, "learning_rate": 2.5235413469741206e-07, "loss": 0.3762, "step": 14701 }, { "epoch": 0.882102357952841, "grad_norm": 1.2735214233398438, "learning_rate": 2.5210060805089373e-07, "loss": 0.3447, "step": 14702 }, { "epoch": 0.8821623567528649, "grad_norm": 1.2766555547714233, "learning_rate": 2.518472040627389e-07, "loss": 0.31, "step": 14703 }, { "epoch": 0.882222355552889, "grad_norm": 1.3981188535690308, "learning_rate": 2.515939227425162e-07, "loss": 0.3746, "step": 14704 }, { "epoch": 0.8822823543529129, "grad_norm": 1.291648030281067, "learning_rate": 2.513407640997917e-07, "loss": 0.342, "step": 14705 }, { "epoch": 0.882342353152937, "grad_norm": 1.3074778318405151, "learning_rate": 2.510877281441249e-07, "loss": 0.3025, "step": 14706 }, { "epoch": 0.8824023519529609, "grad_norm": 1.3430836200714111, "learning_rate": 2.508348148850731e-07, "loss": 0.3529, "step": 14707 }, { "epoch": 0.882462350752985, "grad_norm": 1.3474175930023193, "learning_rate": 2.5058202433218736e-07, "loss": 0.3546, "step": 14708 }, { "epoch": 0.8825223495530089, "grad_norm": 1.3046643733978271, "learning_rate": 2.503293564950133e-07, "loss": 0.3901, "step": 14709 }, { "epoch": 0.882582348353033, "grad_norm": 1.3776978254318237, "learning_rate": 2.500768113830949e-07, "loss": 0.3958, "step": 14710 }, { "epoch": 0.8826423471530569, "grad_norm": 1.3805145025253296, "learning_rate": 2.498243890059686e-07, "loss": 0.3632, "step": 14711 }, { "epoch": 0.882702345953081, "grad_norm": 1.4109746217727661, "learning_rate": 2.495720893731674e-07, "loss": 0.3795, "step": 14712 }, { "epoch": 0.8827623447531049, "grad_norm": 1.350904941558838, "learning_rate": 2.493199124942188e-07, "loss": 0.3875, "step": 14713 }, { "epoch": 0.8828223435531289, "grad_norm": 1.3506213426589966, "learning_rate": 2.490678583786473e-07, "loss": 0.3904, "step": 14714 }, { "epoch": 0.8828823423531529, "grad_norm": 1.2265866994857788, "learning_rate": 2.4881592703597266e-07, "loss": 0.3558, "step": 14715 }, { "epoch": 0.8829423411531769, "grad_norm": 1.4411715269088745, "learning_rate": 2.4856411847570684e-07, "loss": 0.4067, "step": 14716 }, { "epoch": 0.883002339953201, "grad_norm": 1.3180782794952393, "learning_rate": 2.483124327073615e-07, "loss": 0.3992, "step": 14717 }, { "epoch": 0.8830623387532249, "grad_norm": 1.390990138053894, "learning_rate": 2.480608697404401e-07, "loss": 0.3554, "step": 14718 }, { "epoch": 0.883122337553249, "grad_norm": 1.394148826599121, "learning_rate": 2.478094295844445e-07, "loss": 0.3808, "step": 14719 }, { "epoch": 0.8831823363532729, "grad_norm": 1.2263575792312622, "learning_rate": 2.4755811224887004e-07, "loss": 0.3702, "step": 14720 }, { "epoch": 0.883242335153297, "grad_norm": 1.3056089878082275, "learning_rate": 2.473069177432065e-07, "loss": 0.3611, "step": 14721 }, { "epoch": 0.8833023339533209, "grad_norm": 1.3131632804870605, "learning_rate": 2.4705584607694263e-07, "loss": 0.3745, "step": 14722 }, { "epoch": 0.883362332753345, "grad_norm": 1.2855944633483887, "learning_rate": 2.4680489725955857e-07, "loss": 0.366, "step": 14723 }, { "epoch": 0.8834223315533689, "grad_norm": 1.3243443965911865, "learning_rate": 2.465540713005317e-07, "loss": 0.3753, "step": 14724 }, { "epoch": 0.883482330353393, "grad_norm": 1.2537147998809814, "learning_rate": 2.463033682093356e-07, "loss": 0.3224, "step": 14725 }, { "epoch": 0.8835423291534169, "grad_norm": 1.3313525915145874, "learning_rate": 2.460527879954371e-07, "loss": 0.3519, "step": 14726 }, { "epoch": 0.883602327953441, "grad_norm": 1.2814457416534424, "learning_rate": 2.4580233066830013e-07, "loss": 0.343, "step": 14727 }, { "epoch": 0.8836623267534649, "grad_norm": 1.575089693069458, "learning_rate": 2.4555199623738227e-07, "loss": 0.4133, "step": 14728 }, { "epoch": 0.883722325553489, "grad_norm": 1.2443994283676147, "learning_rate": 2.4530178471213916e-07, "loss": 0.3801, "step": 14729 }, { "epoch": 0.8837823243535129, "grad_norm": 1.4076915979385376, "learning_rate": 2.4505169610201903e-07, "loss": 0.3816, "step": 14730 }, { "epoch": 0.883842323153537, "grad_norm": 1.4483563899993896, "learning_rate": 2.4480173041646635e-07, "loss": 0.3684, "step": 14731 }, { "epoch": 0.8839023219535609, "grad_norm": 1.3648308515548706, "learning_rate": 2.44551887664922e-07, "loss": 0.3736, "step": 14732 }, { "epoch": 0.8839623207535849, "grad_norm": 1.3594717979431152, "learning_rate": 2.4430216785682095e-07, "loss": 0.3599, "step": 14733 }, { "epoch": 0.8840223195536089, "grad_norm": 1.3714388608932495, "learning_rate": 2.440525710015949e-07, "loss": 0.392, "step": 14734 }, { "epoch": 0.8840823183536329, "grad_norm": 1.3238210678100586, "learning_rate": 2.4380309710866793e-07, "loss": 0.379, "step": 14735 }, { "epoch": 0.8841423171536569, "grad_norm": 1.2186025381088257, "learning_rate": 2.435537461874631e-07, "loss": 0.3254, "step": 14736 }, { "epoch": 0.8842023159536809, "grad_norm": 1.152512550354004, "learning_rate": 2.433045182473976e-07, "loss": 0.3289, "step": 14737 }, { "epoch": 0.884262314753705, "grad_norm": 1.3487908840179443, "learning_rate": 2.430554132978824e-07, "loss": 0.3537, "step": 14738 }, { "epoch": 0.8843223135537289, "grad_norm": 1.3801653385162354, "learning_rate": 2.4280643134832617e-07, "loss": 0.3213, "step": 14739 }, { "epoch": 0.884382312353753, "grad_norm": 1.5516608953475952, "learning_rate": 2.4255757240813055e-07, "loss": 0.3912, "step": 14740 }, { "epoch": 0.8844423111537769, "grad_norm": 1.4124438762664795, "learning_rate": 2.4230883648669514e-07, "loss": 0.4092, "step": 14741 }, { "epoch": 0.884502309953801, "grad_norm": 1.325300931930542, "learning_rate": 2.420602235934132e-07, "loss": 0.4413, "step": 14742 }, { "epoch": 0.8845623087538249, "grad_norm": 1.3967305421829224, "learning_rate": 2.4181173373767264e-07, "loss": 0.3726, "step": 14743 }, { "epoch": 0.884622307553849, "grad_norm": 1.2845972776412964, "learning_rate": 2.415633669288592e-07, "loss": 0.3389, "step": 14744 }, { "epoch": 0.8846823063538729, "grad_norm": 1.5126014947891235, "learning_rate": 2.413151231763521e-07, "loss": 0.343, "step": 14745 }, { "epoch": 0.884742305153897, "grad_norm": 1.2678351402282715, "learning_rate": 2.41067002489526e-07, "loss": 0.3484, "step": 14746 }, { "epoch": 0.8848023039539209, "grad_norm": 1.277458667755127, "learning_rate": 2.4081900487775123e-07, "loss": 0.3393, "step": 14747 }, { "epoch": 0.884862302753945, "grad_norm": 1.3062171936035156, "learning_rate": 2.4057113035039393e-07, "loss": 0.3366, "step": 14748 }, { "epoch": 0.8849223015539689, "grad_norm": 1.3589882850646973, "learning_rate": 2.403233789168155e-07, "loss": 0.4105, "step": 14749 }, { "epoch": 0.884982300353993, "grad_norm": 1.4064204692840576, "learning_rate": 2.4007575058637196e-07, "loss": 0.3695, "step": 14750 }, { "epoch": 0.8850422991540169, "grad_norm": 1.492544174194336, "learning_rate": 2.398282453684153e-07, "loss": 0.3966, "step": 14751 }, { "epoch": 0.8851022979540409, "grad_norm": 1.4770381450653076, "learning_rate": 2.3958086327229164e-07, "loss": 0.4168, "step": 14752 }, { "epoch": 0.8851622967540649, "grad_norm": 1.3722954988479614, "learning_rate": 2.3933360430734544e-07, "loss": 0.3329, "step": 14753 }, { "epoch": 0.8852222955540889, "grad_norm": 1.435634732246399, "learning_rate": 2.390864684829131e-07, "loss": 0.3845, "step": 14754 }, { "epoch": 0.8852822943541129, "grad_norm": 1.3660638332366943, "learning_rate": 2.3883945580832743e-07, "loss": 0.3974, "step": 14755 }, { "epoch": 0.8853422931541369, "grad_norm": 1.3153924942016602, "learning_rate": 2.385925662929186e-07, "loss": 0.3407, "step": 14756 }, { "epoch": 0.8854022919541609, "grad_norm": 1.3571586608886719, "learning_rate": 2.3834579994600946e-07, "loss": 0.3263, "step": 14757 }, { "epoch": 0.8854622907541849, "grad_norm": 1.3320589065551758, "learning_rate": 2.3809915677691956e-07, "loss": 0.3231, "step": 14758 }, { "epoch": 0.8855222895542089, "grad_norm": 1.371949553489685, "learning_rate": 2.3785263679496237e-07, "loss": 0.3479, "step": 14759 }, { "epoch": 0.8855822883542329, "grad_norm": 1.326186180114746, "learning_rate": 2.37606240009449e-07, "loss": 0.3622, "step": 14760 }, { "epoch": 0.885642287154257, "grad_norm": 1.2211686372756958, "learning_rate": 2.3735996642968593e-07, "loss": 0.3506, "step": 14761 }, { "epoch": 0.8857022859542809, "grad_norm": 1.36385977268219, "learning_rate": 2.37113816064971e-07, "loss": 0.3425, "step": 14762 }, { "epoch": 0.885762284754305, "grad_norm": 1.2456930875778198, "learning_rate": 2.3686778892460213e-07, "loss": 0.3202, "step": 14763 }, { "epoch": 0.8858222835543289, "grad_norm": 1.40363609790802, "learning_rate": 2.3662188501786945e-07, "loss": 0.3604, "step": 14764 }, { "epoch": 0.885882282354353, "grad_norm": 1.43120276927948, "learning_rate": 2.3637610435406076e-07, "loss": 0.3664, "step": 14765 }, { "epoch": 0.8859422811543769, "grad_norm": 1.3121734857559204, "learning_rate": 2.361304469424576e-07, "loss": 0.3308, "step": 14766 }, { "epoch": 0.886002279954401, "grad_norm": 1.2667673826217651, "learning_rate": 2.3588491279233693e-07, "loss": 0.3697, "step": 14767 }, { "epoch": 0.8860622787544249, "grad_norm": 1.2641586065292358, "learning_rate": 2.356395019129725e-07, "loss": 0.3699, "step": 14768 }, { "epoch": 0.886122277554449, "grad_norm": 1.2984764575958252, "learning_rate": 2.3539421431363116e-07, "loss": 0.3779, "step": 14769 }, { "epoch": 0.8861822763544729, "grad_norm": 1.2472596168518066, "learning_rate": 2.3514905000357737e-07, "loss": 0.3658, "step": 14770 }, { "epoch": 0.8862422751544969, "grad_norm": 1.3831404447555542, "learning_rate": 2.3490400899206865e-07, "loss": 0.376, "step": 14771 }, { "epoch": 0.8863022739545209, "grad_norm": 1.2204515933990479, "learning_rate": 2.346590912883601e-07, "loss": 0.3593, "step": 14772 }, { "epoch": 0.8863622727545449, "grad_norm": 1.3657193183898926, "learning_rate": 2.3441429690170107e-07, "loss": 0.403, "step": 14773 }, { "epoch": 0.8864222715545689, "grad_norm": 1.295589804649353, "learning_rate": 2.3416962584133543e-07, "loss": 0.3688, "step": 14774 }, { "epoch": 0.8864822703545929, "grad_norm": 1.4728871583938599, "learning_rate": 2.3392507811650437e-07, "loss": 0.3779, "step": 14775 }, { "epoch": 0.8865422691546169, "grad_norm": 1.402911901473999, "learning_rate": 2.3368065373644319e-07, "loss": 0.351, "step": 14776 }, { "epoch": 0.8866022679546409, "grad_norm": 1.310356855392456, "learning_rate": 2.3343635271038142e-07, "loss": 0.3535, "step": 14777 }, { "epoch": 0.8866622667546649, "grad_norm": 1.2864471673965454, "learning_rate": 2.3319217504754702e-07, "loss": 0.3342, "step": 14778 }, { "epoch": 0.8867222655546889, "grad_norm": 1.4835983514785767, "learning_rate": 2.3294812075716015e-07, "loss": 0.4259, "step": 14779 }, { "epoch": 0.8867822643547129, "grad_norm": 1.3126837015151978, "learning_rate": 2.3270418984843904e-07, "loss": 0.3774, "step": 14780 }, { "epoch": 0.8868422631547369, "grad_norm": 1.3490238189697266, "learning_rate": 2.324603823305938e-07, "loss": 0.3847, "step": 14781 }, { "epoch": 0.8869022619547609, "grad_norm": 1.1775341033935547, "learning_rate": 2.322166982128337e-07, "loss": 0.3562, "step": 14782 }, { "epoch": 0.8869622607547849, "grad_norm": 1.2986441850662231, "learning_rate": 2.3197313750436027e-07, "loss": 0.3807, "step": 14783 }, { "epoch": 0.887022259554809, "grad_norm": 1.3071660995483398, "learning_rate": 2.317297002143731e-07, "loss": 0.3423, "step": 14784 }, { "epoch": 0.8870822583548329, "grad_norm": 1.3269147872924805, "learning_rate": 2.314863863520648e-07, "loss": 0.3917, "step": 14785 }, { "epoch": 0.887142257154857, "grad_norm": 1.1943773031234741, "learning_rate": 2.3124319592662407e-07, "loss": 0.3472, "step": 14786 }, { "epoch": 0.8872022559548809, "grad_norm": 1.3910835981369019, "learning_rate": 2.310001289472357e-07, "loss": 0.3422, "step": 14787 }, { "epoch": 0.8872622547549049, "grad_norm": 1.5118449926376343, "learning_rate": 2.307571854230787e-07, "loss": 0.341, "step": 14788 }, { "epoch": 0.8873222535549289, "grad_norm": 1.316231369972229, "learning_rate": 2.305143653633282e-07, "loss": 0.3787, "step": 14789 }, { "epoch": 0.8873822523549529, "grad_norm": 1.5579837560653687, "learning_rate": 2.302716687771546e-07, "loss": 0.4005, "step": 14790 }, { "epoch": 0.8874422511549769, "grad_norm": 1.453963279724121, "learning_rate": 2.300290956737233e-07, "loss": 0.4061, "step": 14791 }, { "epoch": 0.8875022499550009, "grad_norm": 1.3576706647872925, "learning_rate": 2.2978664606219507e-07, "loss": 0.3807, "step": 14792 }, { "epoch": 0.8875622487550249, "grad_norm": 1.3234227895736694, "learning_rate": 2.2954431995172553e-07, "loss": 0.3796, "step": 14793 }, { "epoch": 0.8876222475550489, "grad_norm": 1.3260999917984009, "learning_rate": 2.2930211735146727e-07, "loss": 0.3582, "step": 14794 }, { "epoch": 0.8876822463550729, "grad_norm": 1.3121716976165771, "learning_rate": 2.2906003827056664e-07, "loss": 0.3608, "step": 14795 }, { "epoch": 0.8877422451550969, "grad_norm": 1.2318236827850342, "learning_rate": 2.288180827181661e-07, "loss": 0.3667, "step": 14796 }, { "epoch": 0.8878022439551209, "grad_norm": 1.400339126586914, "learning_rate": 2.2857625070340348e-07, "loss": 0.3694, "step": 14797 }, { "epoch": 0.8878622427551449, "grad_norm": 1.277300477027893, "learning_rate": 2.2833454223541072e-07, "loss": 0.3788, "step": 14798 }, { "epoch": 0.8879222415551689, "grad_norm": 1.5049079656600952, "learning_rate": 2.280929573233171e-07, "loss": 0.3813, "step": 14799 }, { "epoch": 0.8879822403551929, "grad_norm": 1.414088487625122, "learning_rate": 2.278514959762456e-07, "loss": 0.3983, "step": 14800 }, { "epoch": 0.8880422391552169, "grad_norm": 1.4112060070037842, "learning_rate": 2.2761015820331464e-07, "loss": 0.4024, "step": 14801 }, { "epoch": 0.8881022379552409, "grad_norm": 1.2958379983901978, "learning_rate": 2.273689440136399e-07, "loss": 0.3733, "step": 14802 }, { "epoch": 0.8881622367552648, "grad_norm": 1.15896475315094, "learning_rate": 2.2712785341632995e-07, "loss": 0.3304, "step": 14803 }, { "epoch": 0.8882222355552889, "grad_norm": 1.3889230489730835, "learning_rate": 2.2688688642048965e-07, "loss": 0.3555, "step": 14804 }, { "epoch": 0.8882822343553128, "grad_norm": 1.4638797044754028, "learning_rate": 2.2664604303521907e-07, "loss": 0.395, "step": 14805 }, { "epoch": 0.8883422331553369, "grad_norm": 1.431710958480835, "learning_rate": 2.2640532326961403e-07, "loss": 0.3713, "step": 14806 }, { "epoch": 0.8884022319553609, "grad_norm": 1.2204416990280151, "learning_rate": 2.2616472713276614e-07, "loss": 0.3053, "step": 14807 }, { "epoch": 0.8884622307553849, "grad_norm": 1.3645288944244385, "learning_rate": 2.2592425463376032e-07, "loss": 0.3994, "step": 14808 }, { "epoch": 0.8885222295554089, "grad_norm": 1.2515803575515747, "learning_rate": 2.256839057816792e-07, "loss": 0.3717, "step": 14809 }, { "epoch": 0.8885822283554329, "grad_norm": 1.4617652893066406, "learning_rate": 2.2544368058559876e-07, "loss": 0.3791, "step": 14810 }, { "epoch": 0.8886422271554569, "grad_norm": 1.3402786254882812, "learning_rate": 2.2520357905459238e-07, "loss": 0.3843, "step": 14811 }, { "epoch": 0.8887022259554809, "grad_norm": 1.2676072120666504, "learning_rate": 2.249636011977263e-07, "loss": 0.3686, "step": 14812 }, { "epoch": 0.8887622247555049, "grad_norm": 1.3357239961624146, "learning_rate": 2.2472374702406373e-07, "loss": 0.3724, "step": 14813 }, { "epoch": 0.8888222235555289, "grad_norm": 1.277662754058838, "learning_rate": 2.244840165426636e-07, "loss": 0.3639, "step": 14814 }, { "epoch": 0.8888822223555529, "grad_norm": 1.3538819551467896, "learning_rate": 2.2424440976257898e-07, "loss": 0.344, "step": 14815 }, { "epoch": 0.8889422211555769, "grad_norm": 1.3616422414779663, "learning_rate": 2.240049266928587e-07, "loss": 0.3437, "step": 14816 }, { "epoch": 0.8890022199556009, "grad_norm": 1.3786145448684692, "learning_rate": 2.2376556734254614e-07, "loss": 0.38, "step": 14817 }, { "epoch": 0.8890622187556249, "grad_norm": 1.29274320602417, "learning_rate": 2.2352633172068276e-07, "loss": 0.3934, "step": 14818 }, { "epoch": 0.8891222175556489, "grad_norm": 1.4601536989212036, "learning_rate": 2.232872198363018e-07, "loss": 0.3703, "step": 14819 }, { "epoch": 0.8891822163556728, "grad_norm": 1.1861966848373413, "learning_rate": 2.230482316984334e-07, "loss": 0.3321, "step": 14820 }, { "epoch": 0.8892422151556969, "grad_norm": 1.2921463251113892, "learning_rate": 2.2280936731610428e-07, "loss": 0.3745, "step": 14821 }, { "epoch": 0.8893022139557208, "grad_norm": 1.4005244970321655, "learning_rate": 2.225706266983341e-07, "loss": 0.3901, "step": 14822 }, { "epoch": 0.8893622127557449, "grad_norm": 1.4344810247421265, "learning_rate": 2.223320098541398e-07, "loss": 0.3699, "step": 14823 }, { "epoch": 0.8894222115557688, "grad_norm": 1.2260138988494873, "learning_rate": 2.2209351679253175e-07, "loss": 0.329, "step": 14824 }, { "epoch": 0.8894822103557929, "grad_norm": 1.2405251264572144, "learning_rate": 2.2185514752251755e-07, "loss": 0.3389, "step": 14825 }, { "epoch": 0.8895422091558168, "grad_norm": 1.1715006828308105, "learning_rate": 2.2161690205310018e-07, "loss": 0.3418, "step": 14826 }, { "epoch": 0.8896022079558409, "grad_norm": 1.336423397064209, "learning_rate": 2.2137878039327524e-07, "loss": 0.3774, "step": 14827 }, { "epoch": 0.8896622067558649, "grad_norm": 1.416505217552185, "learning_rate": 2.211407825520371e-07, "loss": 0.3696, "step": 14828 }, { "epoch": 0.8897222055558889, "grad_norm": 1.418115496635437, "learning_rate": 2.2090290853837246e-07, "loss": 0.3873, "step": 14829 }, { "epoch": 0.8897822043559129, "grad_norm": 1.2674899101257324, "learning_rate": 2.206651583612656e-07, "loss": 0.3642, "step": 14830 }, { "epoch": 0.8898422031559369, "grad_norm": 1.36729896068573, "learning_rate": 2.204275320296954e-07, "loss": 0.3599, "step": 14831 }, { "epoch": 0.8899022019559609, "grad_norm": 1.3257315158843994, "learning_rate": 2.2019002955263528e-07, "loss": 0.3506, "step": 14832 }, { "epoch": 0.8899622007559849, "grad_norm": 1.400404453277588, "learning_rate": 2.199526509390552e-07, "loss": 0.3182, "step": 14833 }, { "epoch": 0.8900221995560089, "grad_norm": 1.506938099861145, "learning_rate": 2.1971539619791964e-07, "loss": 0.383, "step": 14834 }, { "epoch": 0.8900821983560329, "grad_norm": 1.2978404760360718, "learning_rate": 2.1947826533818882e-07, "loss": 0.3518, "step": 14835 }, { "epoch": 0.8901421971560569, "grad_norm": 1.4147191047668457, "learning_rate": 2.1924125836881753e-07, "loss": 0.3263, "step": 14836 }, { "epoch": 0.8902021959560809, "grad_norm": 1.333457350730896, "learning_rate": 2.1900437529875628e-07, "loss": 0.3553, "step": 14837 }, { "epoch": 0.8902621947561049, "grad_norm": 1.4268591403961182, "learning_rate": 2.187676161369532e-07, "loss": 0.4079, "step": 14838 }, { "epoch": 0.8903221935561288, "grad_norm": 1.328869342803955, "learning_rate": 2.1853098089234645e-07, "loss": 0.3515, "step": 14839 }, { "epoch": 0.8903821923561529, "grad_norm": 1.2891134023666382, "learning_rate": 2.1829446957387521e-07, "loss": 0.3484, "step": 14840 }, { "epoch": 0.8904421911561768, "grad_norm": 1.3265208005905151, "learning_rate": 2.180580821904694e-07, "loss": 0.4161, "step": 14841 }, { "epoch": 0.8905021899562009, "grad_norm": 1.261307716369629, "learning_rate": 2.1782181875105816e-07, "loss": 0.3497, "step": 14842 }, { "epoch": 0.8905621887562248, "grad_norm": 1.2860422134399414, "learning_rate": 2.1758567926456322e-07, "loss": 0.3715, "step": 14843 }, { "epoch": 0.8906221875562489, "grad_norm": 1.3954299688339233, "learning_rate": 2.1734966373990212e-07, "loss": 0.4334, "step": 14844 }, { "epoch": 0.8906821863562728, "grad_norm": 1.3683465719223022, "learning_rate": 2.1711377218598876e-07, "loss": 0.3944, "step": 14845 }, { "epoch": 0.8907421851562969, "grad_norm": 1.306326985359192, "learning_rate": 2.1687800461173173e-07, "loss": 0.3828, "step": 14846 }, { "epoch": 0.8908021839563208, "grad_norm": 1.4379457235336304, "learning_rate": 2.1664236102603411e-07, "loss": 0.3753, "step": 14847 }, { "epoch": 0.8908621827563449, "grad_norm": 1.3477394580841064, "learning_rate": 2.1640684143779593e-07, "loss": 0.3518, "step": 14848 }, { "epoch": 0.8909221815563688, "grad_norm": 1.4015129804611206, "learning_rate": 2.161714458559113e-07, "loss": 0.3784, "step": 14849 }, { "epoch": 0.8909821803563929, "grad_norm": 1.3526962995529175, "learning_rate": 2.1593617428927015e-07, "loss": 0.3964, "step": 14850 }, { "epoch": 0.8910421791564169, "grad_norm": 1.335453748703003, "learning_rate": 2.157010267467569e-07, "loss": 0.3561, "step": 14851 }, { "epoch": 0.8911021779564409, "grad_norm": 1.426214575767517, "learning_rate": 2.1546600323725328e-07, "loss": 0.3598, "step": 14852 }, { "epoch": 0.8911621767564649, "grad_norm": 1.396479845046997, "learning_rate": 2.1523110376963444e-07, "loss": 0.3361, "step": 14853 }, { "epoch": 0.8912221755564889, "grad_norm": 1.4143661260604858, "learning_rate": 2.1499632835277083e-07, "loss": 0.3989, "step": 14854 }, { "epoch": 0.8912821743565129, "grad_norm": 1.3820834159851074, "learning_rate": 2.1476167699552978e-07, "loss": 0.3533, "step": 14855 }, { "epoch": 0.8913421731565369, "grad_norm": 1.2989909648895264, "learning_rate": 2.1452714970677245e-07, "loss": 0.4072, "step": 14856 }, { "epoch": 0.8914021719565609, "grad_norm": 1.2972968816757202, "learning_rate": 2.1429274649535723e-07, "loss": 0.3711, "step": 14857 }, { "epoch": 0.8914621707565848, "grad_norm": 1.308700680732727, "learning_rate": 2.1405846737013402e-07, "loss": 0.3609, "step": 14858 }, { "epoch": 0.8915221695566089, "grad_norm": 1.519814372062683, "learning_rate": 2.1382431233995185e-07, "loss": 0.3768, "step": 14859 }, { "epoch": 0.8915821683566328, "grad_norm": 1.3671751022338867, "learning_rate": 2.1359028141365405e-07, "loss": 0.4024, "step": 14860 }, { "epoch": 0.8916421671566569, "grad_norm": 1.2322605848312378, "learning_rate": 2.1335637460007877e-07, "loss": 0.3594, "step": 14861 }, { "epoch": 0.8917021659566808, "grad_norm": 1.4934338331222534, "learning_rate": 2.131225919080592e-07, "loss": 0.4299, "step": 14862 }, { "epoch": 0.8917621647567049, "grad_norm": 1.5026768445968628, "learning_rate": 2.1288893334642384e-07, "loss": 0.43, "step": 14863 }, { "epoch": 0.8918221635567288, "grad_norm": 1.4236347675323486, "learning_rate": 2.1265539892399765e-07, "loss": 0.3644, "step": 14864 }, { "epoch": 0.8918821623567529, "grad_norm": 1.2499369382858276, "learning_rate": 2.124219886496002e-07, "loss": 0.3329, "step": 14865 }, { "epoch": 0.8919421611567768, "grad_norm": 1.420397400856018, "learning_rate": 2.1218870253204564e-07, "loss": 0.4139, "step": 14866 }, { "epoch": 0.8920021599568009, "grad_norm": 1.4760690927505493, "learning_rate": 2.1195554058014493e-07, "loss": 0.3619, "step": 14867 }, { "epoch": 0.8920621587568248, "grad_norm": 1.3266502618789673, "learning_rate": 2.1172250280270326e-07, "loss": 0.3815, "step": 14868 }, { "epoch": 0.8921221575568489, "grad_norm": 1.248893141746521, "learning_rate": 2.1148958920852118e-07, "loss": 0.3516, "step": 14869 }, { "epoch": 0.8921821563568728, "grad_norm": 1.3036900758743286, "learning_rate": 2.1125679980639449e-07, "loss": 0.349, "step": 14870 }, { "epoch": 0.8922421551568969, "grad_norm": 1.4803733825683594, "learning_rate": 2.110241346051148e-07, "loss": 0.3956, "step": 14871 }, { "epoch": 0.8923021539569208, "grad_norm": 1.2270457744598389, "learning_rate": 2.107915936134702e-07, "loss": 0.3317, "step": 14872 }, { "epoch": 0.8923621527569449, "grad_norm": 1.1718038320541382, "learning_rate": 2.1055917684024062e-07, "loss": 0.344, "step": 14873 }, { "epoch": 0.8924221515569689, "grad_norm": 1.4441676139831543, "learning_rate": 2.1032688429420475e-07, "loss": 0.3426, "step": 14874 }, { "epoch": 0.8924821503569929, "grad_norm": 1.497816801071167, "learning_rate": 2.1009471598413405e-07, "loss": 0.3896, "step": 14875 }, { "epoch": 0.8925421491570169, "grad_norm": 1.3832603693008423, "learning_rate": 2.0986267191879787e-07, "loss": 0.3735, "step": 14876 }, { "epoch": 0.8926021479570408, "grad_norm": 1.4729187488555908, "learning_rate": 2.096307521069583e-07, "loss": 0.3937, "step": 14877 }, { "epoch": 0.8926621467570649, "grad_norm": 1.2817586660385132, "learning_rate": 2.093989565573743e-07, "loss": 0.3792, "step": 14878 }, { "epoch": 0.8927221455570888, "grad_norm": 1.2893836498260498, "learning_rate": 2.0916728527880006e-07, "loss": 0.3497, "step": 14879 }, { "epoch": 0.8927821443571129, "grad_norm": 1.3159034252166748, "learning_rate": 2.089357382799845e-07, "loss": 0.3712, "step": 14880 }, { "epoch": 0.8928421431571368, "grad_norm": 1.4728749990463257, "learning_rate": 2.0870431556967216e-07, "loss": 0.3499, "step": 14881 }, { "epoch": 0.8929021419571609, "grad_norm": 1.4857910871505737, "learning_rate": 2.0847301715660215e-07, "loss": 0.3806, "step": 14882 }, { "epoch": 0.8929621407571848, "grad_norm": 1.3559420108795166, "learning_rate": 2.082418430495101e-07, "loss": 0.4099, "step": 14883 }, { "epoch": 0.8930221395572089, "grad_norm": 1.232386827468872, "learning_rate": 2.0801079325712735e-07, "loss": 0.3541, "step": 14884 }, { "epoch": 0.8930821383572328, "grad_norm": 1.5399045944213867, "learning_rate": 2.0777986778817786e-07, "loss": 0.3631, "step": 14885 }, { "epoch": 0.8931421371572569, "grad_norm": 1.4122978448867798, "learning_rate": 2.0754906665138366e-07, "loss": 0.3732, "step": 14886 }, { "epoch": 0.8932021359572808, "grad_norm": 1.2945492267608643, "learning_rate": 2.073183898554606e-07, "loss": 0.3489, "step": 14887 }, { "epoch": 0.8932621347573049, "grad_norm": 1.3668811321258545, "learning_rate": 2.070878374091209e-07, "loss": 0.3906, "step": 14888 }, { "epoch": 0.8933221335573288, "grad_norm": 1.435460090637207, "learning_rate": 2.068574093210711e-07, "loss": 0.4092, "step": 14889 }, { "epoch": 0.8933821323573529, "grad_norm": 1.3746483325958252, "learning_rate": 2.0662710560001302e-07, "loss": 0.3739, "step": 14890 }, { "epoch": 0.8934421311573768, "grad_norm": 1.4445033073425293, "learning_rate": 2.0639692625464496e-07, "loss": 0.3972, "step": 14891 }, { "epoch": 0.8935021299574009, "grad_norm": 1.3944568634033203, "learning_rate": 2.061668712936598e-07, "loss": 0.4343, "step": 14892 }, { "epoch": 0.8935621287574248, "grad_norm": 1.4042978286743164, "learning_rate": 2.0593694072574457e-07, "loss": 0.3358, "step": 14893 }, { "epoch": 0.8936221275574489, "grad_norm": 1.1749579906463623, "learning_rate": 2.0570713455958328e-07, "loss": 0.3866, "step": 14894 }, { "epoch": 0.8936821263574729, "grad_norm": 1.2538259029388428, "learning_rate": 2.0547745280385516e-07, "loss": 0.3615, "step": 14895 }, { "epoch": 0.8937421251574968, "grad_norm": 1.274543285369873, "learning_rate": 2.0524789546723404e-07, "loss": 0.3983, "step": 14896 }, { "epoch": 0.8938021239575209, "grad_norm": 1.2476149797439575, "learning_rate": 2.0501846255838834e-07, "loss": 0.3217, "step": 14897 }, { "epoch": 0.8938621227575448, "grad_norm": 1.400346279144287, "learning_rate": 2.0478915408598414e-07, "loss": 0.3112, "step": 14898 }, { "epoch": 0.8939221215575689, "grad_norm": 1.4198133945465088, "learning_rate": 2.0455997005868048e-07, "loss": 0.3892, "step": 14899 }, { "epoch": 0.8939821203575928, "grad_norm": 1.4457268714904785, "learning_rate": 2.043309104851322e-07, "loss": 0.3689, "step": 14900 }, { "epoch": 0.8940421191576169, "grad_norm": 1.3777965307235718, "learning_rate": 2.0410197537399093e-07, "loss": 0.4077, "step": 14901 }, { "epoch": 0.8941021179576408, "grad_norm": 1.3408724069595337, "learning_rate": 2.0387316473390143e-07, "loss": 0.3799, "step": 14902 }, { "epoch": 0.8941621167576649, "grad_norm": 1.3657410144805908, "learning_rate": 2.0364447857350643e-07, "loss": 0.3756, "step": 14903 }, { "epoch": 0.8942221155576888, "grad_norm": 1.325953483581543, "learning_rate": 2.034159169014405e-07, "loss": 0.3666, "step": 14904 }, { "epoch": 0.8942821143577129, "grad_norm": 1.5214987993240356, "learning_rate": 2.0318747972633633e-07, "loss": 0.3743, "step": 14905 }, { "epoch": 0.8943421131577368, "grad_norm": 1.3661645650863647, "learning_rate": 2.029591670568207e-07, "loss": 0.35, "step": 14906 }, { "epoch": 0.8944021119577609, "grad_norm": 1.4876350164413452, "learning_rate": 2.027309789015162e-07, "loss": 0.3801, "step": 14907 }, { "epoch": 0.8944621107577848, "grad_norm": 1.4815679788589478, "learning_rate": 2.025029152690403e-07, "loss": 0.3806, "step": 14908 }, { "epoch": 0.8945221095578089, "grad_norm": 1.1801530122756958, "learning_rate": 2.0227497616800583e-07, "loss": 0.3368, "step": 14909 }, { "epoch": 0.8945821083578328, "grad_norm": 1.333187460899353, "learning_rate": 2.0204716160702136e-07, "loss": 0.37, "step": 14910 }, { "epoch": 0.8946421071578569, "grad_norm": 1.3390698432922363, "learning_rate": 2.0181947159468998e-07, "loss": 0.3544, "step": 14911 }, { "epoch": 0.8947021059578808, "grad_norm": 1.3820029497146606, "learning_rate": 2.015919061396106e-07, "loss": 0.3874, "step": 14912 }, { "epoch": 0.8947621047579049, "grad_norm": 1.4138877391815186, "learning_rate": 2.0136446525037772e-07, "loss": 0.3873, "step": 14913 }, { "epoch": 0.8948221035579288, "grad_norm": 1.2655149698257446, "learning_rate": 2.011371489355801e-07, "loss": 0.3312, "step": 14914 }, { "epoch": 0.8948821023579528, "grad_norm": 1.480696439743042, "learning_rate": 2.009099572038034e-07, "loss": 0.3507, "step": 14915 }, { "epoch": 0.8949421011579768, "grad_norm": 1.3147746324539185, "learning_rate": 2.0068289006362623e-07, "loss": 0.3576, "step": 14916 }, { "epoch": 0.8950020999580008, "grad_norm": 1.3688452243804932, "learning_rate": 2.004559475236245e-07, "loss": 0.3627, "step": 14917 }, { "epoch": 0.8950620987580249, "grad_norm": 1.240336298942566, "learning_rate": 2.0022912959236988e-07, "loss": 0.3526, "step": 14918 }, { "epoch": 0.8951220975580488, "grad_norm": 1.2255258560180664, "learning_rate": 2.0000243627842662e-07, "loss": 0.3435, "step": 14919 }, { "epoch": 0.8951820963580729, "grad_norm": 1.5141448974609375, "learning_rate": 1.9977586759035703e-07, "loss": 0.4083, "step": 14920 }, { "epoch": 0.8952420951580968, "grad_norm": 1.371961236000061, "learning_rate": 1.9954942353671645e-07, "loss": 0.3674, "step": 14921 }, { "epoch": 0.8953020939581209, "grad_norm": 1.3107415437698364, "learning_rate": 1.9932310412605785e-07, "loss": 0.4129, "step": 14922 }, { "epoch": 0.8953620927581448, "grad_norm": 1.2470511198043823, "learning_rate": 1.9909690936692804e-07, "loss": 0.3418, "step": 14923 }, { "epoch": 0.8954220915581689, "grad_norm": 1.4053220748901367, "learning_rate": 1.988708392678683e-07, "loss": 0.385, "step": 14924 }, { "epoch": 0.8954820903581928, "grad_norm": 1.4222406148910522, "learning_rate": 1.9864489383741767e-07, "loss": 0.3811, "step": 14925 }, { "epoch": 0.8955420891582169, "grad_norm": 1.2762800455093384, "learning_rate": 1.9841907308410854e-07, "loss": 0.375, "step": 14926 }, { "epoch": 0.8956020879582408, "grad_norm": 1.4611088037490845, "learning_rate": 1.9819337701646904e-07, "loss": 0.3788, "step": 14927 }, { "epoch": 0.8956620867582649, "grad_norm": 1.2635470628738403, "learning_rate": 1.9796780564302186e-07, "loss": 0.3558, "step": 14928 }, { "epoch": 0.8957220855582888, "grad_norm": 1.3656200170516968, "learning_rate": 1.977423589722869e-07, "loss": 0.382, "step": 14929 }, { "epoch": 0.8957820843583129, "grad_norm": 1.3074146509170532, "learning_rate": 1.9751703701277917e-07, "loss": 0.4073, "step": 14930 }, { "epoch": 0.8958420831583368, "grad_norm": 1.3835612535476685, "learning_rate": 1.9729183977300572e-07, "loss": 0.3783, "step": 14931 }, { "epoch": 0.8959020819583609, "grad_norm": 1.3272064924240112, "learning_rate": 1.970667672614726e-07, "loss": 0.3982, "step": 14932 }, { "epoch": 0.8959620807583848, "grad_norm": 1.2421369552612305, "learning_rate": 1.968418194866795e-07, "loss": 0.3122, "step": 14933 }, { "epoch": 0.8960220795584088, "grad_norm": 1.2112587690353394, "learning_rate": 1.9661699645712272e-07, "loss": 0.3341, "step": 14934 }, { "epoch": 0.8960820783584328, "grad_norm": 1.3952479362487793, "learning_rate": 1.9639229818129033e-07, "loss": 0.3608, "step": 14935 }, { "epoch": 0.8961420771584568, "grad_norm": 1.2138725519180298, "learning_rate": 1.9616772466767006e-07, "loss": 0.3777, "step": 14936 }, { "epoch": 0.8962020759584808, "grad_norm": 1.3070085048675537, "learning_rate": 1.9594327592474302e-07, "loss": 0.3457, "step": 14937 }, { "epoch": 0.8962620747585048, "grad_norm": 1.27713143825531, "learning_rate": 1.9571895196098526e-07, "loss": 0.4017, "step": 14938 }, { "epoch": 0.8963220735585288, "grad_norm": 1.2972849607467651, "learning_rate": 1.9549475278486816e-07, "loss": 0.3821, "step": 14939 }, { "epoch": 0.8963820723585528, "grad_norm": 1.314009666442871, "learning_rate": 1.9527067840485845e-07, "loss": 0.3337, "step": 14940 }, { "epoch": 0.8964420711585769, "grad_norm": 1.377332091331482, "learning_rate": 1.9504672882941975e-07, "loss": 0.3511, "step": 14941 }, { "epoch": 0.8965020699586008, "grad_norm": 1.299642562866211, "learning_rate": 1.948229040670087e-07, "loss": 0.3538, "step": 14942 }, { "epoch": 0.8965620687586249, "grad_norm": 1.4856884479522705, "learning_rate": 1.945992041260776e-07, "loss": 0.3886, "step": 14943 }, { "epoch": 0.8966220675586488, "grad_norm": 1.2997571229934692, "learning_rate": 1.9437562901507575e-07, "loss": 0.3537, "step": 14944 }, { "epoch": 0.8966820663586729, "grad_norm": 1.4270310401916504, "learning_rate": 1.9415217874244578e-07, "loss": 0.3811, "step": 14945 }, { "epoch": 0.8967420651586968, "grad_norm": 1.35826575756073, "learning_rate": 1.9392885331662678e-07, "loss": 0.3817, "step": 14946 }, { "epoch": 0.8968020639587209, "grad_norm": 1.3466435670852661, "learning_rate": 1.9370565274605212e-07, "loss": 0.3929, "step": 14947 }, { "epoch": 0.8968620627587448, "grad_norm": 1.269150972366333, "learning_rate": 1.9348257703915154e-07, "loss": 0.4264, "step": 14948 }, { "epoch": 0.8969220615587689, "grad_norm": 1.4163135290145874, "learning_rate": 1.9325962620435026e-07, "loss": 0.3709, "step": 14949 }, { "epoch": 0.8969820603587928, "grad_norm": 1.361045241355896, "learning_rate": 1.930368002500668e-07, "loss": 0.3602, "step": 14950 }, { "epoch": 0.8970420591588169, "grad_norm": 1.2746798992156982, "learning_rate": 1.9281409918471736e-07, "loss": 0.3578, "step": 14951 }, { "epoch": 0.8971020579588408, "grad_norm": 1.3639425039291382, "learning_rate": 1.9259152301671116e-07, "loss": 0.3524, "step": 14952 }, { "epoch": 0.8971620567588648, "grad_norm": 1.4273192882537842, "learning_rate": 1.923690717544555e-07, "loss": 0.4035, "step": 14953 }, { "epoch": 0.8972220555588888, "grad_norm": 1.3377325534820557, "learning_rate": 1.9214674540635025e-07, "loss": 0.334, "step": 14954 }, { "epoch": 0.8972820543589128, "grad_norm": 1.3616869449615479, "learning_rate": 1.919245439807914e-07, "loss": 0.3603, "step": 14955 }, { "epoch": 0.8973420531589368, "grad_norm": 1.4037660360336304, "learning_rate": 1.9170246748617108e-07, "loss": 0.3655, "step": 14956 }, { "epoch": 0.8974020519589608, "grad_norm": 1.4138566255569458, "learning_rate": 1.9148051593087638e-07, "loss": 0.4021, "step": 14957 }, { "epoch": 0.8974620507589848, "grad_norm": 1.4167706966400146, "learning_rate": 1.9125868932328854e-07, "loss": 0.3965, "step": 14958 }, { "epoch": 0.8975220495590088, "grad_norm": 1.3574057817459106, "learning_rate": 1.9103698767178606e-07, "loss": 0.3716, "step": 14959 }, { "epoch": 0.8975820483590328, "grad_norm": 1.3957972526550293, "learning_rate": 1.9081541098474085e-07, "loss": 0.4024, "step": 14960 }, { "epoch": 0.8976420471590568, "grad_norm": 1.347382664680481, "learning_rate": 1.905939592705206e-07, "loss": 0.3524, "step": 14961 }, { "epoch": 0.8977020459590808, "grad_norm": 1.3828901052474976, "learning_rate": 1.903726325374886e-07, "loss": 0.3847, "step": 14962 }, { "epoch": 0.8977620447591048, "grad_norm": 1.2352193593978882, "learning_rate": 1.9015143079400436e-07, "loss": 0.3648, "step": 14963 }, { "epoch": 0.8978220435591289, "grad_norm": 1.5136284828186035, "learning_rate": 1.899303540484204e-07, "loss": 0.4047, "step": 14964 }, { "epoch": 0.8978820423591528, "grad_norm": 1.36552095413208, "learning_rate": 1.8970940230908645e-07, "loss": 0.391, "step": 14965 }, { "epoch": 0.8979420411591769, "grad_norm": 1.3404898643493652, "learning_rate": 1.8948857558434723e-07, "loss": 0.3732, "step": 14966 }, { "epoch": 0.8980020399592008, "grad_norm": 1.2388051748275757, "learning_rate": 1.8926787388254124e-07, "loss": 0.3756, "step": 14967 }, { "epoch": 0.8980620387592249, "grad_norm": 1.311065673828125, "learning_rate": 1.8904729721200425e-07, "loss": 0.3382, "step": 14968 }, { "epoch": 0.8981220375592488, "grad_norm": 1.2555490732192993, "learning_rate": 1.888268455810662e-07, "loss": 0.4032, "step": 14969 }, { "epoch": 0.8981820363592729, "grad_norm": 1.3391958475112915, "learning_rate": 1.8860651899805242e-07, "loss": 0.3786, "step": 14970 }, { "epoch": 0.8982420351592968, "grad_norm": 1.338618278503418, "learning_rate": 1.8838631747128388e-07, "loss": 0.3909, "step": 14971 }, { "epoch": 0.8983020339593208, "grad_norm": 1.3810198307037354, "learning_rate": 1.8816624100907653e-07, "loss": 0.3689, "step": 14972 }, { "epoch": 0.8983620327593448, "grad_norm": 1.367628574371338, "learning_rate": 1.879462896197413e-07, "loss": 0.4012, "step": 14973 }, { "epoch": 0.8984220315593688, "grad_norm": 1.3626296520233154, "learning_rate": 1.877264633115848e-07, "loss": 0.3457, "step": 14974 }, { "epoch": 0.8984820303593928, "grad_norm": 1.3379207849502563, "learning_rate": 1.8750676209290985e-07, "loss": 0.3813, "step": 14975 }, { "epoch": 0.8985420291594168, "grad_norm": 1.4061801433563232, "learning_rate": 1.8728718597201248e-07, "loss": 0.3902, "step": 14976 }, { "epoch": 0.8986020279594408, "grad_norm": 1.3700891733169556, "learning_rate": 1.8706773495718504e-07, "loss": 0.3912, "step": 14977 }, { "epoch": 0.8986620267594648, "grad_norm": 1.3492568731307983, "learning_rate": 1.8684840905671622e-07, "loss": 0.3636, "step": 14978 }, { "epoch": 0.8987220255594888, "grad_norm": 1.351658821105957, "learning_rate": 1.8662920827888746e-07, "loss": 0.3439, "step": 14979 }, { "epoch": 0.8987820243595128, "grad_norm": 1.2425813674926758, "learning_rate": 1.8641013263197892e-07, "loss": 0.3084, "step": 14980 }, { "epoch": 0.8988420231595368, "grad_norm": 1.3473830223083496, "learning_rate": 1.8619118212426228e-07, "loss": 0.3499, "step": 14981 }, { "epoch": 0.8989020219595608, "grad_norm": 1.3914920091629028, "learning_rate": 1.8597235676400687e-07, "loss": 0.3792, "step": 14982 }, { "epoch": 0.8989620207595848, "grad_norm": 1.3218350410461426, "learning_rate": 1.8575365655947734e-07, "loss": 0.3561, "step": 14983 }, { "epoch": 0.8990220195596088, "grad_norm": 1.3194587230682373, "learning_rate": 1.8553508151893217e-07, "loss": 0.3555, "step": 14984 }, { "epoch": 0.8990820183596329, "grad_norm": 1.4996082782745361, "learning_rate": 1.8531663165062668e-07, "loss": 0.4039, "step": 14985 }, { "epoch": 0.8991420171596568, "grad_norm": 1.3672202825546265, "learning_rate": 1.8509830696281e-07, "loss": 0.3479, "step": 14986 }, { "epoch": 0.8992020159596809, "grad_norm": 1.2969565391540527, "learning_rate": 1.8488010746372774e-07, "loss": 0.378, "step": 14987 }, { "epoch": 0.8992620147597048, "grad_norm": 1.4624109268188477, "learning_rate": 1.846620331616205e-07, "loss": 0.369, "step": 14988 }, { "epoch": 0.8993220135597289, "grad_norm": 1.3635060787200928, "learning_rate": 1.84444084064723e-07, "loss": 0.3466, "step": 14989 }, { "epoch": 0.8993820123597528, "grad_norm": 1.4250787496566772, "learning_rate": 1.8422626018126732e-07, "loss": 0.3543, "step": 14990 }, { "epoch": 0.8994420111597768, "grad_norm": 1.2867367267608643, "learning_rate": 1.840085615194788e-07, "loss": 0.3858, "step": 14991 }, { "epoch": 0.8995020099598008, "grad_norm": 1.2593491077423096, "learning_rate": 1.837909880875798e-07, "loss": 0.3479, "step": 14992 }, { "epoch": 0.8995620087598248, "grad_norm": 1.3087600469589233, "learning_rate": 1.8357353989378604e-07, "loss": 0.3365, "step": 14993 }, { "epoch": 0.8996220075598488, "grad_norm": 1.2229958772659302, "learning_rate": 1.8335621694630972e-07, "loss": 0.3671, "step": 14994 }, { "epoch": 0.8996820063598728, "grad_norm": 1.418972373008728, "learning_rate": 1.831390192533599e-07, "loss": 0.3529, "step": 14995 }, { "epoch": 0.8997420051598968, "grad_norm": 1.257562518119812, "learning_rate": 1.829219468231364e-07, "loss": 0.349, "step": 14996 }, { "epoch": 0.8998020039599208, "grad_norm": 1.41378653049469, "learning_rate": 1.8270499966383895e-07, "loss": 0.3597, "step": 14997 }, { "epoch": 0.8998620027599448, "grad_norm": 1.2834490537643433, "learning_rate": 1.824881777836596e-07, "loss": 0.3517, "step": 14998 }, { "epoch": 0.8999220015599688, "grad_norm": 1.3695040941238403, "learning_rate": 1.82271481190788e-07, "loss": 0.3813, "step": 14999 }, { "epoch": 0.8999820003599928, "grad_norm": 1.4282070398330688, "learning_rate": 1.8205490989340682e-07, "loss": 0.3721, "step": 15000 }, { "epoch": 0.9000419991600168, "grad_norm": 1.352622151374817, "learning_rate": 1.8183846389969487e-07, "loss": 0.3211, "step": 15001 }, { "epoch": 0.9001019979600408, "grad_norm": 1.4103699922561646, "learning_rate": 1.8162214321782664e-07, "loss": 0.3699, "step": 15002 }, { "epoch": 0.9001619967600648, "grad_norm": 1.285659909248352, "learning_rate": 1.8140594785597203e-07, "loss": 0.3628, "step": 15003 }, { "epoch": 0.9002219955600888, "grad_norm": 1.231297254562378, "learning_rate": 1.81189877822295e-07, "loss": 0.3438, "step": 15004 }, { "epoch": 0.9002819943601128, "grad_norm": 1.3020912408828735, "learning_rate": 1.8097393312495574e-07, "loss": 0.3421, "step": 15005 }, { "epoch": 0.9003419931601367, "grad_norm": 1.4062660932540894, "learning_rate": 1.807581137721097e-07, "loss": 0.3492, "step": 15006 }, { "epoch": 0.9004019919601608, "grad_norm": 1.4969159364700317, "learning_rate": 1.8054241977190733e-07, "loss": 0.3878, "step": 15007 }, { "epoch": 0.9004619907601848, "grad_norm": 1.2330858707427979, "learning_rate": 1.8032685113249397e-07, "loss": 0.3765, "step": 15008 }, { "epoch": 0.9005219895602088, "grad_norm": 1.2742571830749512, "learning_rate": 1.8011140786201152e-07, "loss": 0.3337, "step": 15009 }, { "epoch": 0.9005819883602328, "grad_norm": 1.2526686191558838, "learning_rate": 1.7989608996859524e-07, "loss": 0.3635, "step": 15010 }, { "epoch": 0.9006419871602568, "grad_norm": 1.1227973699569702, "learning_rate": 1.7968089746037768e-07, "loss": 0.3572, "step": 15011 }, { "epoch": 0.9007019859602808, "grad_norm": 1.2259982824325562, "learning_rate": 1.7946583034548557e-07, "loss": 0.3541, "step": 15012 }, { "epoch": 0.9007619847603048, "grad_norm": 1.2530477046966553, "learning_rate": 1.792508886320402e-07, "loss": 0.3513, "step": 15013 }, { "epoch": 0.9008219835603288, "grad_norm": 1.383908987045288, "learning_rate": 1.7903607232815971e-07, "loss": 0.364, "step": 15014 }, { "epoch": 0.9008819823603528, "grad_norm": 1.305626630783081, "learning_rate": 1.7882138144195685e-07, "loss": 0.3783, "step": 15015 }, { "epoch": 0.9009419811603768, "grad_norm": 1.3374179601669312, "learning_rate": 1.7860681598153894e-07, "loss": 0.385, "step": 15016 }, { "epoch": 0.9010019799604008, "grad_norm": 1.460834264755249, "learning_rate": 1.7839237595500896e-07, "loss": 0.4207, "step": 15017 }, { "epoch": 0.9010619787604248, "grad_norm": 1.3921074867248535, "learning_rate": 1.7817806137046604e-07, "loss": 0.3331, "step": 15018 }, { "epoch": 0.9011219775604488, "grad_norm": 1.2578613758087158, "learning_rate": 1.7796387223600353e-07, "loss": 0.3705, "step": 15019 }, { "epoch": 0.9011819763604728, "grad_norm": 1.404388189315796, "learning_rate": 1.7774980855971e-07, "loss": 0.3805, "step": 15020 }, { "epoch": 0.9012419751604968, "grad_norm": 1.3258907794952393, "learning_rate": 1.7753587034967067e-07, "loss": 0.3823, "step": 15021 }, { "epoch": 0.9013019739605208, "grad_norm": 1.3649004697799683, "learning_rate": 1.7732205761396442e-07, "loss": 0.3724, "step": 15022 }, { "epoch": 0.9013619727605447, "grad_norm": 1.2862927913665771, "learning_rate": 1.771083703606655e-07, "loss": 0.3857, "step": 15023 }, { "epoch": 0.9014219715605688, "grad_norm": 1.4551578760147095, "learning_rate": 1.7689480859784474e-07, "loss": 0.4282, "step": 15024 }, { "epoch": 0.9014819703605927, "grad_norm": 1.318224549293518, "learning_rate": 1.7668137233356662e-07, "loss": 0.362, "step": 15025 }, { "epoch": 0.9015419691606168, "grad_norm": 1.3496230840682983, "learning_rate": 1.7646806157589301e-07, "loss": 0.3612, "step": 15026 }, { "epoch": 0.9016019679606407, "grad_norm": 1.3752763271331787, "learning_rate": 1.7625487633287756e-07, "loss": 0.3573, "step": 15027 }, { "epoch": 0.9016619667606648, "grad_norm": 1.3329261541366577, "learning_rate": 1.7604181661257317e-07, "loss": 0.4182, "step": 15028 }, { "epoch": 0.9017219655606887, "grad_norm": 1.317283034324646, "learning_rate": 1.7582888242302496e-07, "loss": 0.3988, "step": 15029 }, { "epoch": 0.9017819643607128, "grad_norm": 1.34347403049469, "learning_rate": 1.7561607377227573e-07, "loss": 0.3608, "step": 15030 }, { "epoch": 0.9018419631607368, "grad_norm": 1.3551779985427856, "learning_rate": 1.7540339066836126e-07, "loss": 0.3801, "step": 15031 }, { "epoch": 0.9019019619607608, "grad_norm": 1.2842578887939453, "learning_rate": 1.7519083311931343e-07, "loss": 0.2991, "step": 15032 }, { "epoch": 0.9019619607607848, "grad_norm": 1.301171898841858, "learning_rate": 1.7497840113316032e-07, "loss": 0.3687, "step": 15033 }, { "epoch": 0.9020219595608088, "grad_norm": 1.3859120607376099, "learning_rate": 1.7476609471792447e-07, "loss": 0.3594, "step": 15034 }, { "epoch": 0.9020819583608328, "grad_norm": 1.3570539951324463, "learning_rate": 1.7455391388162306e-07, "loss": 0.3477, "step": 15035 }, { "epoch": 0.9021419571608568, "grad_norm": 1.2072540521621704, "learning_rate": 1.7434185863227007e-07, "loss": 0.3485, "step": 15036 }, { "epoch": 0.9022019559608808, "grad_norm": 1.4171454906463623, "learning_rate": 1.7412992897787337e-07, "loss": 0.4071, "step": 15037 }, { "epoch": 0.9022619547609048, "grad_norm": 1.2817856073379517, "learning_rate": 1.739181249264365e-07, "loss": 0.3852, "step": 15038 }, { "epoch": 0.9023219535609288, "grad_norm": 1.3692011833190918, "learning_rate": 1.737064464859579e-07, "loss": 0.3682, "step": 15039 }, { "epoch": 0.9023819523609528, "grad_norm": 1.4319888353347778, "learning_rate": 1.734948936644326e-07, "loss": 0.4116, "step": 15040 }, { "epoch": 0.9024419511609768, "grad_norm": 1.362684965133667, "learning_rate": 1.7328346646985055e-07, "loss": 0.3627, "step": 15041 }, { "epoch": 0.9025019499610007, "grad_norm": 1.358109951019287, "learning_rate": 1.7307216491019428e-07, "loss": 0.3488, "step": 15042 }, { "epoch": 0.9025619487610248, "grad_norm": 1.2859827280044556, "learning_rate": 1.728609889934456e-07, "loss": 0.3494, "step": 15043 }, { "epoch": 0.9026219475610487, "grad_norm": 1.37321937084198, "learning_rate": 1.726499387275785e-07, "loss": 0.4141, "step": 15044 }, { "epoch": 0.9026819463610728, "grad_norm": 1.423475742340088, "learning_rate": 1.7243901412056429e-07, "loss": 0.3811, "step": 15045 }, { "epoch": 0.9027419451610967, "grad_norm": 1.3571361303329468, "learning_rate": 1.72228215180368e-07, "loss": 0.3719, "step": 15046 }, { "epoch": 0.9028019439611208, "grad_norm": 1.4667245149612427, "learning_rate": 1.7201754191495049e-07, "loss": 0.3564, "step": 15047 }, { "epoch": 0.9028619427611447, "grad_norm": 1.4984323978424072, "learning_rate": 1.7180699433226863e-07, "loss": 0.3732, "step": 15048 }, { "epoch": 0.9029219415611688, "grad_norm": 1.3497343063354492, "learning_rate": 1.7159657244027315e-07, "loss": 0.3313, "step": 15049 }, { "epoch": 0.9029819403611927, "grad_norm": 1.4327852725982666, "learning_rate": 1.713862762469112e-07, "loss": 0.3355, "step": 15050 }, { "epoch": 0.9030419391612168, "grad_norm": 1.4066218137741089, "learning_rate": 1.7117610576012383e-07, "loss": 0.3757, "step": 15051 }, { "epoch": 0.9031019379612408, "grad_norm": 1.2462036609649658, "learning_rate": 1.709660609878496e-07, "loss": 0.3417, "step": 15052 }, { "epoch": 0.9031619367612648, "grad_norm": 1.300206184387207, "learning_rate": 1.7075614193801986e-07, "loss": 0.3797, "step": 15053 }, { "epoch": 0.9032219355612888, "grad_norm": 1.459013819694519, "learning_rate": 1.7054634861856232e-07, "loss": 0.3505, "step": 15054 }, { "epoch": 0.9032819343613128, "grad_norm": 1.2781773805618286, "learning_rate": 1.703366810374009e-07, "loss": 0.3749, "step": 15055 }, { "epoch": 0.9033419331613368, "grad_norm": 1.3538483381271362, "learning_rate": 1.7012713920245247e-07, "loss": 0.3964, "step": 15056 }, { "epoch": 0.9034019319613608, "grad_norm": 1.3137600421905518, "learning_rate": 1.69917723121632e-07, "loss": 0.365, "step": 15057 }, { "epoch": 0.9034619307613848, "grad_norm": 1.4897375106811523, "learning_rate": 1.6970843280284663e-07, "loss": 0.38, "step": 15058 }, { "epoch": 0.9035219295614088, "grad_norm": 1.4827913045883179, "learning_rate": 1.6949926825400085e-07, "loss": 0.4287, "step": 15059 }, { "epoch": 0.9035819283614328, "grad_norm": 1.2962102890014648, "learning_rate": 1.6929022948299405e-07, "loss": 0.349, "step": 15060 }, { "epoch": 0.9036419271614567, "grad_norm": 1.355526328086853, "learning_rate": 1.6908131649772097e-07, "loss": 0.4205, "step": 15061 }, { "epoch": 0.9037019259614808, "grad_norm": 1.443403720855713, "learning_rate": 1.6887252930607127e-07, "loss": 0.4078, "step": 15062 }, { "epoch": 0.9037619247615047, "grad_norm": 1.347334384918213, "learning_rate": 1.6866386791592846e-07, "loss": 0.3689, "step": 15063 }, { "epoch": 0.9038219235615288, "grad_norm": 1.2412705421447754, "learning_rate": 1.6845533233517485e-07, "loss": 0.3329, "step": 15064 }, { "epoch": 0.9038819223615527, "grad_norm": 1.3138689994812012, "learning_rate": 1.6824692257168455e-07, "loss": 0.3008, "step": 15065 }, { "epoch": 0.9039419211615768, "grad_norm": 1.44065523147583, "learning_rate": 1.6803863863332786e-07, "loss": 0.3609, "step": 15066 }, { "epoch": 0.9040019199616007, "grad_norm": 1.3143922090530396, "learning_rate": 1.6783048052797227e-07, "loss": 0.3832, "step": 15067 }, { "epoch": 0.9040619187616248, "grad_norm": 1.2765259742736816, "learning_rate": 1.67622448263478e-07, "loss": 0.3896, "step": 15068 }, { "epoch": 0.9041219175616487, "grad_norm": 1.5910507440567017, "learning_rate": 1.6741454184770165e-07, "loss": 0.4234, "step": 15069 }, { "epoch": 0.9041819163616728, "grad_norm": 1.3799089193344116, "learning_rate": 1.672067612884941e-07, "loss": 0.3788, "step": 15070 }, { "epoch": 0.9042419151616967, "grad_norm": 1.6905392408370972, "learning_rate": 1.6699910659370343e-07, "loss": 0.433, "step": 15071 }, { "epoch": 0.9043019139617208, "grad_norm": 1.2522902488708496, "learning_rate": 1.6679157777117194e-07, "loss": 0.3246, "step": 15072 }, { "epoch": 0.9043619127617447, "grad_norm": 1.3719514608383179, "learning_rate": 1.6658417482873566e-07, "loss": 0.3645, "step": 15073 }, { "epoch": 0.9044219115617688, "grad_norm": 1.3286852836608887, "learning_rate": 1.6637689777422875e-07, "loss": 0.3865, "step": 15074 }, { "epoch": 0.9044819103617928, "grad_norm": 1.3654814958572388, "learning_rate": 1.6616974661547795e-07, "loss": 0.3709, "step": 15075 }, { "epoch": 0.9045419091618168, "grad_norm": 1.331442952156067, "learning_rate": 1.6596272136030726e-07, "loss": 0.3435, "step": 15076 }, { "epoch": 0.9046019079618408, "grad_norm": 1.2623660564422607, "learning_rate": 1.657558220165345e-07, "loss": 0.3642, "step": 15077 }, { "epoch": 0.9046619067618648, "grad_norm": 1.3331043720245361, "learning_rate": 1.6554904859197355e-07, "loss": 0.3509, "step": 15078 }, { "epoch": 0.9047219055618888, "grad_norm": 1.2953697443008423, "learning_rate": 1.6534240109443372e-07, "loss": 0.3485, "step": 15079 }, { "epoch": 0.9047819043619127, "grad_norm": 1.364704966545105, "learning_rate": 1.6513587953171838e-07, "loss": 0.3746, "step": 15080 }, { "epoch": 0.9048419031619368, "grad_norm": 1.4431861639022827, "learning_rate": 1.6492948391162708e-07, "loss": 0.374, "step": 15081 }, { "epoch": 0.9049019019619607, "grad_norm": 1.4992766380310059, "learning_rate": 1.6472321424195511e-07, "loss": 0.3906, "step": 15082 }, { "epoch": 0.9049619007619848, "grad_norm": 1.3324086666107178, "learning_rate": 1.6451707053049192e-07, "loss": 0.3723, "step": 15083 }, { "epoch": 0.9050218995620087, "grad_norm": 1.5421172380447388, "learning_rate": 1.6431105278502222e-07, "loss": 0.4175, "step": 15084 }, { "epoch": 0.9050818983620328, "grad_norm": 1.4273954629898071, "learning_rate": 1.6410516101332656e-07, "loss": 0.4077, "step": 15085 }, { "epoch": 0.9051418971620567, "grad_norm": 1.3165401220321655, "learning_rate": 1.6389939522318115e-07, "loss": 0.3484, "step": 15086 }, { "epoch": 0.9052018959620808, "grad_norm": 1.3815479278564453, "learning_rate": 1.6369375542235598e-07, "loss": 0.363, "step": 15087 }, { "epoch": 0.9052618947621047, "grad_norm": 1.321678876876831, "learning_rate": 1.6348824161861758e-07, "loss": 0.3836, "step": 15088 }, { "epoch": 0.9053218935621288, "grad_norm": 1.4028103351593018, "learning_rate": 1.63282853819727e-07, "loss": 0.3735, "step": 15089 }, { "epoch": 0.9053818923621527, "grad_norm": 1.3705298900604248, "learning_rate": 1.6307759203344107e-07, "loss": 0.4003, "step": 15090 }, { "epoch": 0.9054418911621768, "grad_norm": 1.3721466064453125, "learning_rate": 1.628724562675119e-07, "loss": 0.3495, "step": 15091 }, { "epoch": 0.9055018899622007, "grad_norm": 1.3534077405929565, "learning_rate": 1.626674465296858e-07, "loss": 0.368, "step": 15092 }, { "epoch": 0.9055618887622248, "grad_norm": 1.3668668270111084, "learning_rate": 1.6246256282770515e-07, "loss": 0.3724, "step": 15093 }, { "epoch": 0.9056218875622487, "grad_norm": 1.441270112991333, "learning_rate": 1.6225780516930815e-07, "loss": 0.3712, "step": 15094 }, { "epoch": 0.9056818863622728, "grad_norm": 1.2767137289047241, "learning_rate": 1.6205317356222709e-07, "loss": 0.3632, "step": 15095 }, { "epoch": 0.9057418851622967, "grad_norm": 1.2891764640808105, "learning_rate": 1.6184866801419002e-07, "loss": 0.3792, "step": 15096 }, { "epoch": 0.9058018839623208, "grad_norm": 1.3880500793457031, "learning_rate": 1.6164428853291988e-07, "loss": 0.3439, "step": 15097 }, { "epoch": 0.9058618827623448, "grad_norm": 1.3333181142807007, "learning_rate": 1.6144003512613585e-07, "loss": 0.346, "step": 15098 }, { "epoch": 0.9059218815623687, "grad_norm": 1.357107400894165, "learning_rate": 1.6123590780155118e-07, "loss": 0.3507, "step": 15099 }, { "epoch": 0.9059818803623928, "grad_norm": 1.3748104572296143, "learning_rate": 1.6103190656687443e-07, "loss": 0.3588, "step": 15100 }, { "epoch": 0.9060418791624167, "grad_norm": 1.4067020416259766, "learning_rate": 1.6082803142981077e-07, "loss": 0.3569, "step": 15101 }, { "epoch": 0.9061018779624408, "grad_norm": 1.3699790239334106, "learning_rate": 1.606242823980587e-07, "loss": 0.3461, "step": 15102 }, { "epoch": 0.9061618767624647, "grad_norm": 1.4013723134994507, "learning_rate": 1.6042065947931438e-07, "loss": 0.3384, "step": 15103 }, { "epoch": 0.9062218755624888, "grad_norm": 1.342637062072754, "learning_rate": 1.6021716268126547e-07, "loss": 0.3881, "step": 15104 }, { "epoch": 0.9062818743625127, "grad_norm": 1.213321328163147, "learning_rate": 1.6001379201159838e-07, "loss": 0.3452, "step": 15105 }, { "epoch": 0.9063418731625368, "grad_norm": 1.2761483192443848, "learning_rate": 1.5981054747799377e-07, "loss": 0.374, "step": 15106 }, { "epoch": 0.9064018719625607, "grad_norm": 1.3300575017929077, "learning_rate": 1.5960742908812724e-07, "loss": 0.3392, "step": 15107 }, { "epoch": 0.9064618707625848, "grad_norm": 1.339442253112793, "learning_rate": 1.5940443684966892e-07, "loss": 0.3666, "step": 15108 }, { "epoch": 0.9065218695626087, "grad_norm": 1.3160865306854248, "learning_rate": 1.5920157077028507e-07, "loss": 0.3976, "step": 15109 }, { "epoch": 0.9065818683626328, "grad_norm": 1.3431264162063599, "learning_rate": 1.5899883085763804e-07, "loss": 0.3504, "step": 15110 }, { "epoch": 0.9066418671626567, "grad_norm": 1.2127002477645874, "learning_rate": 1.5879621711938324e-07, "loss": 0.2923, "step": 15111 }, { "epoch": 0.9067018659626808, "grad_norm": 1.4711660146713257, "learning_rate": 1.5859372956317214e-07, "loss": 0.3637, "step": 15112 }, { "epoch": 0.9067618647627047, "grad_norm": 1.2536307573318481, "learning_rate": 1.583913681966535e-07, "loss": 0.3561, "step": 15113 }, { "epoch": 0.9068218635627288, "grad_norm": 1.3025128841400146, "learning_rate": 1.5818913302746795e-07, "loss": 0.382, "step": 15114 }, { "epoch": 0.9068818623627527, "grad_norm": 1.2517842054367065, "learning_rate": 1.5798702406325415e-07, "loss": 0.3852, "step": 15115 }, { "epoch": 0.9069418611627768, "grad_norm": 1.375671625137329, "learning_rate": 1.577850413116434e-07, "loss": 0.354, "step": 15116 }, { "epoch": 0.9070018599628007, "grad_norm": 1.3400275707244873, "learning_rate": 1.5758318478026462e-07, "loss": 0.3695, "step": 15117 }, { "epoch": 0.9070618587628247, "grad_norm": 1.343991994857788, "learning_rate": 1.5738145447674212e-07, "loss": 0.3542, "step": 15118 }, { "epoch": 0.9071218575628487, "grad_norm": 1.3113213777542114, "learning_rate": 1.5717985040869204e-07, "loss": 0.3594, "step": 15119 }, { "epoch": 0.9071818563628727, "grad_norm": 1.4434261322021484, "learning_rate": 1.569783725837297e-07, "loss": 0.3625, "step": 15120 }, { "epoch": 0.9072418551628968, "grad_norm": 1.4253548383712769, "learning_rate": 1.5677702100946273e-07, "loss": 0.3774, "step": 15121 }, { "epoch": 0.9073018539629207, "grad_norm": 1.501538872718811, "learning_rate": 1.5657579569349672e-07, "loss": 0.302, "step": 15122 }, { "epoch": 0.9073618527629448, "grad_norm": 1.3392189741134644, "learning_rate": 1.5637469664343035e-07, "loss": 0.382, "step": 15123 }, { "epoch": 0.9074218515629687, "grad_norm": 1.3771520853042603, "learning_rate": 1.5617372386685758e-07, "loss": 0.3793, "step": 15124 }, { "epoch": 0.9074818503629928, "grad_norm": 1.5374982357025146, "learning_rate": 1.5597287737136928e-07, "loss": 0.4144, "step": 15125 }, { "epoch": 0.9075418491630167, "grad_norm": 1.438637375831604, "learning_rate": 1.5577215716455011e-07, "loss": 0.34, "step": 15126 }, { "epoch": 0.9076018479630408, "grad_norm": 1.2217222452163696, "learning_rate": 1.555715632539801e-07, "loss": 0.3606, "step": 15127 }, { "epoch": 0.9076618467630647, "grad_norm": 1.2680230140686035, "learning_rate": 1.553710956472349e-07, "loss": 0.4203, "step": 15128 }, { "epoch": 0.9077218455630888, "grad_norm": 1.3375612497329712, "learning_rate": 1.5517075435188559e-07, "loss": 0.3877, "step": 15129 }, { "epoch": 0.9077818443631127, "grad_norm": 1.3996413946151733, "learning_rate": 1.5497053937549782e-07, "loss": 0.3796, "step": 15130 }, { "epoch": 0.9078418431631368, "grad_norm": 1.3204680681228638, "learning_rate": 1.547704507256321e-07, "loss": 0.4071, "step": 15131 }, { "epoch": 0.9079018419631607, "grad_norm": 1.4544745683670044, "learning_rate": 1.5457048840984672e-07, "loss": 0.3789, "step": 15132 }, { "epoch": 0.9079618407631848, "grad_norm": 1.2790615558624268, "learning_rate": 1.543706524356917e-07, "loss": 0.3647, "step": 15133 }, { "epoch": 0.9080218395632087, "grad_norm": 1.308672308921814, "learning_rate": 1.541709428107144e-07, "loss": 0.3286, "step": 15134 }, { "epoch": 0.9080818383632328, "grad_norm": 1.3855409622192383, "learning_rate": 1.5397135954245756e-07, "loss": 0.3913, "step": 15135 }, { "epoch": 0.9081418371632567, "grad_norm": 1.457381010055542, "learning_rate": 1.537719026384572e-07, "loss": 0.3907, "step": 15136 }, { "epoch": 0.9082018359632807, "grad_norm": 1.4267886877059937, "learning_rate": 1.5357257210624708e-07, "loss": 0.3829, "step": 15137 }, { "epoch": 0.9082618347633047, "grad_norm": 1.4119974374771118, "learning_rate": 1.5337336795335516e-07, "loss": 0.3615, "step": 15138 }, { "epoch": 0.9083218335633287, "grad_norm": 1.5034856796264648, "learning_rate": 1.531742901873035e-07, "loss": 0.4002, "step": 15139 }, { "epoch": 0.9083818323633527, "grad_norm": 1.3630908727645874, "learning_rate": 1.5297533881561031e-07, "loss": 0.3985, "step": 15140 }, { "epoch": 0.9084418311633767, "grad_norm": 1.3130857944488525, "learning_rate": 1.5277651384579032e-07, "loss": 0.3784, "step": 15141 }, { "epoch": 0.9085018299634008, "grad_norm": 1.3706836700439453, "learning_rate": 1.5257781528535123e-07, "loss": 0.3905, "step": 15142 }, { "epoch": 0.9085618287634247, "grad_norm": 1.3946164846420288, "learning_rate": 1.5237924314179691e-07, "loss": 0.3421, "step": 15143 }, { "epoch": 0.9086218275634488, "grad_norm": 1.2674323320388794, "learning_rate": 1.5218079742262763e-07, "loss": 0.3574, "step": 15144 }, { "epoch": 0.9086818263634727, "grad_norm": 1.4502558708190918, "learning_rate": 1.519824781353364e-07, "loss": 0.377, "step": 15145 }, { "epoch": 0.9087418251634968, "grad_norm": 1.3194093704223633, "learning_rate": 1.517842852874134e-07, "loss": 0.3594, "step": 15146 }, { "epoch": 0.9088018239635207, "grad_norm": 1.3105363845825195, "learning_rate": 1.5158621888634381e-07, "loss": 0.35, "step": 15147 }, { "epoch": 0.9088618227635448, "grad_norm": 1.4544380903244019, "learning_rate": 1.5138827893960704e-07, "loss": 0.356, "step": 15148 }, { "epoch": 0.9089218215635687, "grad_norm": 1.2850607633590698, "learning_rate": 1.5119046545467967e-07, "loss": 0.3514, "step": 15149 }, { "epoch": 0.9089818203635928, "grad_norm": 1.4200546741485596, "learning_rate": 1.509927784390302e-07, "loss": 0.3861, "step": 15150 }, { "epoch": 0.9090418191636167, "grad_norm": 1.406830906867981, "learning_rate": 1.5079521790012589e-07, "loss": 0.3606, "step": 15151 }, { "epoch": 0.9091018179636408, "grad_norm": 1.2737936973571777, "learning_rate": 1.5059778384542672e-07, "loss": 0.3846, "step": 15152 }, { "epoch": 0.9091618167636647, "grad_norm": 1.4325413703918457, "learning_rate": 1.5040047628239023e-07, "loss": 0.411, "step": 15153 }, { "epoch": 0.9092218155636888, "grad_norm": 1.4315545558929443, "learning_rate": 1.5020329521846665e-07, "loss": 0.3796, "step": 15154 }, { "epoch": 0.9092818143637127, "grad_norm": 1.4633097648620605, "learning_rate": 1.5000624066110267e-07, "loss": 0.4127, "step": 15155 }, { "epoch": 0.9093418131637367, "grad_norm": 1.3074612617492676, "learning_rate": 1.4980931261774116e-07, "loss": 0.3011, "step": 15156 }, { "epoch": 0.9094018119637607, "grad_norm": 1.45380699634552, "learning_rate": 1.496125110958183e-07, "loss": 0.4206, "step": 15157 }, { "epoch": 0.9094618107637847, "grad_norm": 1.2080377340316772, "learning_rate": 1.4941583610276604e-07, "loss": 0.3581, "step": 15158 }, { "epoch": 0.9095218095638087, "grad_norm": 1.278814673423767, "learning_rate": 1.4921928764601283e-07, "loss": 0.3883, "step": 15159 }, { "epoch": 0.9095818083638327, "grad_norm": 1.3185560703277588, "learning_rate": 1.490228657329809e-07, "loss": 0.4363, "step": 15160 }, { "epoch": 0.9096418071638567, "grad_norm": 1.415663480758667, "learning_rate": 1.488265703710886e-07, "loss": 0.3951, "step": 15161 }, { "epoch": 0.9097018059638807, "grad_norm": 1.353212833404541, "learning_rate": 1.4863040156774842e-07, "loss": 0.3872, "step": 15162 }, { "epoch": 0.9097618047639047, "grad_norm": 1.4334454536437988, "learning_rate": 1.4843435933036902e-07, "loss": 0.3776, "step": 15163 }, { "epoch": 0.9098218035639287, "grad_norm": 1.427623987197876, "learning_rate": 1.4823844366635513e-07, "loss": 0.3944, "step": 15164 }, { "epoch": 0.9098818023639528, "grad_norm": 1.352299451828003, "learning_rate": 1.4804265458310334e-07, "loss": 0.3604, "step": 15165 }, { "epoch": 0.9099418011639767, "grad_norm": 1.2437467575073242, "learning_rate": 1.4784699208800983e-07, "loss": 0.3597, "step": 15166 }, { "epoch": 0.9100017999640008, "grad_norm": 1.3861273527145386, "learning_rate": 1.4765145618846265e-07, "loss": 0.3641, "step": 15167 }, { "epoch": 0.9100617987640247, "grad_norm": 1.4741041660308838, "learning_rate": 1.4745604689184748e-07, "loss": 0.3954, "step": 15168 }, { "epoch": 0.9101217975640488, "grad_norm": 1.4064327478408813, "learning_rate": 1.4726076420554268e-07, "loss": 0.3718, "step": 15169 }, { "epoch": 0.9101817963640727, "grad_norm": 1.3409194946289062, "learning_rate": 1.4706560813692344e-07, "loss": 0.385, "step": 15170 }, { "epoch": 0.9102417951640968, "grad_norm": 1.3985785245895386, "learning_rate": 1.4687057869336068e-07, "loss": 0.3922, "step": 15171 }, { "epoch": 0.9103017939641207, "grad_norm": 1.471846580505371, "learning_rate": 1.466756758822195e-07, "loss": 0.3645, "step": 15172 }, { "epoch": 0.9103617927641448, "grad_norm": 1.3026257753372192, "learning_rate": 1.4648089971086004e-07, "loss": 0.3534, "step": 15173 }, { "epoch": 0.9104217915641687, "grad_norm": 1.373982310295105, "learning_rate": 1.462862501866384e-07, "loss": 0.3728, "step": 15174 }, { "epoch": 0.9104817903641927, "grad_norm": 1.309989094734192, "learning_rate": 1.4609172731690568e-07, "loss": 0.3618, "step": 15175 }, { "epoch": 0.9105417891642167, "grad_norm": 1.3587238788604736, "learning_rate": 1.4589733110900838e-07, "loss": 0.3542, "step": 15176 }, { "epoch": 0.9106017879642407, "grad_norm": 1.263805866241455, "learning_rate": 1.4570306157028672e-07, "loss": 0.4279, "step": 15177 }, { "epoch": 0.9106617867642647, "grad_norm": 1.2741526365280151, "learning_rate": 1.4550891870807902e-07, "loss": 0.3365, "step": 15178 }, { "epoch": 0.9107217855642887, "grad_norm": 1.3722519874572754, "learning_rate": 1.4531490252971658e-07, "loss": 0.3553, "step": 15179 }, { "epoch": 0.9107817843643127, "grad_norm": 1.3387999534606934, "learning_rate": 1.451210130425264e-07, "loss": 0.3667, "step": 15180 }, { "epoch": 0.9108417831643367, "grad_norm": 1.194267988204956, "learning_rate": 1.4492725025383006e-07, "loss": 0.3395, "step": 15181 }, { "epoch": 0.9109017819643607, "grad_norm": 1.4607816934585571, "learning_rate": 1.447336141709457e-07, "loss": 0.4028, "step": 15182 }, { "epoch": 0.9109617807643847, "grad_norm": 1.2975033521652222, "learning_rate": 1.445401048011871e-07, "loss": 0.3805, "step": 15183 }, { "epoch": 0.9110217795644087, "grad_norm": 1.324922800064087, "learning_rate": 1.4434672215186074e-07, "loss": 0.3615, "step": 15184 }, { "epoch": 0.9110817783644327, "grad_norm": 1.4011489152908325, "learning_rate": 1.4415346623027065e-07, "loss": 0.3945, "step": 15185 }, { "epoch": 0.9111417771644567, "grad_norm": 1.4058445692062378, "learning_rate": 1.4396033704371474e-07, "loss": 0.3527, "step": 15186 }, { "epoch": 0.9112017759644807, "grad_norm": 1.4904426336288452, "learning_rate": 1.4376733459948666e-07, "loss": 0.4029, "step": 15187 }, { "epoch": 0.9112617747645048, "grad_norm": 1.3917187452316284, "learning_rate": 1.4357445890487607e-07, "loss": 0.3522, "step": 15188 }, { "epoch": 0.9113217735645287, "grad_norm": 1.2971453666687012, "learning_rate": 1.4338170996716536e-07, "loss": 0.401, "step": 15189 }, { "epoch": 0.9113817723645528, "grad_norm": 1.3243099451065063, "learning_rate": 1.4318908779363533e-07, "loss": 0.3993, "step": 15190 }, { "epoch": 0.9114417711645767, "grad_norm": 1.4710017442703247, "learning_rate": 1.4299659239156009e-07, "loss": 0.4167, "step": 15191 }, { "epoch": 0.9115017699646007, "grad_norm": 1.2676211595535278, "learning_rate": 1.4280422376820883e-07, "loss": 0.341, "step": 15192 }, { "epoch": 0.9115617687646247, "grad_norm": 1.2420012950897217, "learning_rate": 1.426119819308464e-07, "loss": 0.3191, "step": 15193 }, { "epoch": 0.9116217675646487, "grad_norm": 1.5237709283828735, "learning_rate": 1.4241986688673298e-07, "loss": 0.3539, "step": 15194 }, { "epoch": 0.9116817663646727, "grad_norm": 1.3040812015533447, "learning_rate": 1.4222787864312525e-07, "loss": 0.3995, "step": 15195 }, { "epoch": 0.9117417651646967, "grad_norm": 1.3834081888198853, "learning_rate": 1.4203601720727137e-07, "loss": 0.3575, "step": 15196 }, { "epoch": 0.9118017639647207, "grad_norm": 1.435463547706604, "learning_rate": 1.4184428258641912e-07, "loss": 0.3636, "step": 15197 }, { "epoch": 0.9118617627647447, "grad_norm": 1.5221672058105469, "learning_rate": 1.4165267478780768e-07, "loss": 0.3388, "step": 15198 }, { "epoch": 0.9119217615647687, "grad_norm": 1.307532787322998, "learning_rate": 1.4146119381867512e-07, "loss": 0.3934, "step": 15199 }, { "epoch": 0.9119817603647927, "grad_norm": 1.3930102586746216, "learning_rate": 1.412698396862513e-07, "loss": 0.376, "step": 15200 }, { "epoch": 0.9120417591648167, "grad_norm": 1.1772191524505615, "learning_rate": 1.4107861239776298e-07, "loss": 0.3693, "step": 15201 }, { "epoch": 0.9121017579648407, "grad_norm": 1.533892273902893, "learning_rate": 1.408875119604331e-07, "loss": 0.3237, "step": 15202 }, { "epoch": 0.9121617567648647, "grad_norm": 1.3502591848373413, "learning_rate": 1.4069653838147748e-07, "loss": 0.3765, "step": 15203 }, { "epoch": 0.9122217555648887, "grad_norm": 1.1861605644226074, "learning_rate": 1.4050569166810821e-07, "loss": 0.3333, "step": 15204 }, { "epoch": 0.9122817543649127, "grad_norm": 1.4277945756912231, "learning_rate": 1.4031497182753337e-07, "loss": 0.3469, "step": 15205 }, { "epoch": 0.9123417531649367, "grad_norm": 1.2347921133041382, "learning_rate": 1.4012437886695568e-07, "loss": 0.3192, "step": 15206 }, { "epoch": 0.9124017519649607, "grad_norm": 1.2842081785202026, "learning_rate": 1.399339127935727e-07, "loss": 0.4157, "step": 15207 }, { "epoch": 0.9124617507649847, "grad_norm": 1.4262518882751465, "learning_rate": 1.3974357361457666e-07, "loss": 0.369, "step": 15208 }, { "epoch": 0.9125217495650088, "grad_norm": 1.3186308145523071, "learning_rate": 1.3955336133715746e-07, "loss": 0.4381, "step": 15209 }, { "epoch": 0.9125817483650327, "grad_norm": 1.3711581230163574, "learning_rate": 1.3936327596849712e-07, "loss": 0.3573, "step": 15210 }, { "epoch": 0.9126417471650567, "grad_norm": 1.3108863830566406, "learning_rate": 1.3917331751577465e-07, "loss": 0.3888, "step": 15211 }, { "epoch": 0.9127017459650807, "grad_norm": 1.2840121984481812, "learning_rate": 1.389834859861644e-07, "loss": 0.405, "step": 15212 }, { "epoch": 0.9127617447651047, "grad_norm": 1.3053511381149292, "learning_rate": 1.3879378138683478e-07, "loss": 0.3627, "step": 15213 }, { "epoch": 0.9128217435651287, "grad_norm": 1.316501259803772, "learning_rate": 1.3860420372495124e-07, "loss": 0.3361, "step": 15214 }, { "epoch": 0.9128817423651527, "grad_norm": 1.4183021783828735, "learning_rate": 1.3841475300767175e-07, "loss": 0.387, "step": 15215 }, { "epoch": 0.9129417411651767, "grad_norm": 1.3082813024520874, "learning_rate": 1.3822542924215158e-07, "loss": 0.3578, "step": 15216 }, { "epoch": 0.9130017399652007, "grad_norm": 1.377676010131836, "learning_rate": 1.3803623243554135e-07, "loss": 0.3806, "step": 15217 }, { "epoch": 0.9130617387652247, "grad_norm": 1.3139132261276245, "learning_rate": 1.378471625949851e-07, "loss": 0.3504, "step": 15218 }, { "epoch": 0.9131217375652487, "grad_norm": 1.4719007015228271, "learning_rate": 1.3765821972762404e-07, "loss": 0.3783, "step": 15219 }, { "epoch": 0.9131817363652727, "grad_norm": 1.2526638507843018, "learning_rate": 1.3746940384059254e-07, "loss": 0.3619, "step": 15220 }, { "epoch": 0.9132417351652967, "grad_norm": 1.4311494827270508, "learning_rate": 1.372807149410225e-07, "loss": 0.4247, "step": 15221 }, { "epoch": 0.9133017339653207, "grad_norm": 1.2662633657455444, "learning_rate": 1.3709215303603934e-07, "loss": 0.4137, "step": 15222 }, { "epoch": 0.9133617327653447, "grad_norm": 1.1634007692337036, "learning_rate": 1.3690371813276368e-07, "loss": 0.3505, "step": 15223 }, { "epoch": 0.9134217315653687, "grad_norm": 1.2418651580810547, "learning_rate": 1.367154102383132e-07, "loss": 0.3603, "step": 15224 }, { "epoch": 0.9134817303653927, "grad_norm": 1.3323293924331665, "learning_rate": 1.36527229359798e-07, "loss": 0.4107, "step": 15225 }, { "epoch": 0.9135417291654166, "grad_norm": 1.3587440252304077, "learning_rate": 1.363391755043256e-07, "loss": 0.3601, "step": 15226 }, { "epoch": 0.9136017279654407, "grad_norm": 1.4071223735809326, "learning_rate": 1.361512486789977e-07, "loss": 0.3649, "step": 15227 }, { "epoch": 0.9136617267654646, "grad_norm": 1.5091311931610107, "learning_rate": 1.359634488909112e-07, "loss": 0.3934, "step": 15228 }, { "epoch": 0.9137217255654887, "grad_norm": 1.3786064386367798, "learning_rate": 1.3577577614715892e-07, "loss": 0.3677, "step": 15229 }, { "epoch": 0.9137817243655126, "grad_norm": 1.2228797674179077, "learning_rate": 1.3558823045482842e-07, "loss": 0.3707, "step": 15230 }, { "epoch": 0.9138417231655367, "grad_norm": 1.2431188821792603, "learning_rate": 1.354008118210024e-07, "loss": 0.3743, "step": 15231 }, { "epoch": 0.9139017219655607, "grad_norm": 1.4224408864974976, "learning_rate": 1.3521352025275795e-07, "loss": 0.3125, "step": 15232 }, { "epoch": 0.9139617207655847, "grad_norm": 1.356225848197937, "learning_rate": 1.350263557571692e-07, "loss": 0.3799, "step": 15233 }, { "epoch": 0.9140217195656087, "grad_norm": 1.3000109195709229, "learning_rate": 1.3483931834130475e-07, "loss": 0.3633, "step": 15234 }, { "epoch": 0.9140817183656327, "grad_norm": 1.211515188217163, "learning_rate": 1.346524080122266e-07, "loss": 0.4053, "step": 15235 }, { "epoch": 0.9141417171656567, "grad_norm": 1.3880478143692017, "learning_rate": 1.3446562477699525e-07, "loss": 0.3642, "step": 15236 }, { "epoch": 0.9142017159656807, "grad_norm": 1.4859731197357178, "learning_rate": 1.3427896864266375e-07, "loss": 0.3779, "step": 15237 }, { "epoch": 0.9142617147657047, "grad_norm": 1.263504981994629, "learning_rate": 1.340924396162817e-07, "loss": 0.347, "step": 15238 }, { "epoch": 0.9143217135657287, "grad_norm": 1.3625725507736206, "learning_rate": 1.3390603770489246e-07, "loss": 0.368, "step": 15239 }, { "epoch": 0.9143817123657527, "grad_norm": 1.2928088903427124, "learning_rate": 1.3371976291553668e-07, "loss": 0.3375, "step": 15240 }, { "epoch": 0.9144417111657767, "grad_norm": 1.3350474834442139, "learning_rate": 1.3353361525524916e-07, "loss": 0.3662, "step": 15241 }, { "epoch": 0.9145017099658007, "grad_norm": 1.3929662704467773, "learning_rate": 1.3334759473105888e-07, "loss": 0.4124, "step": 15242 }, { "epoch": 0.9145617087658247, "grad_norm": 1.3545005321502686, "learning_rate": 1.3316170134999138e-07, "loss": 0.362, "step": 15243 }, { "epoch": 0.9146217075658487, "grad_norm": 1.4636342525482178, "learning_rate": 1.3297593511906746e-07, "loss": 0.3914, "step": 15244 }, { "epoch": 0.9146817063658726, "grad_norm": 1.2548974752426147, "learning_rate": 1.3279029604530213e-07, "loss": 0.3781, "step": 15245 }, { "epoch": 0.9147417051658967, "grad_norm": 1.2809439897537231, "learning_rate": 1.3260478413570685e-07, "loss": 0.3565, "step": 15246 }, { "epoch": 0.9148017039659206, "grad_norm": 1.3291809558868408, "learning_rate": 1.324193993972862e-07, "loss": 0.3853, "step": 15247 }, { "epoch": 0.9148617027659447, "grad_norm": 1.383307695388794, "learning_rate": 1.3223414183704307e-07, "loss": 0.334, "step": 15248 }, { "epoch": 0.9149217015659686, "grad_norm": 1.3125663995742798, "learning_rate": 1.3204901146197268e-07, "loss": 0.342, "step": 15249 }, { "epoch": 0.9149817003659927, "grad_norm": 1.331308364868164, "learning_rate": 1.318640082790667e-07, "loss": 0.385, "step": 15250 }, { "epoch": 0.9150416991660166, "grad_norm": 1.3871891498565674, "learning_rate": 1.3167913229531137e-07, "loss": 0.3648, "step": 15251 }, { "epoch": 0.9151016979660407, "grad_norm": 1.2952436208724976, "learning_rate": 1.3149438351768982e-07, "loss": 0.3609, "step": 15252 }, { "epoch": 0.9151616967660646, "grad_norm": 1.360060453414917, "learning_rate": 1.3130976195317863e-07, "loss": 0.4016, "step": 15253 }, { "epoch": 0.9152216955660887, "grad_norm": 1.2789978981018066, "learning_rate": 1.311252676087492e-07, "loss": 0.3594, "step": 15254 }, { "epoch": 0.9152816943661127, "grad_norm": 1.2905843257904053, "learning_rate": 1.3094090049137074e-07, "loss": 0.3898, "step": 15255 }, { "epoch": 0.9153416931661367, "grad_norm": 1.300058126449585, "learning_rate": 1.30756660608005e-07, "loss": 0.4042, "step": 15256 }, { "epoch": 0.9154016919661607, "grad_norm": 1.2846322059631348, "learning_rate": 1.3057254796560907e-07, "loss": 0.344, "step": 15257 }, { "epoch": 0.9154616907661847, "grad_norm": 1.3635399341583252, "learning_rate": 1.3038856257113774e-07, "loss": 0.376, "step": 15258 }, { "epoch": 0.9155216895662087, "grad_norm": 1.3331722021102905, "learning_rate": 1.3020470443153799e-07, "loss": 0.3564, "step": 15259 }, { "epoch": 0.9155816883662327, "grad_norm": 1.323292851448059, "learning_rate": 1.3002097355375486e-07, "loss": 0.4035, "step": 15260 }, { "epoch": 0.9156416871662567, "grad_norm": 1.282466173171997, "learning_rate": 1.298373699447249e-07, "loss": 0.35, "step": 15261 }, { "epoch": 0.9157016859662807, "grad_norm": 1.4190353155136108, "learning_rate": 1.2965389361138342e-07, "loss": 0.3944, "step": 15262 }, { "epoch": 0.9157616847663047, "grad_norm": 1.6073802709579468, "learning_rate": 1.2947054456065875e-07, "loss": 0.3932, "step": 15263 }, { "epoch": 0.9158216835663286, "grad_norm": 1.3564231395721436, "learning_rate": 1.292873227994762e-07, "loss": 0.3231, "step": 15264 }, { "epoch": 0.9158816823663527, "grad_norm": 1.3246263265609741, "learning_rate": 1.2910422833475433e-07, "loss": 0.3912, "step": 15265 }, { "epoch": 0.9159416811663766, "grad_norm": 1.3094412088394165, "learning_rate": 1.289212611734075e-07, "loss": 0.3474, "step": 15266 }, { "epoch": 0.9160016799664007, "grad_norm": 1.3005996942520142, "learning_rate": 1.2873842132234664e-07, "loss": 0.3578, "step": 15267 }, { "epoch": 0.9160616787664246, "grad_norm": 1.3934834003448486, "learning_rate": 1.2855570878847595e-07, "loss": 0.3977, "step": 15268 }, { "epoch": 0.9161216775664487, "grad_norm": 1.3697073459625244, "learning_rate": 1.283731235786954e-07, "loss": 0.4166, "step": 15269 }, { "epoch": 0.9161816763664726, "grad_norm": 1.360495924949646, "learning_rate": 1.281906656999015e-07, "loss": 0.3429, "step": 15270 }, { "epoch": 0.9162416751664967, "grad_norm": 1.3018262386322021, "learning_rate": 1.2800833515898413e-07, "loss": 0.3698, "step": 15271 }, { "epoch": 0.9163016739665206, "grad_norm": 1.3867594003677368, "learning_rate": 1.2782613196282928e-07, "loss": 0.388, "step": 15272 }, { "epoch": 0.9163616727665447, "grad_norm": 1.3267381191253662, "learning_rate": 1.2764405611831747e-07, "loss": 0.4128, "step": 15273 }, { "epoch": 0.9164216715665686, "grad_norm": 1.4665991067886353, "learning_rate": 1.274621076323254e-07, "loss": 0.4228, "step": 15274 }, { "epoch": 0.9164816703665927, "grad_norm": 1.3145787715911865, "learning_rate": 1.272802865117243e-07, "loss": 0.4158, "step": 15275 }, { "epoch": 0.9165416691666166, "grad_norm": 1.3939889669418335, "learning_rate": 1.270985927633807e-07, "loss": 0.4079, "step": 15276 }, { "epoch": 0.9166016679666407, "grad_norm": 1.260646939277649, "learning_rate": 1.2691702639415652e-07, "loss": 0.3341, "step": 15277 }, { "epoch": 0.9166616667666647, "grad_norm": 1.299250841140747, "learning_rate": 1.2673558741090824e-07, "loss": 0.3209, "step": 15278 }, { "epoch": 0.9167216655666887, "grad_norm": 1.29557204246521, "learning_rate": 1.265542758204884e-07, "loss": 0.3822, "step": 15279 }, { "epoch": 0.9167816643667127, "grad_norm": 1.4257618188858032, "learning_rate": 1.2637309162974451e-07, "loss": 0.3992, "step": 15280 }, { "epoch": 0.9168416631667367, "grad_norm": 1.3130098581314087, "learning_rate": 1.2619203484551833e-07, "loss": 0.3442, "step": 15281 }, { "epoch": 0.9169016619667607, "grad_norm": 1.3545254468917847, "learning_rate": 1.26011105474648e-07, "loss": 0.413, "step": 15282 }, { "epoch": 0.9169616607667846, "grad_norm": 1.5429378747940063, "learning_rate": 1.258303035239667e-07, "loss": 0.3703, "step": 15283 }, { "epoch": 0.9170216595668087, "grad_norm": 1.3266323804855347, "learning_rate": 1.256496290003017e-07, "loss": 0.3798, "step": 15284 }, { "epoch": 0.9170816583668326, "grad_norm": 1.5173605680465698, "learning_rate": 1.2546908191047685e-07, "loss": 0.3826, "step": 15285 }, { "epoch": 0.9171416571668567, "grad_norm": 1.3817723989486694, "learning_rate": 1.2528866226131014e-07, "loss": 0.4006, "step": 15286 }, { "epoch": 0.9172016559668806, "grad_norm": 1.4531329870224, "learning_rate": 1.2510837005961646e-07, "loss": 0.3728, "step": 15287 }, { "epoch": 0.9172616547669047, "grad_norm": 1.4071801900863647, "learning_rate": 1.249282053122029e-07, "loss": 0.3569, "step": 15288 }, { "epoch": 0.9173216535669286, "grad_norm": 1.240336537361145, "learning_rate": 1.2474816802587424e-07, "loss": 0.3136, "step": 15289 }, { "epoch": 0.9173816523669527, "grad_norm": 1.213482141494751, "learning_rate": 1.2456825820742984e-07, "loss": 0.2914, "step": 15290 }, { "epoch": 0.9174416511669766, "grad_norm": 1.4029412269592285, "learning_rate": 1.2438847586366435e-07, "loss": 0.3575, "step": 15291 }, { "epoch": 0.9175016499670007, "grad_norm": 1.4954819679260254, "learning_rate": 1.2420882100136627e-07, "loss": 0.4377, "step": 15292 }, { "epoch": 0.9175616487670246, "grad_norm": 1.2713903188705444, "learning_rate": 1.2402929362732052e-07, "loss": 0.3616, "step": 15293 }, { "epoch": 0.9176216475670487, "grad_norm": 1.494905948638916, "learning_rate": 1.2384989374830818e-07, "loss": 0.3924, "step": 15294 }, { "epoch": 0.9176816463670726, "grad_norm": 1.3841438293457031, "learning_rate": 1.2367062137110374e-07, "loss": 0.3825, "step": 15295 }, { "epoch": 0.9177416451670967, "grad_norm": 1.31086266040802, "learning_rate": 1.2349147650247737e-07, "loss": 0.3515, "step": 15296 }, { "epoch": 0.9178016439671206, "grad_norm": 1.2306643724441528, "learning_rate": 1.2331245914919425e-07, "loss": 0.3101, "step": 15297 }, { "epoch": 0.9178616427671447, "grad_norm": 1.3828535079956055, "learning_rate": 1.231335693180156e-07, "loss": 0.3619, "step": 15298 }, { "epoch": 0.9179216415671687, "grad_norm": 1.298545002937317, "learning_rate": 1.229548070156973e-07, "loss": 0.3424, "step": 15299 }, { "epoch": 0.9179816403671927, "grad_norm": 1.2357847690582275, "learning_rate": 1.2277617224898963e-07, "loss": 0.3214, "step": 15300 }, { "epoch": 0.9180416391672167, "grad_norm": 1.249901533126831, "learning_rate": 1.2259766502463953e-07, "loss": 0.3594, "step": 15301 }, { "epoch": 0.9181016379672406, "grad_norm": 1.4314618110656738, "learning_rate": 1.2241928534938845e-07, "loss": 0.4169, "step": 15302 }, { "epoch": 0.9181616367672647, "grad_norm": 1.207660436630249, "learning_rate": 1.2224103322997276e-07, "loss": 0.3633, "step": 15303 }, { "epoch": 0.9182216355672886, "grad_norm": 1.4072222709655762, "learning_rate": 1.220629086731238e-07, "loss": 0.3683, "step": 15304 }, { "epoch": 0.9182816343673127, "grad_norm": 1.4823962450027466, "learning_rate": 1.2188491168556902e-07, "loss": 0.4204, "step": 15305 }, { "epoch": 0.9183416331673366, "grad_norm": 1.4373000860214233, "learning_rate": 1.2170704227403124e-07, "loss": 0.4329, "step": 15306 }, { "epoch": 0.9184016319673607, "grad_norm": 1.2778347730636597, "learning_rate": 1.215293004452258e-07, "loss": 0.3156, "step": 15307 }, { "epoch": 0.9184616307673846, "grad_norm": 1.4881807565689087, "learning_rate": 1.213516862058674e-07, "loss": 0.3818, "step": 15308 }, { "epoch": 0.9185216295674087, "grad_norm": 1.3595943450927734, "learning_rate": 1.2117419956266207e-07, "loss": 0.3787, "step": 15309 }, { "epoch": 0.9185816283674326, "grad_norm": 1.3122962713241577, "learning_rate": 1.2099684052231359e-07, "loss": 0.3448, "step": 15310 }, { "epoch": 0.9186416271674567, "grad_norm": 1.3188244104385376, "learning_rate": 1.2081960909151985e-07, "loss": 0.3382, "step": 15311 }, { "epoch": 0.9187016259674806, "grad_norm": 1.304495930671692, "learning_rate": 1.2064250527697372e-07, "loss": 0.3216, "step": 15312 }, { "epoch": 0.9187616247675047, "grad_norm": 1.284849762916565, "learning_rate": 1.2046552908536385e-07, "loss": 0.3425, "step": 15313 }, { "epoch": 0.9188216235675286, "grad_norm": 1.473496437072754, "learning_rate": 1.202886805233741e-07, "loss": 0.3996, "step": 15314 }, { "epoch": 0.9188816223675527, "grad_norm": 1.3578674793243408, "learning_rate": 1.2011195959768261e-07, "loss": 0.3295, "step": 15315 }, { "epoch": 0.9189416211675766, "grad_norm": 1.3339062929153442, "learning_rate": 1.1993536631496399e-07, "loss": 0.3746, "step": 15316 }, { "epoch": 0.9190016199676007, "grad_norm": 1.3603745698928833, "learning_rate": 1.197589006818866e-07, "loss": 0.36, "step": 15317 }, { "epoch": 0.9190616187676246, "grad_norm": 1.2344647645950317, "learning_rate": 1.1958256270511614e-07, "loss": 0.3305, "step": 15318 }, { "epoch": 0.9191216175676487, "grad_norm": 1.5862510204315186, "learning_rate": 1.1940635239131046e-07, "loss": 0.3211, "step": 15319 }, { "epoch": 0.9191816163676726, "grad_norm": 1.292120337486267, "learning_rate": 1.1923026974712519e-07, "loss": 0.4048, "step": 15320 }, { "epoch": 0.9192416151676966, "grad_norm": 1.2348710298538208, "learning_rate": 1.1905431477920925e-07, "loss": 0.365, "step": 15321 }, { "epoch": 0.9193016139677207, "grad_norm": 1.1380841732025146, "learning_rate": 1.1887848749420887e-07, "loss": 0.3631, "step": 15322 }, { "epoch": 0.9193616127677446, "grad_norm": 1.2922701835632324, "learning_rate": 1.1870278789876371e-07, "loss": 0.3401, "step": 15323 }, { "epoch": 0.9194216115677687, "grad_norm": 1.4302436113357544, "learning_rate": 1.1852721599950871e-07, "loss": 0.3763, "step": 15324 }, { "epoch": 0.9194816103677926, "grad_norm": 1.2743351459503174, "learning_rate": 1.1835177180307498e-07, "loss": 0.3562, "step": 15325 }, { "epoch": 0.9195416091678167, "grad_norm": 1.1688321828842163, "learning_rate": 1.1817645531608817e-07, "loss": 0.3432, "step": 15326 }, { "epoch": 0.9196016079678406, "grad_norm": 1.3791528940200806, "learning_rate": 1.1800126654516885e-07, "loss": 0.3162, "step": 15327 }, { "epoch": 0.9196616067678647, "grad_norm": 1.3926818370819092, "learning_rate": 1.1782620549693373e-07, "loss": 0.3805, "step": 15328 }, { "epoch": 0.9197216055678886, "grad_norm": 1.1575266122817993, "learning_rate": 1.1765127217799331e-07, "loss": 0.3961, "step": 15329 }, { "epoch": 0.9197816043679127, "grad_norm": 1.2444465160369873, "learning_rate": 1.1747646659495497e-07, "loss": 0.3293, "step": 15330 }, { "epoch": 0.9198416031679366, "grad_norm": 1.2233701944351196, "learning_rate": 1.173017887544191e-07, "loss": 0.3942, "step": 15331 }, { "epoch": 0.9199016019679607, "grad_norm": 1.3291760683059692, "learning_rate": 1.1712723866298336e-07, "loss": 0.3635, "step": 15332 }, { "epoch": 0.9199616007679846, "grad_norm": 1.483969807624817, "learning_rate": 1.1695281632723958e-07, "loss": 0.363, "step": 15333 }, { "epoch": 0.9200215995680087, "grad_norm": 1.363627552986145, "learning_rate": 1.1677852175377457e-07, "loss": 0.3764, "step": 15334 }, { "epoch": 0.9200815983680326, "grad_norm": 1.3399364948272705, "learning_rate": 1.166043549491712e-07, "loss": 0.3782, "step": 15335 }, { "epoch": 0.9201415971680567, "grad_norm": 1.4393181800842285, "learning_rate": 1.1643031592000618e-07, "loss": 0.4132, "step": 15336 }, { "epoch": 0.9202015959680806, "grad_norm": 1.3323928117752075, "learning_rate": 1.1625640467285309e-07, "loss": 0.3682, "step": 15337 }, { "epoch": 0.9202615947681047, "grad_norm": 1.3688936233520508, "learning_rate": 1.1608262121427887e-07, "loss": 0.3349, "step": 15338 }, { "epoch": 0.9203215935681286, "grad_norm": 1.4869699478149414, "learning_rate": 1.1590896555084701e-07, "loss": 0.3411, "step": 15339 }, { "epoch": 0.9203815923681526, "grad_norm": 1.3265576362609863, "learning_rate": 1.1573543768911593e-07, "loss": 0.3506, "step": 15340 }, { "epoch": 0.9204415911681766, "grad_norm": 1.3232223987579346, "learning_rate": 1.155620376356386e-07, "loss": 0.3781, "step": 15341 }, { "epoch": 0.9205015899682006, "grad_norm": 1.3862274885177612, "learning_rate": 1.1538876539696375e-07, "loss": 0.3584, "step": 15342 }, { "epoch": 0.9205615887682246, "grad_norm": 1.3609645366668701, "learning_rate": 1.1521562097963461e-07, "loss": 0.3629, "step": 15343 }, { "epoch": 0.9206215875682486, "grad_norm": 1.5248774290084839, "learning_rate": 1.1504260439019098e-07, "loss": 0.3788, "step": 15344 }, { "epoch": 0.9206815863682727, "grad_norm": 1.2611241340637207, "learning_rate": 1.14869715635166e-07, "loss": 0.3668, "step": 15345 }, { "epoch": 0.9207415851682966, "grad_norm": 1.312252163887024, "learning_rate": 1.1469695472108898e-07, "loss": 0.3876, "step": 15346 }, { "epoch": 0.9208015839683207, "grad_norm": 1.4610258340835571, "learning_rate": 1.1452432165448529e-07, "loss": 0.4033, "step": 15347 }, { "epoch": 0.9208615827683446, "grad_norm": 1.3274847269058228, "learning_rate": 1.143518164418733e-07, "loss": 0.3885, "step": 15348 }, { "epoch": 0.9209215815683687, "grad_norm": 1.4133541584014893, "learning_rate": 1.1417943908976873e-07, "loss": 0.3863, "step": 15349 }, { "epoch": 0.9209815803683926, "grad_norm": 1.3512731790542603, "learning_rate": 1.1400718960468026e-07, "loss": 0.4064, "step": 15350 }, { "epoch": 0.9210415791684167, "grad_norm": 1.4698512554168701, "learning_rate": 1.1383506799311343e-07, "loss": 0.3958, "step": 15351 }, { "epoch": 0.9211015779684406, "grad_norm": 1.4976024627685547, "learning_rate": 1.1366307426156996e-07, "loss": 0.3893, "step": 15352 }, { "epoch": 0.9211615767684647, "grad_norm": 1.3340946435928345, "learning_rate": 1.1349120841654336e-07, "loss": 0.3736, "step": 15353 }, { "epoch": 0.9212215755684886, "grad_norm": 1.3903619050979614, "learning_rate": 1.1331947046452484e-07, "loss": 0.4081, "step": 15354 }, { "epoch": 0.9212815743685127, "grad_norm": 1.371537446975708, "learning_rate": 1.1314786041200014e-07, "loss": 0.3345, "step": 15355 }, { "epoch": 0.9213415731685366, "grad_norm": 1.3735065460205078, "learning_rate": 1.1297637826545076e-07, "loss": 0.3281, "step": 15356 }, { "epoch": 0.9214015719685607, "grad_norm": 1.3692636489868164, "learning_rate": 1.1280502403135233e-07, "loss": 0.3959, "step": 15357 }, { "epoch": 0.9214615707685846, "grad_norm": 1.2903468608856201, "learning_rate": 1.1263379771617548e-07, "loss": 0.3486, "step": 15358 }, { "epoch": 0.9215215695686086, "grad_norm": 1.3886032104492188, "learning_rate": 1.1246269932638808e-07, "loss": 0.3695, "step": 15359 }, { "epoch": 0.9215815683686326, "grad_norm": 1.2098578214645386, "learning_rate": 1.1229172886845063e-07, "loss": 0.3523, "step": 15360 }, { "epoch": 0.9216415671686566, "grad_norm": 1.26500403881073, "learning_rate": 1.1212088634882012e-07, "loss": 0.3171, "step": 15361 }, { "epoch": 0.9217015659686806, "grad_norm": 1.4790644645690918, "learning_rate": 1.119501717739485e-07, "loss": 0.3947, "step": 15362 }, { "epoch": 0.9217615647687046, "grad_norm": 1.3113348484039307, "learning_rate": 1.1177958515028269e-07, "loss": 0.368, "step": 15363 }, { "epoch": 0.9218215635687286, "grad_norm": 1.408424973487854, "learning_rate": 1.1160912648426645e-07, "loss": 0.319, "step": 15364 }, { "epoch": 0.9218815623687526, "grad_norm": 1.5067979097366333, "learning_rate": 1.1143879578233467e-07, "loss": 0.3857, "step": 15365 }, { "epoch": 0.9219415611687767, "grad_norm": 1.2131073474884033, "learning_rate": 1.1126859305092219e-07, "loss": 0.3472, "step": 15366 }, { "epoch": 0.9220015599688006, "grad_norm": 1.3957282304763794, "learning_rate": 1.1109851829645528e-07, "loss": 0.3638, "step": 15367 }, { "epoch": 0.9220615587688247, "grad_norm": 1.3681960105895996, "learning_rate": 1.1092857152535795e-07, "loss": 0.3684, "step": 15368 }, { "epoch": 0.9221215575688486, "grad_norm": 1.2813059091567993, "learning_rate": 1.1075875274404833e-07, "loss": 0.3262, "step": 15369 }, { "epoch": 0.9221815563688727, "grad_norm": 1.3428181409835815, "learning_rate": 1.1058906195893836e-07, "loss": 0.3565, "step": 15370 }, { "epoch": 0.9222415551688966, "grad_norm": 1.3112080097198486, "learning_rate": 1.1041949917643801e-07, "loss": 0.3434, "step": 15371 }, { "epoch": 0.9223015539689207, "grad_norm": 1.3424644470214844, "learning_rate": 1.1025006440295027e-07, "loss": 0.3755, "step": 15372 }, { "epoch": 0.9223615527689446, "grad_norm": 1.3909629583358765, "learning_rate": 1.1008075764487385e-07, "loss": 0.3436, "step": 15373 }, { "epoch": 0.9224215515689687, "grad_norm": 1.5181535482406616, "learning_rate": 1.0991157890860243e-07, "loss": 0.4238, "step": 15374 }, { "epoch": 0.9224815503689926, "grad_norm": 1.3335672616958618, "learning_rate": 1.0974252820052577e-07, "loss": 0.3779, "step": 15375 }, { "epoch": 0.9225415491690167, "grad_norm": 1.325598120689392, "learning_rate": 1.0957360552702821e-07, "loss": 0.3744, "step": 15376 }, { "epoch": 0.9226015479690406, "grad_norm": 1.4959161281585693, "learning_rate": 1.0940481089448828e-07, "loss": 0.368, "step": 15377 }, { "epoch": 0.9226615467690646, "grad_norm": 1.260462999343872, "learning_rate": 1.0923614430928135e-07, "loss": 0.3089, "step": 15378 }, { "epoch": 0.9227215455690886, "grad_norm": 1.411813497543335, "learning_rate": 1.0906760577777702e-07, "loss": 0.3466, "step": 15379 }, { "epoch": 0.9227815443691126, "grad_norm": 1.3714812994003296, "learning_rate": 1.088991953063398e-07, "loss": 0.3911, "step": 15380 }, { "epoch": 0.9228415431691366, "grad_norm": 1.3170832395553589, "learning_rate": 1.087309129013303e-07, "loss": 0.3814, "step": 15381 }, { "epoch": 0.9229015419691606, "grad_norm": 1.2351800203323364, "learning_rate": 1.0856275856910374e-07, "loss": 0.3661, "step": 15382 }, { "epoch": 0.9229615407691846, "grad_norm": 1.375316858291626, "learning_rate": 1.0839473231601065e-07, "loss": 0.3472, "step": 15383 }, { "epoch": 0.9230215395692086, "grad_norm": 1.381287932395935, "learning_rate": 1.0822683414839612e-07, "loss": 0.3544, "step": 15384 }, { "epoch": 0.9230815383692326, "grad_norm": 1.453266978263855, "learning_rate": 1.0805906407260135e-07, "loss": 0.3272, "step": 15385 }, { "epoch": 0.9231415371692566, "grad_norm": 1.3227564096450806, "learning_rate": 1.0789142209496172e-07, "loss": 0.3595, "step": 15386 }, { "epoch": 0.9232015359692806, "grad_norm": 1.2938501834869385, "learning_rate": 1.077239082218091e-07, "loss": 0.3482, "step": 15387 }, { "epoch": 0.9232615347693046, "grad_norm": 1.3134286403656006, "learning_rate": 1.0755652245946917e-07, "loss": 0.3577, "step": 15388 }, { "epoch": 0.9233215335693287, "grad_norm": 1.31674063205719, "learning_rate": 1.0738926481426331e-07, "loss": 0.3875, "step": 15389 }, { "epoch": 0.9233815323693526, "grad_norm": 1.3700964450836182, "learning_rate": 1.0722213529250862e-07, "loss": 0.3764, "step": 15390 }, { "epoch": 0.9234415311693767, "grad_norm": 1.3788504600524902, "learning_rate": 1.0705513390051602e-07, "loss": 0.3445, "step": 15391 }, { "epoch": 0.9235015299694006, "grad_norm": 1.2983567714691162, "learning_rate": 1.068882606445929e-07, "loss": 0.3788, "step": 15392 }, { "epoch": 0.9235615287694247, "grad_norm": 1.2682011127471924, "learning_rate": 1.067215155310412e-07, "loss": 0.3092, "step": 15393 }, { "epoch": 0.9236215275694486, "grad_norm": 1.412081241607666, "learning_rate": 1.0655489856615824e-07, "loss": 0.382, "step": 15394 }, { "epoch": 0.9236815263694726, "grad_norm": 1.3386605978012085, "learning_rate": 1.0638840975623626e-07, "loss": 0.3472, "step": 15395 }, { "epoch": 0.9237415251694966, "grad_norm": 1.2391554117202759, "learning_rate": 1.0622204910756244e-07, "loss": 0.3468, "step": 15396 }, { "epoch": 0.9238015239695206, "grad_norm": 1.3324600458145142, "learning_rate": 1.0605581662641972e-07, "loss": 0.38, "step": 15397 }, { "epoch": 0.9238615227695446, "grad_norm": 1.3359979391098022, "learning_rate": 1.0588971231908711e-07, "loss": 0.3656, "step": 15398 }, { "epoch": 0.9239215215695686, "grad_norm": 1.4129661321640015, "learning_rate": 1.057237361918355e-07, "loss": 0.3791, "step": 15399 }, { "epoch": 0.9239815203695926, "grad_norm": 1.282455563545227, "learning_rate": 1.055578882509346e-07, "loss": 0.3443, "step": 15400 }, { "epoch": 0.9240415191696166, "grad_norm": 1.2430434226989746, "learning_rate": 1.0539216850264671e-07, "loss": 0.3299, "step": 15401 }, { "epoch": 0.9241015179696406, "grad_norm": 1.486608624458313, "learning_rate": 1.0522657695323107e-07, "loss": 0.4134, "step": 15402 }, { "epoch": 0.9241615167696646, "grad_norm": 1.2974073886871338, "learning_rate": 1.0506111360894144e-07, "loss": 0.3801, "step": 15403 }, { "epoch": 0.9242215155696886, "grad_norm": 1.446226716041565, "learning_rate": 1.0489577847602537e-07, "loss": 0.3989, "step": 15404 }, { "epoch": 0.9242815143697126, "grad_norm": 1.354299545288086, "learning_rate": 1.0473057156072851e-07, "loss": 0.3989, "step": 15405 }, { "epoch": 0.9243415131697366, "grad_norm": 1.2992584705352783, "learning_rate": 1.0456549286928906e-07, "loss": 0.3794, "step": 15406 }, { "epoch": 0.9244015119697606, "grad_norm": 1.4289692640304565, "learning_rate": 1.0440054240794138e-07, "loss": 0.3901, "step": 15407 }, { "epoch": 0.9244615107697846, "grad_norm": 1.3628119230270386, "learning_rate": 1.0423572018291438e-07, "loss": 0.3636, "step": 15408 }, { "epoch": 0.9245215095698086, "grad_norm": 1.2824208736419678, "learning_rate": 1.0407102620043307e-07, "loss": 0.3839, "step": 15409 }, { "epoch": 0.9245815083698325, "grad_norm": 1.3124587535858154, "learning_rate": 1.0390646046671819e-07, "loss": 0.3694, "step": 15410 }, { "epoch": 0.9246415071698566, "grad_norm": 1.417264699935913, "learning_rate": 1.0374202298798312e-07, "loss": 0.3786, "step": 15411 }, { "epoch": 0.9247015059698807, "grad_norm": 1.5618667602539062, "learning_rate": 1.0357771377043889e-07, "loss": 0.3876, "step": 15412 }, { "epoch": 0.9247615047699046, "grad_norm": 1.4099153280258179, "learning_rate": 1.0341353282028992e-07, "loss": 0.4118, "step": 15413 }, { "epoch": 0.9248215035699286, "grad_norm": 1.3272825479507446, "learning_rate": 1.0324948014373753e-07, "loss": 0.3822, "step": 15414 }, { "epoch": 0.9248815023699526, "grad_norm": 1.3084163665771484, "learning_rate": 1.0308555574697641e-07, "loss": 0.3346, "step": 15415 }, { "epoch": 0.9249415011699766, "grad_norm": 1.323485255241394, "learning_rate": 1.0292175963619704e-07, "loss": 0.299, "step": 15416 }, { "epoch": 0.9250014999700006, "grad_norm": 1.4288665056228638, "learning_rate": 1.0275809181758632e-07, "loss": 0.4068, "step": 15417 }, { "epoch": 0.9250614987700246, "grad_norm": 1.3998712301254272, "learning_rate": 1.02594552297325e-07, "loss": 0.3673, "step": 15418 }, { "epoch": 0.9251214975700486, "grad_norm": 1.3694838285446167, "learning_rate": 1.0243114108158874e-07, "loss": 0.3343, "step": 15419 }, { "epoch": 0.9251814963700726, "grad_norm": 1.4220918416976929, "learning_rate": 1.0226785817654855e-07, "loss": 0.3653, "step": 15420 }, { "epoch": 0.9252414951700966, "grad_norm": 1.5869463682174683, "learning_rate": 1.0210470358837154e-07, "loss": 0.4528, "step": 15421 }, { "epoch": 0.9253014939701206, "grad_norm": 1.3677946329116821, "learning_rate": 1.0194167732321902e-07, "loss": 0.409, "step": 15422 }, { "epoch": 0.9253614927701446, "grad_norm": 1.2133301496505737, "learning_rate": 1.01778779387248e-07, "loss": 0.3699, "step": 15423 }, { "epoch": 0.9254214915701686, "grad_norm": 1.352519154548645, "learning_rate": 1.0161600978661006e-07, "loss": 0.3476, "step": 15424 }, { "epoch": 0.9254814903701926, "grad_norm": 1.2618540525436401, "learning_rate": 1.0145336852745251e-07, "loss": 0.3309, "step": 15425 }, { "epoch": 0.9255414891702166, "grad_norm": 1.4215375185012817, "learning_rate": 1.012908556159176e-07, "loss": 0.3545, "step": 15426 }, { "epoch": 0.9256014879702406, "grad_norm": 1.3229025602340698, "learning_rate": 1.0112847105814254e-07, "loss": 0.4332, "step": 15427 }, { "epoch": 0.9256614867702646, "grad_norm": 1.3875361680984497, "learning_rate": 1.0096621486025947e-07, "loss": 0.3635, "step": 15428 }, { "epoch": 0.9257214855702885, "grad_norm": 1.2854079008102417, "learning_rate": 1.0080408702839743e-07, "loss": 0.3301, "step": 15429 }, { "epoch": 0.9257814843703126, "grad_norm": 1.2787100076675415, "learning_rate": 1.006420875686777e-07, "loss": 0.3642, "step": 15430 }, { "epoch": 0.9258414831703365, "grad_norm": 1.4173184633255005, "learning_rate": 1.0048021648721883e-07, "loss": 0.3606, "step": 15431 }, { "epoch": 0.9259014819703606, "grad_norm": 1.4076095819473267, "learning_rate": 1.0031847379013393e-07, "loss": 0.3973, "step": 15432 }, { "epoch": 0.9259614807703845, "grad_norm": 1.2796111106872559, "learning_rate": 1.0015685948353181e-07, "loss": 0.3682, "step": 15433 }, { "epoch": 0.9260214795704086, "grad_norm": 1.4384161233901978, "learning_rate": 9.99953735735155e-08, "loss": 0.3749, "step": 15434 }, { "epoch": 0.9260814783704326, "grad_norm": 1.2775564193725586, "learning_rate": 9.983401606618297e-08, "loss": 0.3398, "step": 15435 }, { "epoch": 0.9261414771704566, "grad_norm": 1.3778290748596191, "learning_rate": 9.967278696762904e-08, "loss": 0.4095, "step": 15436 }, { "epoch": 0.9262014759704806, "grad_norm": 1.3415406942367554, "learning_rate": 9.951168628394197e-08, "loss": 0.3807, "step": 15437 }, { "epoch": 0.9262614747705046, "grad_norm": 1.2148312330245972, "learning_rate": 9.935071402120532e-08, "loss": 0.3165, "step": 15438 }, { "epoch": 0.9263214735705286, "grad_norm": 1.3312815427780151, "learning_rate": 9.918987018549958e-08, "loss": 0.3726, "step": 15439 }, { "epoch": 0.9263814723705526, "grad_norm": 1.421635627746582, "learning_rate": 9.902915478289859e-08, "loss": 0.3969, "step": 15440 }, { "epoch": 0.9264414711705766, "grad_norm": 1.331108808517456, "learning_rate": 9.886856781947117e-08, "loss": 0.3617, "step": 15441 }, { "epoch": 0.9265014699706006, "grad_norm": 1.231798529624939, "learning_rate": 9.870810930128226e-08, "loss": 0.4016, "step": 15442 }, { "epoch": 0.9265614687706246, "grad_norm": 1.3291223049163818, "learning_rate": 9.85477792343925e-08, "loss": 0.3931, "step": 15443 }, { "epoch": 0.9266214675706486, "grad_norm": 1.3494480848312378, "learning_rate": 9.838757762485517e-08, "loss": 0.3808, "step": 15444 }, { "epoch": 0.9266814663706726, "grad_norm": 1.3756283521652222, "learning_rate": 9.822750447872198e-08, "loss": 0.3575, "step": 15445 }, { "epoch": 0.9267414651706966, "grad_norm": 1.3983948230743408, "learning_rate": 9.806755980203763e-08, "loss": 0.3463, "step": 15446 }, { "epoch": 0.9268014639707206, "grad_norm": 1.3446892499923706, "learning_rate": 9.790774360084182e-08, "loss": 0.3289, "step": 15447 }, { "epoch": 0.9268614627707445, "grad_norm": 1.3661233186721802, "learning_rate": 9.774805588117147e-08, "loss": 0.396, "step": 15448 }, { "epoch": 0.9269214615707686, "grad_norm": 1.5085227489471436, "learning_rate": 9.758849664905578e-08, "loss": 0.4159, "step": 15449 }, { "epoch": 0.9269814603707925, "grad_norm": 1.2895183563232422, "learning_rate": 9.742906591052119e-08, "loss": 0.3683, "step": 15450 }, { "epoch": 0.9270414591708166, "grad_norm": 1.4192842245101929, "learning_rate": 9.726976367158913e-08, "loss": 0.4072, "step": 15451 }, { "epoch": 0.9271014579708405, "grad_norm": 1.336441993713379, "learning_rate": 9.711058993827514e-08, "loss": 0.3805, "step": 15452 }, { "epoch": 0.9271614567708646, "grad_norm": 1.3351444005966187, "learning_rate": 9.695154471659057e-08, "loss": 0.3462, "step": 15453 }, { "epoch": 0.9272214555708885, "grad_norm": 1.288119912147522, "learning_rate": 9.679262801254162e-08, "loss": 0.3491, "step": 15454 }, { "epoch": 0.9272814543709126, "grad_norm": 1.4221463203430176, "learning_rate": 9.66338398321307e-08, "loss": 0.3522, "step": 15455 }, { "epoch": 0.9273414531709366, "grad_norm": 1.3168671131134033, "learning_rate": 9.647518018135354e-08, "loss": 0.3834, "step": 15456 }, { "epoch": 0.9274014519709606, "grad_norm": 1.4374644756317139, "learning_rate": 9.631664906620241e-08, "loss": 0.4006, "step": 15457 }, { "epoch": 0.9274614507709846, "grad_norm": 1.4191772937774658, "learning_rate": 9.615824649266452e-08, "loss": 0.3772, "step": 15458 }, { "epoch": 0.9275214495710086, "grad_norm": 1.3092360496520996, "learning_rate": 9.599997246672164e-08, "loss": 0.3371, "step": 15459 }, { "epoch": 0.9275814483710326, "grad_norm": 1.223900318145752, "learning_rate": 9.584182699435168e-08, "loss": 0.3734, "step": 15460 }, { "epoch": 0.9276414471710566, "grad_norm": 1.40764319896698, "learning_rate": 9.568381008152587e-08, "loss": 0.3726, "step": 15461 }, { "epoch": 0.9277014459710806, "grad_norm": 1.3351898193359375, "learning_rate": 9.552592173421281e-08, "loss": 0.347, "step": 15462 }, { "epoch": 0.9277614447711046, "grad_norm": 1.2474323511123657, "learning_rate": 9.536816195837483e-08, "loss": 0.3953, "step": 15463 }, { "epoch": 0.9278214435711286, "grad_norm": 1.4531877040863037, "learning_rate": 9.521053075997038e-08, "loss": 0.3791, "step": 15464 }, { "epoch": 0.9278814423711526, "grad_norm": 1.4234822988510132, "learning_rate": 9.50530281449517e-08, "loss": 0.3251, "step": 15465 }, { "epoch": 0.9279414411711766, "grad_norm": 1.3828548192977905, "learning_rate": 9.489565411926714e-08, "loss": 0.375, "step": 15466 }, { "epoch": 0.9280014399712005, "grad_norm": 1.3123408555984497, "learning_rate": 9.473840868886002e-08, "loss": 0.3684, "step": 15467 }, { "epoch": 0.9280614387712246, "grad_norm": 1.4529720544815063, "learning_rate": 9.458129185966934e-08, "loss": 0.3934, "step": 15468 }, { "epoch": 0.9281214375712485, "grad_norm": 1.3226321935653687, "learning_rate": 9.442430363762754e-08, "loss": 0.356, "step": 15469 }, { "epoch": 0.9281814363712726, "grad_norm": 1.236791968345642, "learning_rate": 9.426744402866471e-08, "loss": 0.3812, "step": 15470 }, { "epoch": 0.9282414351712965, "grad_norm": 1.3421214818954468, "learning_rate": 9.411071303870395e-08, "loss": 0.3323, "step": 15471 }, { "epoch": 0.9283014339713206, "grad_norm": 1.5149301290512085, "learning_rate": 9.395411067366404e-08, "loss": 0.4113, "step": 15472 }, { "epoch": 0.9283614327713445, "grad_norm": 1.3614654541015625, "learning_rate": 9.379763693945959e-08, "loss": 0.4216, "step": 15473 }, { "epoch": 0.9284214315713686, "grad_norm": 1.4254209995269775, "learning_rate": 9.364129184199926e-08, "loss": 0.3527, "step": 15474 }, { "epoch": 0.9284814303713925, "grad_norm": 1.3401463031768799, "learning_rate": 9.348507538718908e-08, "loss": 0.4027, "step": 15475 }, { "epoch": 0.9285414291714166, "grad_norm": 1.3303302526474, "learning_rate": 9.332898758092689e-08, "loss": 0.3455, "step": 15476 }, { "epoch": 0.9286014279714405, "grad_norm": 1.3616788387298584, "learning_rate": 9.317302842910818e-08, "loss": 0.3849, "step": 15477 }, { "epoch": 0.9286614267714646, "grad_norm": 1.2521841526031494, "learning_rate": 9.301719793762264e-08, "loss": 0.3571, "step": 15478 }, { "epoch": 0.9287214255714886, "grad_norm": 1.3118712902069092, "learning_rate": 9.286149611235528e-08, "loss": 0.4023, "step": 15479 }, { "epoch": 0.9287814243715126, "grad_norm": 1.3264291286468506, "learning_rate": 9.270592295918684e-08, "loss": 0.3945, "step": 15480 }, { "epoch": 0.9288414231715366, "grad_norm": 1.6916768550872803, "learning_rate": 9.255047848399145e-08, "loss": 0.3981, "step": 15481 }, { "epoch": 0.9289014219715606, "grad_norm": 1.1546021699905396, "learning_rate": 9.239516269264053e-08, "loss": 0.3806, "step": 15482 }, { "epoch": 0.9289614207715846, "grad_norm": 1.2794685363769531, "learning_rate": 9.223997559099928e-08, "loss": 0.3745, "step": 15483 }, { "epoch": 0.9290214195716086, "grad_norm": 1.4782518148422241, "learning_rate": 9.208491718492861e-08, "loss": 0.3792, "step": 15484 }, { "epoch": 0.9290814183716326, "grad_norm": 1.2784149646759033, "learning_rate": 9.192998748028364e-08, "loss": 0.3622, "step": 15485 }, { "epoch": 0.9291414171716565, "grad_norm": 1.4684350490570068, "learning_rate": 9.177518648291633e-08, "loss": 0.388, "step": 15486 }, { "epoch": 0.9292014159716806, "grad_norm": 1.3440181016921997, "learning_rate": 9.162051419867246e-08, "loss": 0.3712, "step": 15487 }, { "epoch": 0.9292614147717045, "grad_norm": 1.2301437854766846, "learning_rate": 9.146597063339312e-08, "loss": 0.3181, "step": 15488 }, { "epoch": 0.9293214135717286, "grad_norm": 1.5234854221343994, "learning_rate": 9.131155579291477e-08, "loss": 0.3931, "step": 15489 }, { "epoch": 0.9293814123717525, "grad_norm": 1.232043743133545, "learning_rate": 9.11572696830688e-08, "loss": 0.3764, "step": 15490 }, { "epoch": 0.9294414111717766, "grad_norm": 1.275866985321045, "learning_rate": 9.100311230968233e-08, "loss": 0.3714, "step": 15491 }, { "epoch": 0.9295014099718005, "grad_norm": 1.2069275379180908, "learning_rate": 9.084908367857741e-08, "loss": 0.3356, "step": 15492 }, { "epoch": 0.9295614087718246, "grad_norm": 1.3570562601089478, "learning_rate": 9.06951837955699e-08, "loss": 0.4117, "step": 15493 }, { "epoch": 0.9296214075718485, "grad_norm": 1.2972157001495361, "learning_rate": 9.054141266647252e-08, "loss": 0.337, "step": 15494 }, { "epoch": 0.9296814063718726, "grad_norm": 1.290432095527649, "learning_rate": 9.0387770297093e-08, "loss": 0.3616, "step": 15495 }, { "epoch": 0.9297414051718965, "grad_norm": 1.2453423738479614, "learning_rate": 9.023425669323315e-08, "loss": 0.3783, "step": 15496 }, { "epoch": 0.9298014039719206, "grad_norm": 1.252121925354004, "learning_rate": 9.00808718606902e-08, "loss": 0.3561, "step": 15497 }, { "epoch": 0.9298614027719445, "grad_norm": 1.5882596969604492, "learning_rate": 8.992761580525743e-08, "loss": 0.3644, "step": 15498 }, { "epoch": 0.9299214015719686, "grad_norm": 1.2070446014404297, "learning_rate": 8.977448853272274e-08, "loss": 0.3571, "step": 15499 }, { "epoch": 0.9299814003719925, "grad_norm": 1.3872381448745728, "learning_rate": 8.962149004886776e-08, "loss": 0.399, "step": 15500 }, { "epoch": 0.9300413991720166, "grad_norm": 1.4207203388214111, "learning_rate": 8.946862035947222e-08, "loss": 0.3799, "step": 15501 }, { "epoch": 0.9301013979720406, "grad_norm": 1.3397009372711182, "learning_rate": 8.931587947030844e-08, "loss": 0.3705, "step": 15502 }, { "epoch": 0.9301613967720646, "grad_norm": 1.2655894756317139, "learning_rate": 8.916326738714447e-08, "loss": 0.3826, "step": 15503 }, { "epoch": 0.9302213955720886, "grad_norm": 1.4397372007369995, "learning_rate": 8.901078411574448e-08, "loss": 0.4183, "step": 15504 }, { "epoch": 0.9302813943721125, "grad_norm": 1.471838355064392, "learning_rate": 8.88584296618664e-08, "loss": 0.4134, "step": 15505 }, { "epoch": 0.9303413931721366, "grad_norm": 1.3247424364089966, "learning_rate": 8.870620403126473e-08, "loss": 0.3439, "step": 15506 }, { "epoch": 0.9304013919721605, "grad_norm": 1.2825242280960083, "learning_rate": 8.855410722968765e-08, "loss": 0.3491, "step": 15507 }, { "epoch": 0.9304613907721846, "grad_norm": 1.4130874872207642, "learning_rate": 8.840213926287953e-08, "loss": 0.4028, "step": 15508 }, { "epoch": 0.9305213895722085, "grad_norm": 1.3444467782974243, "learning_rate": 8.82503001365793e-08, "loss": 0.2933, "step": 15509 }, { "epoch": 0.9305813883722326, "grad_norm": 1.3678388595581055, "learning_rate": 8.809858985652157e-08, "loss": 0.3706, "step": 15510 }, { "epoch": 0.9306413871722565, "grad_norm": 1.2972344160079956, "learning_rate": 8.794700842843555e-08, "loss": 0.3685, "step": 15511 }, { "epoch": 0.9307013859722806, "grad_norm": 1.3772121667861938, "learning_rate": 8.779555585804538e-08, "loss": 0.3443, "step": 15512 }, { "epoch": 0.9307613847723045, "grad_norm": 1.3933517932891846, "learning_rate": 8.764423215107131e-08, "loss": 0.3875, "step": 15513 }, { "epoch": 0.9308213835723286, "grad_norm": 1.4415749311447144, "learning_rate": 8.749303731322815e-08, "loss": 0.3697, "step": 15514 }, { "epoch": 0.9308813823723525, "grad_norm": 1.2498865127563477, "learning_rate": 8.734197135022493e-08, "loss": 0.3871, "step": 15515 }, { "epoch": 0.9309413811723766, "grad_norm": 1.3245877027511597, "learning_rate": 8.719103426776824e-08, "loss": 0.364, "step": 15516 }, { "epoch": 0.9310013799724005, "grad_norm": 1.2937968969345093, "learning_rate": 8.704022607155703e-08, "loss": 0.3592, "step": 15517 }, { "epoch": 0.9310613787724246, "grad_norm": 1.3209054470062256, "learning_rate": 8.688954676728744e-08, "loss": 0.3502, "step": 15518 }, { "epoch": 0.9311213775724485, "grad_norm": 1.3078287839889526, "learning_rate": 8.673899636064902e-08, "loss": 0.3876, "step": 15519 }, { "epoch": 0.9311813763724726, "grad_norm": 1.2202539443969727, "learning_rate": 8.658857485732785e-08, "loss": 0.3137, "step": 15520 }, { "epoch": 0.9312413751724965, "grad_norm": 1.4341442584991455, "learning_rate": 8.64382822630057e-08, "loss": 0.4174, "step": 15521 }, { "epoch": 0.9313013739725206, "grad_norm": 1.3360331058502197, "learning_rate": 8.628811858335661e-08, "loss": 0.3741, "step": 15522 }, { "epoch": 0.9313613727725446, "grad_norm": 1.2738653421401978, "learning_rate": 8.613808382405264e-08, "loss": 0.3512, "step": 15523 }, { "epoch": 0.9314213715725685, "grad_norm": 1.4439480304718018, "learning_rate": 8.598817799075964e-08, "loss": 0.3719, "step": 15524 }, { "epoch": 0.9314813703725926, "grad_norm": 1.5861924886703491, "learning_rate": 8.583840108913959e-08, "loss": 0.3823, "step": 15525 }, { "epoch": 0.9315413691726165, "grad_norm": 1.3676663637161255, "learning_rate": 8.568875312484747e-08, "loss": 0.3856, "step": 15526 }, { "epoch": 0.9316013679726406, "grad_norm": 1.3784732818603516, "learning_rate": 8.553923410353592e-08, "loss": 0.354, "step": 15527 }, { "epoch": 0.9316613667726645, "grad_norm": 1.331679105758667, "learning_rate": 8.538984403085136e-08, "loss": 0.3535, "step": 15528 }, { "epoch": 0.9317213655726886, "grad_norm": 1.3711750507354736, "learning_rate": 8.524058291243519e-08, "loss": 0.3658, "step": 15529 }, { "epoch": 0.9317813643727125, "grad_norm": 1.279470443725586, "learning_rate": 8.509145075392488e-08, "loss": 0.3661, "step": 15530 }, { "epoch": 0.9318413631727366, "grad_norm": 1.4356740713119507, "learning_rate": 8.49424475609521e-08, "loss": 0.4052, "step": 15531 }, { "epoch": 0.9319013619727605, "grad_norm": 1.4423476457595825, "learning_rate": 8.479357333914384e-08, "loss": 0.3573, "step": 15532 }, { "epoch": 0.9319613607727846, "grad_norm": 1.444736123085022, "learning_rate": 8.464482809412321e-08, "loss": 0.4084, "step": 15533 }, { "epoch": 0.9320213595728085, "grad_norm": 1.515903115272522, "learning_rate": 8.449621183150635e-08, "loss": 0.3746, "step": 15534 }, { "epoch": 0.9320813583728326, "grad_norm": 1.293869972229004, "learning_rate": 8.434772455690742e-08, "loss": 0.3858, "step": 15535 }, { "epoch": 0.9321413571728565, "grad_norm": 1.3565828800201416, "learning_rate": 8.419936627593245e-08, "loss": 0.3692, "step": 15536 }, { "epoch": 0.9322013559728806, "grad_norm": 1.46976900100708, "learning_rate": 8.405113699418587e-08, "loss": 0.369, "step": 15537 }, { "epoch": 0.9322613547729045, "grad_norm": 1.376729965209961, "learning_rate": 8.39030367172644e-08, "loss": 0.4503, "step": 15538 }, { "epoch": 0.9323213535729286, "grad_norm": 1.4425569772720337, "learning_rate": 8.375506545076122e-08, "loss": 0.3435, "step": 15539 }, { "epoch": 0.9323813523729525, "grad_norm": 1.283979892730713, "learning_rate": 8.360722320026526e-08, "loss": 0.3754, "step": 15540 }, { "epoch": 0.9324413511729766, "grad_norm": 1.3502849340438843, "learning_rate": 8.34595099713596e-08, "loss": 0.367, "step": 15541 }, { "epoch": 0.9325013499730005, "grad_norm": 1.311481237411499, "learning_rate": 8.331192576962227e-08, "loss": 0.381, "step": 15542 }, { "epoch": 0.9325613487730245, "grad_norm": 1.2992980480194092, "learning_rate": 8.316447060062664e-08, "loss": 0.3688, "step": 15543 }, { "epoch": 0.9326213475730485, "grad_norm": 1.2881678342819214, "learning_rate": 8.301714446994263e-08, "loss": 0.3242, "step": 15544 }, { "epoch": 0.9326813463730725, "grad_norm": 1.298431396484375, "learning_rate": 8.286994738313307e-08, "loss": 0.3567, "step": 15545 }, { "epoch": 0.9327413451730966, "grad_norm": 1.377626657485962, "learning_rate": 8.2722879345757e-08, "loss": 0.3853, "step": 15546 }, { "epoch": 0.9328013439731205, "grad_norm": 1.2778292894363403, "learning_rate": 8.257594036336912e-08, "loss": 0.3466, "step": 15547 }, { "epoch": 0.9328613427731446, "grad_norm": 1.2694240808486938, "learning_rate": 8.242913044151795e-08, "loss": 0.3508, "step": 15548 }, { "epoch": 0.9329213415731685, "grad_norm": 1.2951494455337524, "learning_rate": 8.228244958574848e-08, "loss": 0.3226, "step": 15549 }, { "epoch": 0.9329813403731926, "grad_norm": 1.3670377731323242, "learning_rate": 8.213589780159913e-08, "loss": 0.3936, "step": 15550 }, { "epoch": 0.9330413391732165, "grad_norm": 1.4020988941192627, "learning_rate": 8.198947509460558e-08, "loss": 0.3711, "step": 15551 }, { "epoch": 0.9331013379732406, "grad_norm": 1.2167787551879883, "learning_rate": 8.184318147029729e-08, "loss": 0.3222, "step": 15552 }, { "epoch": 0.9331613367732645, "grad_norm": 1.3468598127365112, "learning_rate": 8.169701693419867e-08, "loss": 0.3756, "step": 15553 }, { "epoch": 0.9332213355732886, "grad_norm": 1.4064687490463257, "learning_rate": 8.155098149182988e-08, "loss": 0.3905, "step": 15554 }, { "epoch": 0.9332813343733125, "grad_norm": 1.2195422649383545, "learning_rate": 8.140507514870638e-08, "loss": 0.3546, "step": 15555 }, { "epoch": 0.9333413331733366, "grad_norm": 1.3490006923675537, "learning_rate": 8.125929791033781e-08, "loss": 0.3422, "step": 15556 }, { "epoch": 0.9334013319733605, "grad_norm": 1.5597929954528809, "learning_rate": 8.111364978223034e-08, "loss": 0.4365, "step": 15557 }, { "epoch": 0.9334613307733846, "grad_norm": 1.2603446245193481, "learning_rate": 8.096813076988313e-08, "loss": 0.3401, "step": 15558 }, { "epoch": 0.9335213295734085, "grad_norm": 1.3847758769989014, "learning_rate": 8.082274087879338e-08, "loss": 0.3556, "step": 15559 }, { "epoch": 0.9335813283734326, "grad_norm": 1.4496214389801025, "learning_rate": 8.067748011445053e-08, "loss": 0.344, "step": 15560 }, { "epoch": 0.9336413271734565, "grad_norm": 1.262033224105835, "learning_rate": 8.053234848234091e-08, "loss": 0.3882, "step": 15561 }, { "epoch": 0.9337013259734805, "grad_norm": 1.2711853981018066, "learning_rate": 8.038734598794544e-08, "loss": 0.3517, "step": 15562 }, { "epoch": 0.9337613247735045, "grad_norm": 1.5167771577835083, "learning_rate": 8.024247263674033e-08, "loss": 0.3875, "step": 15563 }, { "epoch": 0.9338213235735285, "grad_norm": 1.2458416223526, "learning_rate": 8.009772843419638e-08, "loss": 0.3745, "step": 15564 }, { "epoch": 0.9338813223735525, "grad_norm": 1.3942599296569824, "learning_rate": 7.99531133857801e-08, "loss": 0.3463, "step": 15565 }, { "epoch": 0.9339413211735765, "grad_norm": 1.1762334108352661, "learning_rate": 7.980862749695372e-08, "loss": 0.3086, "step": 15566 }, { "epoch": 0.9340013199736005, "grad_norm": 1.3205842971801758, "learning_rate": 7.966427077317251e-08, "loss": 0.3775, "step": 15567 }, { "epoch": 0.9340613187736245, "grad_norm": 1.3053927421569824, "learning_rate": 7.952004321988898e-08, "loss": 0.383, "step": 15568 }, { "epoch": 0.9341213175736486, "grad_norm": 1.27841055393219, "learning_rate": 7.937594484254945e-08, "loss": 0.3864, "step": 15569 }, { "epoch": 0.9341813163736725, "grad_norm": 1.4798073768615723, "learning_rate": 7.923197564659634e-08, "loss": 0.3621, "step": 15570 }, { "epoch": 0.9342413151736966, "grad_norm": 1.3604090213775635, "learning_rate": 7.908813563746663e-08, "loss": 0.3623, "step": 15571 }, { "epoch": 0.9343013139737205, "grad_norm": 1.4118657112121582, "learning_rate": 7.894442482059267e-08, "loss": 0.4142, "step": 15572 }, { "epoch": 0.9343613127737446, "grad_norm": 1.4326214790344238, "learning_rate": 7.880084320140091e-08, "loss": 0.3587, "step": 15573 }, { "epoch": 0.9344213115737685, "grad_norm": 1.2797716856002808, "learning_rate": 7.865739078531475e-08, "loss": 0.3716, "step": 15574 }, { "epoch": 0.9344813103737926, "grad_norm": 1.3537219762802124, "learning_rate": 7.851406757775137e-08, "loss": 0.4131, "step": 15575 }, { "epoch": 0.9345413091738165, "grad_norm": 1.4141652584075928, "learning_rate": 7.837087358412326e-08, "loss": 0.4254, "step": 15576 }, { "epoch": 0.9346013079738406, "grad_norm": 1.3750993013381958, "learning_rate": 7.822780880983826e-08, "loss": 0.3847, "step": 15577 }, { "epoch": 0.9346613067738645, "grad_norm": 1.4180115461349487, "learning_rate": 7.808487326029955e-08, "loss": 0.3691, "step": 15578 }, { "epoch": 0.9347213055738886, "grad_norm": 1.382352352142334, "learning_rate": 7.794206694090488e-08, "loss": 0.3675, "step": 15579 }, { "epoch": 0.9347813043739125, "grad_norm": 1.4277288913726807, "learning_rate": 7.779938985704731e-08, "loss": 0.3451, "step": 15580 }, { "epoch": 0.9348413031739365, "grad_norm": 1.3464378118515015, "learning_rate": 7.765684201411566e-08, "loss": 0.3345, "step": 15581 }, { "epoch": 0.9349013019739605, "grad_norm": 1.5374491214752197, "learning_rate": 7.751442341749248e-08, "loss": 0.4513, "step": 15582 }, { "epoch": 0.9349613007739845, "grad_norm": 1.277438998222351, "learning_rate": 7.737213407255727e-08, "loss": 0.3418, "step": 15583 }, { "epoch": 0.9350212995740085, "grad_norm": 1.4476996660232544, "learning_rate": 7.72299739846829e-08, "loss": 0.3444, "step": 15584 }, { "epoch": 0.9350812983740325, "grad_norm": 1.2675014734268188, "learning_rate": 7.708794315923795e-08, "loss": 0.3087, "step": 15585 }, { "epoch": 0.9351412971740565, "grad_norm": 1.2841897010803223, "learning_rate": 7.694604160158675e-08, "loss": 0.3491, "step": 15586 }, { "epoch": 0.9352012959740805, "grad_norm": 1.3296128511428833, "learning_rate": 7.680426931708856e-08, "loss": 0.3728, "step": 15587 }, { "epoch": 0.9352612947741045, "grad_norm": 1.1508705615997314, "learning_rate": 7.666262631109683e-08, "loss": 0.3156, "step": 15588 }, { "epoch": 0.9353212935741285, "grad_norm": 1.3459056615829468, "learning_rate": 7.65211125889611e-08, "loss": 0.3707, "step": 15589 }, { "epoch": 0.9353812923741525, "grad_norm": 1.4908313751220703, "learning_rate": 7.63797281560255e-08, "loss": 0.3483, "step": 15590 }, { "epoch": 0.9354412911741765, "grad_norm": 1.4343156814575195, "learning_rate": 7.623847301762987e-08, "loss": 0.3578, "step": 15591 }, { "epoch": 0.9355012899742006, "grad_norm": 1.4445592164993286, "learning_rate": 7.609734717910781e-08, "loss": 0.3993, "step": 15592 }, { "epoch": 0.9355612887742245, "grad_norm": 1.4996204376220703, "learning_rate": 7.595635064579026e-08, "loss": 0.3881, "step": 15593 }, { "epoch": 0.9356212875742486, "grad_norm": 1.432658076286316, "learning_rate": 7.581548342300148e-08, "loss": 0.3766, "step": 15594 }, { "epoch": 0.9356812863742725, "grad_norm": 1.3507509231567383, "learning_rate": 7.567474551606112e-08, "loss": 0.3561, "step": 15595 }, { "epoch": 0.9357412851742966, "grad_norm": 1.366969108581543, "learning_rate": 7.553413693028454e-08, "loss": 0.3632, "step": 15596 }, { "epoch": 0.9358012839743205, "grad_norm": 1.3972675800323486, "learning_rate": 7.539365767098127e-08, "loss": 0.3642, "step": 15597 }, { "epoch": 0.9358612827743445, "grad_norm": 1.410581350326538, "learning_rate": 7.525330774345812e-08, "loss": 0.3805, "step": 15598 }, { "epoch": 0.9359212815743685, "grad_norm": 1.315032958984375, "learning_rate": 7.511308715301374e-08, "loss": 0.3861, "step": 15599 }, { "epoch": 0.9359812803743925, "grad_norm": 1.2846599817276, "learning_rate": 7.497299590494443e-08, "loss": 0.3179, "step": 15600 }, { "epoch": 0.9360412791744165, "grad_norm": 1.3935073614120483, "learning_rate": 7.483303400454033e-08, "loss": 0.4, "step": 15601 }, { "epoch": 0.9361012779744405, "grad_norm": 1.2758249044418335, "learning_rate": 7.469320145708802e-08, "loss": 0.3507, "step": 15602 }, { "epoch": 0.9361612767744645, "grad_norm": 1.2376539707183838, "learning_rate": 7.45534982678679e-08, "loss": 0.3343, "step": 15603 }, { "epoch": 0.9362212755744885, "grad_norm": 1.3225423097610474, "learning_rate": 7.441392444215527e-08, "loss": 0.3185, "step": 15604 }, { "epoch": 0.9362812743745125, "grad_norm": 1.549561858177185, "learning_rate": 7.427447998522241e-08, "loss": 0.3817, "step": 15605 }, { "epoch": 0.9363412731745365, "grad_norm": 1.3816485404968262, "learning_rate": 7.413516490233452e-08, "loss": 0.3389, "step": 15606 }, { "epoch": 0.9364012719745605, "grad_norm": 1.4206905364990234, "learning_rate": 7.399597919875333e-08, "loss": 0.3734, "step": 15607 }, { "epoch": 0.9364612707745845, "grad_norm": 1.1968821287155151, "learning_rate": 7.385692287973516e-08, "loss": 0.3198, "step": 15608 }, { "epoch": 0.9365212695746085, "grad_norm": 1.3721351623535156, "learning_rate": 7.371799595053163e-08, "loss": 0.4228, "step": 15609 }, { "epoch": 0.9365812683746325, "grad_norm": 1.3965028524398804, "learning_rate": 7.357919841638932e-08, "loss": 0.3763, "step": 15610 }, { "epoch": 0.9366412671746565, "grad_norm": 1.3619780540466309, "learning_rate": 7.344053028254938e-08, "loss": 0.3447, "step": 15611 }, { "epoch": 0.9367012659746805, "grad_norm": 1.4225835800170898, "learning_rate": 7.330199155424983e-08, "loss": 0.3999, "step": 15612 }, { "epoch": 0.9367612647747046, "grad_norm": 1.5616799592971802, "learning_rate": 7.31635822367221e-08, "loss": 0.3909, "step": 15613 }, { "epoch": 0.9368212635747285, "grad_norm": 1.4476978778839111, "learning_rate": 7.302530233519294e-08, "loss": 0.3815, "step": 15614 }, { "epoch": 0.9368812623747526, "grad_norm": 1.2441953420639038, "learning_rate": 7.288715185488526e-08, "loss": 0.351, "step": 15615 }, { "epoch": 0.9369412611747765, "grad_norm": 1.3705636262893677, "learning_rate": 7.274913080101531e-08, "loss": 0.3753, "step": 15616 }, { "epoch": 0.9370012599748005, "grad_norm": 1.2377785444259644, "learning_rate": 7.261123917879663e-08, "loss": 0.3378, "step": 15617 }, { "epoch": 0.9370612587748245, "grad_norm": 1.2629890441894531, "learning_rate": 7.24734769934362e-08, "loss": 0.4362, "step": 15618 }, { "epoch": 0.9371212575748485, "grad_norm": 1.3794578313827515, "learning_rate": 7.233584425013706e-08, "loss": 0.3837, "step": 15619 }, { "epoch": 0.9371812563748725, "grad_norm": 1.3243411779403687, "learning_rate": 7.219834095409644e-08, "loss": 0.3916, "step": 15620 }, { "epoch": 0.9372412551748965, "grad_norm": 1.3586004972457886, "learning_rate": 7.20609671105073e-08, "loss": 0.3971, "step": 15621 }, { "epoch": 0.9373012539749205, "grad_norm": 1.2572202682495117, "learning_rate": 7.192372272455794e-08, "loss": 0.3151, "step": 15622 }, { "epoch": 0.9373612527749445, "grad_norm": 1.394935965538025, "learning_rate": 7.17866078014312e-08, "loss": 0.342, "step": 15623 }, { "epoch": 0.9374212515749685, "grad_norm": 1.3642597198486328, "learning_rate": 7.164962234630568e-08, "loss": 0.3494, "step": 15624 }, { "epoch": 0.9374812503749925, "grad_norm": 1.418094515800476, "learning_rate": 7.15127663643541e-08, "loss": 0.396, "step": 15625 }, { "epoch": 0.9375412491750165, "grad_norm": 1.3385134935379028, "learning_rate": 7.137603986074498e-08, "loss": 0.3749, "step": 15626 }, { "epoch": 0.9376012479750405, "grad_norm": 1.4701621532440186, "learning_rate": 7.123944284064249e-08, "loss": 0.4068, "step": 15627 }, { "epoch": 0.9376612467750645, "grad_norm": 1.3542635440826416, "learning_rate": 7.110297530920461e-08, "loss": 0.3599, "step": 15628 }, { "epoch": 0.9377212455750885, "grad_norm": 1.2351375818252563, "learning_rate": 7.096663727158586e-08, "loss": 0.3726, "step": 15629 }, { "epoch": 0.9377812443751125, "grad_norm": 1.3497881889343262, "learning_rate": 7.083042873293372e-08, "loss": 0.3993, "step": 15630 }, { "epoch": 0.9378412431751365, "grad_norm": 1.3277106285095215, "learning_rate": 7.069434969839372e-08, "loss": 0.3344, "step": 15631 }, { "epoch": 0.9379012419751604, "grad_norm": 1.2728992700576782, "learning_rate": 7.055840017310366e-08, "loss": 0.3061, "step": 15632 }, { "epoch": 0.9379612407751845, "grad_norm": 1.5966321229934692, "learning_rate": 7.0422580162199e-08, "loss": 0.3642, "step": 15633 }, { "epoch": 0.9380212395752084, "grad_norm": 1.4196388721466064, "learning_rate": 7.028688967080815e-08, "loss": 0.429, "step": 15634 }, { "epoch": 0.9380812383752325, "grad_norm": 1.4091238975524902, "learning_rate": 7.015132870405532e-08, "loss": 0.3644, "step": 15635 }, { "epoch": 0.9381412371752565, "grad_norm": 1.535415768623352, "learning_rate": 7.001589726706081e-08, "loss": 0.3315, "step": 15636 }, { "epoch": 0.9382012359752805, "grad_norm": 1.2929481267929077, "learning_rate": 6.988059536493907e-08, "loss": 0.3701, "step": 15637 }, { "epoch": 0.9382612347753045, "grad_norm": 1.347886323928833, "learning_rate": 6.974542300279912e-08, "loss": 0.3422, "step": 15638 }, { "epoch": 0.9383212335753285, "grad_norm": 1.5413047075271606, "learning_rate": 6.961038018574689e-08, "loss": 0.3846, "step": 15639 }, { "epoch": 0.9383812323753525, "grad_norm": 1.4011539220809937, "learning_rate": 6.947546691888167e-08, "loss": 0.3217, "step": 15640 }, { "epoch": 0.9384412311753765, "grad_norm": 1.428575873374939, "learning_rate": 6.934068320729853e-08, "loss": 0.3809, "step": 15641 }, { "epoch": 0.9385012299754005, "grad_norm": 1.360094666481018, "learning_rate": 6.920602905608742e-08, "loss": 0.3337, "step": 15642 }, { "epoch": 0.9385612287754245, "grad_norm": 1.3178722858428955, "learning_rate": 6.907150447033405e-08, "loss": 0.3875, "step": 15643 }, { "epoch": 0.9386212275754485, "grad_norm": 1.3172954320907593, "learning_rate": 6.89371094551191e-08, "loss": 0.371, "step": 15644 }, { "epoch": 0.9386812263754725, "grad_norm": 1.2551155090332031, "learning_rate": 6.880284401551739e-08, "loss": 0.3455, "step": 15645 }, { "epoch": 0.9387412251754965, "grad_norm": 1.4100232124328613, "learning_rate": 6.866870815659987e-08, "loss": 0.3595, "step": 15646 }, { "epoch": 0.9388012239755205, "grad_norm": 1.2848496437072754, "learning_rate": 6.853470188343163e-08, "loss": 0.3876, "step": 15647 }, { "epoch": 0.9388612227755445, "grad_norm": 1.3734595775604248, "learning_rate": 6.840082520107509e-08, "loss": 0.3816, "step": 15648 }, { "epoch": 0.9389212215755685, "grad_norm": 1.3736168146133423, "learning_rate": 6.82670781145841e-08, "loss": 0.3609, "step": 15649 }, { "epoch": 0.9389812203755925, "grad_norm": 1.4076143503189087, "learning_rate": 6.813346062901054e-08, "loss": 0.386, "step": 15650 }, { "epoch": 0.9390412191756164, "grad_norm": 1.591082215309143, "learning_rate": 6.799997274940129e-08, "loss": 0.4142, "step": 15651 }, { "epoch": 0.9391012179756405, "grad_norm": 1.3171706199645996, "learning_rate": 6.786661448079656e-08, "loss": 0.3657, "step": 15652 }, { "epoch": 0.9391612167756644, "grad_norm": 1.3952534198760986, "learning_rate": 6.773338582823313e-08, "loss": 0.3694, "step": 15653 }, { "epoch": 0.9392212155756885, "grad_norm": 1.3698126077651978, "learning_rate": 6.760028679674229e-08, "loss": 0.3509, "step": 15654 }, { "epoch": 0.9392812143757124, "grad_norm": 1.5292936563491821, "learning_rate": 6.74673173913507e-08, "loss": 0.3873, "step": 15655 }, { "epoch": 0.9393412131757365, "grad_norm": 1.3189318180084229, "learning_rate": 6.733447761707995e-08, "loss": 0.3674, "step": 15656 }, { "epoch": 0.9394012119757604, "grad_norm": 1.3292930126190186, "learning_rate": 6.720176747894657e-08, "loss": 0.4163, "step": 15657 }, { "epoch": 0.9394612107757845, "grad_norm": 1.4671516418457031, "learning_rate": 6.706918698196285e-08, "loss": 0.3687, "step": 15658 }, { "epoch": 0.9395212095758085, "grad_norm": 1.329255223274231, "learning_rate": 6.69367361311356e-08, "loss": 0.3715, "step": 15659 }, { "epoch": 0.9395812083758325, "grad_norm": 1.5701961517333984, "learning_rate": 6.680441493146699e-08, "loss": 0.4131, "step": 15660 }, { "epoch": 0.9396412071758565, "grad_norm": 1.3336360454559326, "learning_rate": 6.667222338795376e-08, "loss": 0.337, "step": 15661 }, { "epoch": 0.9397012059758805, "grad_norm": 1.269178032875061, "learning_rate": 6.654016150558794e-08, "loss": 0.3595, "step": 15662 }, { "epoch": 0.9397612047759045, "grad_norm": 1.3241153955459595, "learning_rate": 6.640822928935813e-08, "loss": 0.3428, "step": 15663 }, { "epoch": 0.9398212035759285, "grad_norm": 1.4824484586715698, "learning_rate": 6.627642674424627e-08, "loss": 0.3652, "step": 15664 }, { "epoch": 0.9398812023759525, "grad_norm": 1.4721447229385376, "learning_rate": 6.614475387522928e-08, "loss": 0.3745, "step": 15665 }, { "epoch": 0.9399412011759765, "grad_norm": 1.3364769220352173, "learning_rate": 6.601321068728017e-08, "loss": 0.3907, "step": 15666 }, { "epoch": 0.9400011999760005, "grad_norm": 1.3010507822036743, "learning_rate": 6.58817971853673e-08, "loss": 0.3903, "step": 15667 }, { "epoch": 0.9400611987760245, "grad_norm": 1.409053087234497, "learning_rate": 6.57505133744532e-08, "loss": 0.4108, "step": 15668 }, { "epoch": 0.9401211975760485, "grad_norm": 1.2912956476211548, "learning_rate": 6.561935925949535e-08, "loss": 0.3702, "step": 15669 }, { "epoch": 0.9401811963760724, "grad_norm": 1.3394516706466675, "learning_rate": 6.548833484544774e-08, "loss": 0.3681, "step": 15670 }, { "epoch": 0.9402411951760965, "grad_norm": 1.3519351482391357, "learning_rate": 6.535744013725813e-08, "loss": 0.3646, "step": 15671 }, { "epoch": 0.9403011939761204, "grad_norm": 1.218832015991211, "learning_rate": 6.522667513986963e-08, "loss": 0.3387, "step": 15672 }, { "epoch": 0.9403611927761445, "grad_norm": 1.2564585208892822, "learning_rate": 6.509603985822065e-08, "loss": 0.3691, "step": 15673 }, { "epoch": 0.9404211915761684, "grad_norm": 1.4054521322250366, "learning_rate": 6.4965534297245e-08, "loss": 0.3855, "step": 15674 }, { "epoch": 0.9404811903761925, "grad_norm": 1.520411729812622, "learning_rate": 6.483515846187137e-08, "loss": 0.3818, "step": 15675 }, { "epoch": 0.9405411891762164, "grad_norm": 1.269524097442627, "learning_rate": 6.470491235702308e-08, "loss": 0.3657, "step": 15676 }, { "epoch": 0.9406011879762405, "grad_norm": 1.3420100212097168, "learning_rate": 6.457479598761911e-08, "loss": 0.3245, "step": 15677 }, { "epoch": 0.9406611867762644, "grad_norm": 1.3702126741409302, "learning_rate": 6.444480935857306e-08, "loss": 0.3586, "step": 15678 }, { "epoch": 0.9407211855762885, "grad_norm": 1.3679760694503784, "learning_rate": 6.431495247479457e-08, "loss": 0.3823, "step": 15679 }, { "epoch": 0.9407811843763125, "grad_norm": 1.4008357524871826, "learning_rate": 6.418522534118753e-08, "loss": 0.4173, "step": 15680 }, { "epoch": 0.9408411831763365, "grad_norm": 1.4505162239074707, "learning_rate": 6.405562796265035e-08, "loss": 0.3886, "step": 15681 }, { "epoch": 0.9409011819763605, "grad_norm": 1.3112781047821045, "learning_rate": 6.392616034407833e-08, "loss": 0.4228, "step": 15682 }, { "epoch": 0.9409611807763845, "grad_norm": 1.3347247838974, "learning_rate": 6.379682249036095e-08, "loss": 0.4061, "step": 15683 }, { "epoch": 0.9410211795764085, "grad_norm": 1.3731968402862549, "learning_rate": 6.366761440638146e-08, "loss": 0.3835, "step": 15684 }, { "epoch": 0.9410811783764325, "grad_norm": 1.4799801111221313, "learning_rate": 6.353853609702082e-08, "loss": 0.3739, "step": 15685 }, { "epoch": 0.9411411771764565, "grad_norm": 1.4090594053268433, "learning_rate": 6.340958756715331e-08, "loss": 0.3729, "step": 15686 }, { "epoch": 0.9412011759764805, "grad_norm": 1.310184121131897, "learning_rate": 6.328076882164823e-08, "loss": 0.3399, "step": 15687 }, { "epoch": 0.9412611747765045, "grad_norm": 1.2864259481430054, "learning_rate": 6.315207986537096e-08, "loss": 0.3481, "step": 15688 }, { "epoch": 0.9413211735765284, "grad_norm": 1.3646358251571655, "learning_rate": 6.302352070318146e-08, "loss": 0.3325, "step": 15689 }, { "epoch": 0.9413811723765525, "grad_norm": 1.498655080795288, "learning_rate": 6.289509133993459e-08, "loss": 0.4253, "step": 15690 }, { "epoch": 0.9414411711765764, "grad_norm": 1.186931848526001, "learning_rate": 6.276679178048061e-08, "loss": 0.327, "step": 15691 }, { "epoch": 0.9415011699766005, "grad_norm": 1.403265357017517, "learning_rate": 6.263862202966546e-08, "loss": 0.3717, "step": 15692 }, { "epoch": 0.9415611687766244, "grad_norm": 1.313995361328125, "learning_rate": 6.251058209232851e-08, "loss": 0.3224, "step": 15693 }, { "epoch": 0.9416211675766485, "grad_norm": 1.338603138923645, "learning_rate": 6.238267197330599e-08, "loss": 0.3379, "step": 15694 }, { "epoch": 0.9416811663766724, "grad_norm": 1.391278862953186, "learning_rate": 6.225489167742793e-08, "loss": 0.3862, "step": 15695 }, { "epoch": 0.9417411651766965, "grad_norm": 1.2837722301483154, "learning_rate": 6.212724120952045e-08, "loss": 0.3659, "step": 15696 }, { "epoch": 0.9418011639767204, "grad_norm": 1.3089051246643066, "learning_rate": 6.19997205744039e-08, "loss": 0.3303, "step": 15697 }, { "epoch": 0.9418611627767445, "grad_norm": 1.4927977323532104, "learning_rate": 6.187232977689505e-08, "loss": 0.3479, "step": 15698 }, { "epoch": 0.9419211615767684, "grad_norm": 1.3521336317062378, "learning_rate": 6.174506882180375e-08, "loss": 0.3761, "step": 15699 }, { "epoch": 0.9419811603767925, "grad_norm": 1.4013739824295044, "learning_rate": 6.16179377139363e-08, "loss": 0.3821, "step": 15700 }, { "epoch": 0.9420411591768164, "grad_norm": 1.3727610111236572, "learning_rate": 6.149093645809478e-08, "loss": 0.3821, "step": 15701 }, { "epoch": 0.9421011579768405, "grad_norm": 1.3448818922042847, "learning_rate": 6.13640650590746e-08, "loss": 0.3757, "step": 15702 }, { "epoch": 0.9421611567768645, "grad_norm": 1.4431467056274414, "learning_rate": 6.123732352166695e-08, "loss": 0.3512, "step": 15703 }, { "epoch": 0.9422211555768885, "grad_norm": 1.376266360282898, "learning_rate": 6.111071185065869e-08, "loss": 0.3598, "step": 15704 }, { "epoch": 0.9422811543769125, "grad_norm": 1.4637395143508911, "learning_rate": 6.09842300508317e-08, "loss": 0.4127, "step": 15705 }, { "epoch": 0.9423411531769365, "grad_norm": 1.2419418096542358, "learning_rate": 6.085787812696198e-08, "loss": 0.3376, "step": 15706 }, { "epoch": 0.9424011519769605, "grad_norm": 1.2642276287078857, "learning_rate": 6.073165608382126e-08, "loss": 0.3156, "step": 15707 }, { "epoch": 0.9424611507769844, "grad_norm": 1.4088283777236938, "learning_rate": 6.06055639261766e-08, "loss": 0.3494, "step": 15708 }, { "epoch": 0.9425211495770085, "grad_norm": 1.3610763549804688, "learning_rate": 6.047960165879007e-08, "loss": 0.4215, "step": 15709 }, { "epoch": 0.9425811483770324, "grad_norm": 1.278393268585205, "learning_rate": 6.0353769286419e-08, "loss": 0.3355, "step": 15710 }, { "epoch": 0.9426411471770565, "grad_norm": 1.3103114366531372, "learning_rate": 6.022806681381493e-08, "loss": 0.386, "step": 15711 }, { "epoch": 0.9427011459770804, "grad_norm": 1.3534497022628784, "learning_rate": 6.010249424572472e-08, "loss": 0.3725, "step": 15712 }, { "epoch": 0.9427611447771045, "grad_norm": 1.4036563634872437, "learning_rate": 5.997705158689176e-08, "loss": 0.3779, "step": 15713 }, { "epoch": 0.9428211435771284, "grad_norm": 1.4586964845657349, "learning_rate": 5.985173884205241e-08, "loss": 0.3841, "step": 15714 }, { "epoch": 0.9428811423771525, "grad_norm": 1.2503503561019897, "learning_rate": 5.972655601593957e-08, "loss": 0.34, "step": 15715 }, { "epoch": 0.9429411411771764, "grad_norm": 1.4470356702804565, "learning_rate": 5.960150311328144e-08, "loss": 0.4069, "step": 15716 }, { "epoch": 0.9430011399772005, "grad_norm": 1.2794841527938843, "learning_rate": 5.947658013879964e-08, "loss": 0.3292, "step": 15717 }, { "epoch": 0.9430611387772244, "grad_norm": 1.3775434494018555, "learning_rate": 5.935178709721267e-08, "loss": 0.3818, "step": 15718 }, { "epoch": 0.9431211375772485, "grad_norm": 1.3720887899398804, "learning_rate": 5.922712399323282e-08, "loss": 0.3377, "step": 15719 }, { "epoch": 0.9431811363772724, "grad_norm": 1.3903366327285767, "learning_rate": 5.910259083156849e-08, "loss": 0.3752, "step": 15720 }, { "epoch": 0.9432411351772965, "grad_norm": 1.3599655628204346, "learning_rate": 5.8978187616923024e-08, "loss": 0.4121, "step": 15721 }, { "epoch": 0.9433011339773204, "grad_norm": 1.297777771949768, "learning_rate": 5.885391435399356e-08, "loss": 0.4172, "step": 15722 }, { "epoch": 0.9433611327773445, "grad_norm": 1.4390803575515747, "learning_rate": 5.872977104747451e-08, "loss": 0.3497, "step": 15723 }, { "epoch": 0.9434211315773684, "grad_norm": 1.2148463726043701, "learning_rate": 5.8605757702052896e-08, "loss": 0.3984, "step": 15724 }, { "epoch": 0.9434811303773925, "grad_norm": 1.214595079421997, "learning_rate": 5.848187432241342e-08, "loss": 0.3329, "step": 15725 }, { "epoch": 0.9435411291774165, "grad_norm": 1.4113491773605347, "learning_rate": 5.835812091323378e-08, "loss": 0.3364, "step": 15726 }, { "epoch": 0.9436011279774404, "grad_norm": 1.3443681001663208, "learning_rate": 5.8234497479187794e-08, "loss": 0.4243, "step": 15727 }, { "epoch": 0.9436611267774645, "grad_norm": 1.3177428245544434, "learning_rate": 5.811100402494462e-08, "loss": 0.3371, "step": 15728 }, { "epoch": 0.9437211255774884, "grad_norm": 1.422584056854248, "learning_rate": 5.79876405551672e-08, "loss": 0.3889, "step": 15729 }, { "epoch": 0.9437811243775125, "grad_norm": 1.2924106121063232, "learning_rate": 5.786440707451495e-08, "loss": 0.4174, "step": 15730 }, { "epoch": 0.9438411231775364, "grad_norm": 1.2972455024719238, "learning_rate": 5.77413035876419e-08, "loss": 0.3189, "step": 15731 }, { "epoch": 0.9439011219775605, "grad_norm": 1.2876769304275513, "learning_rate": 5.761833009919659e-08, "loss": 0.3514, "step": 15732 }, { "epoch": 0.9439611207775844, "grad_norm": 1.2799203395843506, "learning_rate": 5.7495486613823696e-08, "loss": 0.3865, "step": 15733 }, { "epoch": 0.9440211195776085, "grad_norm": 1.4018468856811523, "learning_rate": 5.7372773136162066e-08, "loss": 0.3849, "step": 15734 }, { "epoch": 0.9440811183776324, "grad_norm": 1.3289899826049805, "learning_rate": 5.7250189670846664e-08, "loss": 0.3941, "step": 15735 }, { "epoch": 0.9441411171776565, "grad_norm": 1.347098469734192, "learning_rate": 5.712773622250622e-08, "loss": 0.3739, "step": 15736 }, { "epoch": 0.9442011159776804, "grad_norm": 1.2938252687454224, "learning_rate": 5.7005412795765205e-08, "loss": 0.3337, "step": 15737 }, { "epoch": 0.9442611147777045, "grad_norm": 1.3212906122207642, "learning_rate": 5.6883219395243825e-08, "loss": 0.3803, "step": 15738 }, { "epoch": 0.9443211135777284, "grad_norm": 1.3422545194625854, "learning_rate": 5.6761156025556034e-08, "loss": 0.3532, "step": 15739 }, { "epoch": 0.9443811123777525, "grad_norm": 1.3815993070602417, "learning_rate": 5.6639222691312716e-08, "loss": 0.3546, "step": 15740 }, { "epoch": 0.9444411111777764, "grad_norm": 1.4595158100128174, "learning_rate": 5.651741939711774e-08, "loss": 0.4139, "step": 15741 }, { "epoch": 0.9445011099778005, "grad_norm": 1.3891587257385254, "learning_rate": 5.639574614757109e-08, "loss": 0.372, "step": 15742 }, { "epoch": 0.9445611087778244, "grad_norm": 1.25898277759552, "learning_rate": 5.627420294726809e-08, "loss": 0.3065, "step": 15743 }, { "epoch": 0.9446211075778485, "grad_norm": 1.3390589952468872, "learning_rate": 5.615278980079941e-08, "loss": 0.3302, "step": 15744 }, { "epoch": 0.9446811063778724, "grad_norm": 1.420135498046875, "learning_rate": 5.6031506712749475e-08, "loss": 0.3761, "step": 15745 }, { "epoch": 0.9447411051778964, "grad_norm": 1.3164228200912476, "learning_rate": 5.591035368769848e-08, "loss": 0.3013, "step": 15746 }, { "epoch": 0.9448011039779204, "grad_norm": 1.2587196826934814, "learning_rate": 5.578933073022269e-08, "loss": 0.3389, "step": 15747 }, { "epoch": 0.9448611027779444, "grad_norm": 1.514989972114563, "learning_rate": 5.566843784489178e-08, "loss": 0.3596, "step": 15748 }, { "epoch": 0.9449211015779685, "grad_norm": 1.5944576263427734, "learning_rate": 5.554767503627156e-08, "loss": 0.3884, "step": 15749 }, { "epoch": 0.9449811003779924, "grad_norm": 1.3650648593902588, "learning_rate": 5.5427042308923144e-08, "loss": 0.3097, "step": 15750 }, { "epoch": 0.9450410991780165, "grad_norm": 1.4003093242645264, "learning_rate": 5.5306539667401845e-08, "loss": 0.3609, "step": 15751 }, { "epoch": 0.9451010979780404, "grad_norm": 1.3130871057510376, "learning_rate": 5.518616711625868e-08, "loss": 0.364, "step": 15752 }, { "epoch": 0.9451610967780645, "grad_norm": 1.4257557392120361, "learning_rate": 5.5065924660038844e-08, "loss": 0.3928, "step": 15753 }, { "epoch": 0.9452210955780884, "grad_norm": 1.2636109590530396, "learning_rate": 5.4945812303284045e-08, "loss": 0.3503, "step": 15754 }, { "epoch": 0.9452810943781125, "grad_norm": 1.3395781517028809, "learning_rate": 5.482583005053054e-08, "loss": 0.3492, "step": 15755 }, { "epoch": 0.9453410931781364, "grad_norm": 1.2787848711013794, "learning_rate": 5.470597790630954e-08, "loss": 0.3792, "step": 15756 }, { "epoch": 0.9454010919781605, "grad_norm": 1.411338210105896, "learning_rate": 5.458625587514681e-08, "loss": 0.3694, "step": 15757 }, { "epoch": 0.9454610907781844, "grad_norm": 1.3402210474014282, "learning_rate": 5.4466663961563854e-08, "loss": 0.351, "step": 15758 }, { "epoch": 0.9455210895782085, "grad_norm": 1.3383289575576782, "learning_rate": 5.4347202170077104e-08, "loss": 0.3746, "step": 15759 }, { "epoch": 0.9455810883782324, "grad_norm": 1.2854496240615845, "learning_rate": 5.4227870505198335e-08, "loss": 0.4015, "step": 15760 }, { "epoch": 0.9456410871782565, "grad_norm": 1.3391690254211426, "learning_rate": 5.41086689714339e-08, "loss": 0.3952, "step": 15761 }, { "epoch": 0.9457010859782804, "grad_norm": 1.4002933502197266, "learning_rate": 5.3989597573285474e-08, "loss": 0.3274, "step": 15762 }, { "epoch": 0.9457610847783045, "grad_norm": 1.3646198511123657, "learning_rate": 5.387065631525045e-08, "loss": 0.3604, "step": 15763 }, { "epoch": 0.9458210835783284, "grad_norm": 1.3723021745681763, "learning_rate": 5.375184520182002e-08, "loss": 0.4121, "step": 15764 }, { "epoch": 0.9458810823783524, "grad_norm": 1.3358290195465088, "learning_rate": 5.36331642374811e-08, "loss": 0.3701, "step": 15765 }, { "epoch": 0.9459410811783764, "grad_norm": 1.218581199645996, "learning_rate": 5.351461342671554e-08, "loss": 0.3359, "step": 15766 }, { "epoch": 0.9460010799784004, "grad_norm": 1.354384422302246, "learning_rate": 5.339619277400209e-08, "loss": 0.3727, "step": 15767 }, { "epoch": 0.9460610787784244, "grad_norm": 1.3597609996795654, "learning_rate": 5.3277902283810576e-08, "loss": 0.3949, "step": 15768 }, { "epoch": 0.9461210775784484, "grad_norm": 1.3306925296783447, "learning_rate": 5.3159741960610015e-08, "loss": 0.3669, "step": 15769 }, { "epoch": 0.9461810763784725, "grad_norm": 1.4053950309753418, "learning_rate": 5.304171180886208e-08, "loss": 0.3447, "step": 15770 }, { "epoch": 0.9462410751784964, "grad_norm": 1.2765650749206543, "learning_rate": 5.2923811833024915e-08, "loss": 0.3266, "step": 15771 }, { "epoch": 0.9463010739785205, "grad_norm": 1.3599412441253662, "learning_rate": 5.280604203754968e-08, "loss": 0.3746, "step": 15772 }, { "epoch": 0.9463610727785444, "grad_norm": 1.3383504152297974, "learning_rate": 5.2688402426885214e-08, "loss": 0.3604, "step": 15773 }, { "epoch": 0.9464210715785685, "grad_norm": 1.3701997995376587, "learning_rate": 5.2570893005474126e-08, "loss": 0.368, "step": 15774 }, { "epoch": 0.9464810703785924, "grad_norm": 1.3564633131027222, "learning_rate": 5.245351377775398e-08, "loss": 0.392, "step": 15775 }, { "epoch": 0.9465410691786165, "grad_norm": 1.2430795431137085, "learning_rate": 5.233626474815728e-08, "loss": 0.3654, "step": 15776 }, { "epoch": 0.9466010679786404, "grad_norm": 1.2467237710952759, "learning_rate": 5.221914592111226e-08, "loss": 0.3318, "step": 15777 }, { "epoch": 0.9466610667786645, "grad_norm": 1.245471715927124, "learning_rate": 5.2102157301042503e-08, "loss": 0.3573, "step": 15778 }, { "epoch": 0.9467210655786884, "grad_norm": 1.3183366060256958, "learning_rate": 5.198529889236536e-08, "loss": 0.391, "step": 15779 }, { "epoch": 0.9467810643787125, "grad_norm": 1.373258113861084, "learning_rate": 5.1868570699494295e-08, "loss": 0.3868, "step": 15780 }, { "epoch": 0.9468410631787364, "grad_norm": 1.3240559101104736, "learning_rate": 5.175197272683812e-08, "loss": 0.431, "step": 15781 }, { "epoch": 0.9469010619787604, "grad_norm": 1.210614562034607, "learning_rate": 5.163550497879943e-08, "loss": 0.3724, "step": 15782 }, { "epoch": 0.9469610607787844, "grad_norm": 1.4150878190994263, "learning_rate": 5.15191674597773e-08, "loss": 0.3665, "step": 15783 }, { "epoch": 0.9470210595788084, "grad_norm": 1.291060447692871, "learning_rate": 5.1402960174164235e-08, "loss": 0.363, "step": 15784 }, { "epoch": 0.9470810583788324, "grad_norm": 1.4143552780151367, "learning_rate": 5.128688312634999e-08, "loss": 0.3339, "step": 15785 }, { "epoch": 0.9471410571788564, "grad_norm": 1.258900761604309, "learning_rate": 5.1170936320718505e-08, "loss": 0.3833, "step": 15786 }, { "epoch": 0.9472010559788804, "grad_norm": 1.2776843309402466, "learning_rate": 5.1055119761647114e-08, "loss": 0.371, "step": 15787 }, { "epoch": 0.9472610547789044, "grad_norm": 1.3284525871276855, "learning_rate": 5.0939433453510816e-08, "loss": 0.3572, "step": 15788 }, { "epoch": 0.9473210535789284, "grad_norm": 1.2780600786209106, "learning_rate": 5.0823877400678007e-08, "loss": 0.3529, "step": 15789 }, { "epoch": 0.9473810523789524, "grad_norm": 1.4574103355407715, "learning_rate": 5.0708451607513194e-08, "loss": 0.3755, "step": 15790 }, { "epoch": 0.9474410511789764, "grad_norm": 1.2910751104354858, "learning_rate": 5.059315607837545e-08, "loss": 0.3284, "step": 15791 }, { "epoch": 0.9475010499790004, "grad_norm": 1.4064561128616333, "learning_rate": 5.0477990817618e-08, "loss": 0.3815, "step": 15792 }, { "epoch": 0.9475610487790245, "grad_norm": 1.3869459629058838, "learning_rate": 5.036295582959138e-08, "loss": 0.3767, "step": 15793 }, { "epoch": 0.9476210475790484, "grad_norm": 1.17483651638031, "learning_rate": 5.0248051118639505e-08, "loss": 0.3382, "step": 15794 }, { "epoch": 0.9476810463790725, "grad_norm": 1.1935274600982666, "learning_rate": 5.0133276689101224e-08, "loss": 0.3303, "step": 15795 }, { "epoch": 0.9477410451790964, "grad_norm": 1.4150676727294922, "learning_rate": 5.001863254531191e-08, "loss": 0.4305, "step": 15796 }, { "epoch": 0.9478010439791205, "grad_norm": 1.4462034702301025, "learning_rate": 4.990411869160072e-08, "loss": 0.361, "step": 15797 }, { "epoch": 0.9478610427791444, "grad_norm": 1.3972660303115845, "learning_rate": 4.9789735132292513e-08, "loss": 0.3773, "step": 15798 }, { "epoch": 0.9479210415791685, "grad_norm": 1.2648508548736572, "learning_rate": 4.967548187170673e-08, "loss": 0.3396, "step": 15799 }, { "epoch": 0.9479810403791924, "grad_norm": 1.3377035856246948, "learning_rate": 4.9561358914158136e-08, "loss": 0.3366, "step": 15800 }, { "epoch": 0.9480410391792164, "grad_norm": 1.4907195568084717, "learning_rate": 4.9447366263957235e-08, "loss": 0.3701, "step": 15801 }, { "epoch": 0.9481010379792404, "grad_norm": 1.3511698246002197, "learning_rate": 4.9333503925408304e-08, "loss": 0.372, "step": 15802 }, { "epoch": 0.9481610367792644, "grad_norm": 1.4215030670166016, "learning_rate": 4.921977190281213e-08, "loss": 0.3892, "step": 15803 }, { "epoch": 0.9482210355792884, "grad_norm": 1.370628833770752, "learning_rate": 4.910617020046287e-08, "loss": 0.4061, "step": 15804 }, { "epoch": 0.9482810343793124, "grad_norm": 1.3270894289016724, "learning_rate": 4.8992698822652e-08, "loss": 0.3426, "step": 15805 }, { "epoch": 0.9483410331793364, "grad_norm": 1.4287487268447876, "learning_rate": 4.887935777366359e-08, "loss": 0.3273, "step": 15806 }, { "epoch": 0.9484010319793604, "grad_norm": 1.4220432043075562, "learning_rate": 4.87661470577786e-08, "loss": 0.3708, "step": 15807 }, { "epoch": 0.9484610307793844, "grad_norm": 1.4524774551391602, "learning_rate": 4.865306667927294e-08, "loss": 0.376, "step": 15808 }, { "epoch": 0.9485210295794084, "grad_norm": 1.391080617904663, "learning_rate": 4.854011664241631e-08, "loss": 0.3662, "step": 15809 }, { "epoch": 0.9485810283794324, "grad_norm": 1.284376621246338, "learning_rate": 4.8427296951474905e-08, "loss": 0.3398, "step": 15810 }, { "epoch": 0.9486410271794564, "grad_norm": 1.3307033777236938, "learning_rate": 4.8314607610708713e-08, "loss": 0.323, "step": 15811 }, { "epoch": 0.9487010259794804, "grad_norm": 1.3269200325012207, "learning_rate": 4.8202048624374216e-08, "loss": 0.3951, "step": 15812 }, { "epoch": 0.9487610247795044, "grad_norm": 1.401715874671936, "learning_rate": 4.808961999672206e-08, "loss": 0.4167, "step": 15813 }, { "epoch": 0.9488210235795284, "grad_norm": 1.3090927600860596, "learning_rate": 4.7977321731997866e-08, "loss": 0.3334, "step": 15814 }, { "epoch": 0.9488810223795524, "grad_norm": 1.2441024780273438, "learning_rate": 4.786515383444334e-08, "loss": 0.3259, "step": 15815 }, { "epoch": 0.9489410211795765, "grad_norm": 1.3932652473449707, "learning_rate": 4.77531163082936e-08, "loss": 0.3671, "step": 15816 }, { "epoch": 0.9490010199796004, "grad_norm": 1.4093592166900635, "learning_rate": 4.764120915778064e-08, "loss": 0.3698, "step": 15817 }, { "epoch": 0.9490610187796245, "grad_norm": 1.4127893447875977, "learning_rate": 4.7529432387129864e-08, "loss": 0.3449, "step": 15818 }, { "epoch": 0.9491210175796484, "grad_norm": 1.308592438697815, "learning_rate": 4.741778600056318e-08, "loss": 0.3534, "step": 15819 }, { "epoch": 0.9491810163796724, "grad_norm": 1.3060293197631836, "learning_rate": 4.7306270002296644e-08, "loss": 0.415, "step": 15820 }, { "epoch": 0.9492410151796964, "grad_norm": 1.3590502738952637, "learning_rate": 4.719488439654207e-08, "loss": 0.3734, "step": 15821 }, { "epoch": 0.9493010139797204, "grad_norm": 1.3696990013122559, "learning_rate": 4.70836291875058e-08, "loss": 0.3689, "step": 15822 }, { "epoch": 0.9493610127797444, "grad_norm": 1.355020523071289, "learning_rate": 4.6972504379389155e-08, "loss": 0.3648, "step": 15823 }, { "epoch": 0.9494210115797684, "grad_norm": 1.425874948501587, "learning_rate": 4.686150997638955e-08, "loss": 0.4108, "step": 15824 }, { "epoch": 0.9494810103797924, "grad_norm": 1.3002784252166748, "learning_rate": 4.675064598269779e-08, "loss": 0.3202, "step": 15825 }, { "epoch": 0.9495410091798164, "grad_norm": 1.3106647729873657, "learning_rate": 4.663991240250121e-08, "loss": 0.2928, "step": 15826 }, { "epoch": 0.9496010079798404, "grad_norm": 1.4945980310440063, "learning_rate": 4.652930923998205e-08, "loss": 0.4016, "step": 15827 }, { "epoch": 0.9496610067798644, "grad_norm": 1.3322265148162842, "learning_rate": 4.641883649931677e-08, "loss": 0.3367, "step": 15828 }, { "epoch": 0.9497210055798884, "grad_norm": 1.2095935344696045, "learning_rate": 4.630849418467753e-08, "loss": 0.3459, "step": 15829 }, { "epoch": 0.9497810043799124, "grad_norm": 1.4178849458694458, "learning_rate": 4.619828230023104e-08, "loss": 0.3579, "step": 15830 }, { "epoch": 0.9498410031799364, "grad_norm": 1.2945308685302734, "learning_rate": 4.608820085014015e-08, "loss": 0.3369, "step": 15831 }, { "epoch": 0.9499010019799604, "grad_norm": 1.420763373374939, "learning_rate": 4.597824983856225e-08, "loss": 0.3609, "step": 15832 }, { "epoch": 0.9499610007799844, "grad_norm": 1.4264572858810425, "learning_rate": 4.586842926964929e-08, "loss": 0.4042, "step": 15833 }, { "epoch": 0.9500209995800084, "grad_norm": 1.345712661743164, "learning_rate": 4.575873914754858e-08, "loss": 0.3362, "step": 15834 }, { "epoch": 0.9500809983800323, "grad_norm": 1.2570366859436035, "learning_rate": 4.564917947640273e-08, "loss": 0.4, "step": 15835 }, { "epoch": 0.9501409971800564, "grad_norm": 1.2715530395507812, "learning_rate": 4.5539750260349734e-08, "loss": 0.348, "step": 15836 }, { "epoch": 0.9502009959800803, "grad_norm": 1.354499101638794, "learning_rate": 4.5430451503521705e-08, "loss": 0.3415, "step": 15837 }, { "epoch": 0.9502609947801044, "grad_norm": 1.2842531204223633, "learning_rate": 4.532128321004652e-08, "loss": 0.326, "step": 15838 }, { "epoch": 0.9503209935801284, "grad_norm": 1.4503812789916992, "learning_rate": 4.5212245384047e-08, "loss": 0.4398, "step": 15839 }, { "epoch": 0.9503809923801524, "grad_norm": 1.3369601964950562, "learning_rate": 4.510333802964089e-08, "loss": 0.3783, "step": 15840 }, { "epoch": 0.9504409911801764, "grad_norm": 1.380053997039795, "learning_rate": 4.499456115094169e-08, "loss": 0.3599, "step": 15841 }, { "epoch": 0.9505009899802004, "grad_norm": 1.3909224271774292, "learning_rate": 4.4885914752056276e-08, "loss": 0.384, "step": 15842 }, { "epoch": 0.9505609887802244, "grad_norm": 1.37173330783844, "learning_rate": 4.477739883708881e-08, "loss": 0.3539, "step": 15843 }, { "epoch": 0.9506209875802484, "grad_norm": 1.3898321390151978, "learning_rate": 4.466901341013724e-08, "loss": 0.3589, "step": 15844 }, { "epoch": 0.9506809863802724, "grad_norm": 1.3616036176681519, "learning_rate": 4.456075847529406e-08, "loss": 0.3876, "step": 15845 }, { "epoch": 0.9507409851802964, "grad_norm": 1.3231626749038696, "learning_rate": 4.445263403664829e-08, "loss": 0.3741, "step": 15846 }, { "epoch": 0.9508009839803204, "grad_norm": 1.4669264554977417, "learning_rate": 4.4344640098283094e-08, "loss": 0.3775, "step": 15847 }, { "epoch": 0.9508609827803444, "grad_norm": 1.3285139799118042, "learning_rate": 4.4236776664277e-08, "loss": 0.3616, "step": 15848 }, { "epoch": 0.9509209815803684, "grad_norm": 1.3714667558670044, "learning_rate": 4.412904373870346e-08, "loss": 0.3265, "step": 15849 }, { "epoch": 0.9509809803803924, "grad_norm": 1.4944186210632324, "learning_rate": 4.4021441325630904e-08, "loss": 0.409, "step": 15850 }, { "epoch": 0.9510409791804164, "grad_norm": 1.271280288696289, "learning_rate": 4.391396942912307e-08, "loss": 0.3502, "step": 15851 }, { "epoch": 0.9511009779804404, "grad_norm": 1.4939831495285034, "learning_rate": 4.3806628053239035e-08, "loss": 0.3906, "step": 15852 }, { "epoch": 0.9511609767804644, "grad_norm": 1.3721375465393066, "learning_rate": 4.369941720203207e-08, "loss": 0.3803, "step": 15853 }, { "epoch": 0.9512209755804883, "grad_norm": 1.38939368724823, "learning_rate": 4.359233687955116e-08, "loss": 0.3781, "step": 15854 }, { "epoch": 0.9512809743805124, "grad_norm": 1.4180690050125122, "learning_rate": 4.348538708984062e-08, "loss": 0.3877, "step": 15855 }, { "epoch": 0.9513409731805363, "grad_norm": 1.501547932624817, "learning_rate": 4.337856783693894e-08, "loss": 0.3621, "step": 15856 }, { "epoch": 0.9514009719805604, "grad_norm": 1.279340147972107, "learning_rate": 4.327187912488034e-08, "loss": 0.3586, "step": 15857 }, { "epoch": 0.9514609707805843, "grad_norm": 1.4200519323349, "learning_rate": 4.316532095769399e-08, "loss": 0.402, "step": 15858 }, { "epoch": 0.9515209695806084, "grad_norm": 1.3536667823791504, "learning_rate": 4.305889333940477e-08, "loss": 0.3519, "step": 15859 }, { "epoch": 0.9515809683806324, "grad_norm": 1.333735704421997, "learning_rate": 4.295259627403059e-08, "loss": 0.3287, "step": 15860 }, { "epoch": 0.9516409671806564, "grad_norm": 1.327475666999817, "learning_rate": 4.2846429765587004e-08, "loss": 0.3624, "step": 15861 }, { "epoch": 0.9517009659806804, "grad_norm": 1.4445292949676514, "learning_rate": 4.2740393818082586e-08, "loss": 0.3713, "step": 15862 }, { "epoch": 0.9517609647807044, "grad_norm": 1.2525548934936523, "learning_rate": 4.26344884355232e-08, "loss": 0.3249, "step": 15863 }, { "epoch": 0.9518209635807284, "grad_norm": 1.5040735006332397, "learning_rate": 4.252871362190652e-08, "loss": 0.4134, "step": 15864 }, { "epoch": 0.9518809623807524, "grad_norm": 1.3374073505401611, "learning_rate": 4.242306938122869e-08, "loss": 0.362, "step": 15865 }, { "epoch": 0.9519409611807764, "grad_norm": 1.21433687210083, "learning_rate": 4.231755571747847e-08, "loss": 0.3477, "step": 15866 }, { "epoch": 0.9520009599808004, "grad_norm": 1.518856406211853, "learning_rate": 4.2212172634641106e-08, "loss": 0.3629, "step": 15867 }, { "epoch": 0.9520609587808244, "grad_norm": 1.2019301652908325, "learning_rate": 4.210692013669642e-08, "loss": 0.3399, "step": 15868 }, { "epoch": 0.9521209575808484, "grad_norm": 1.302826166152954, "learning_rate": 4.2001798227619177e-08, "loss": 0.3844, "step": 15869 }, { "epoch": 0.9521809563808724, "grad_norm": 1.4139915704727173, "learning_rate": 4.189680691137948e-08, "loss": 0.4176, "step": 15870 }, { "epoch": 0.9522409551808964, "grad_norm": 1.290328860282898, "learning_rate": 4.1791946191942375e-08, "loss": 0.379, "step": 15871 }, { "epoch": 0.9523009539809204, "grad_norm": 1.2987353801727295, "learning_rate": 4.1687216073267864e-08, "loss": 0.4091, "step": 15872 }, { "epoch": 0.9523609527809443, "grad_norm": 1.4656612873077393, "learning_rate": 4.158261655931128e-08, "loss": 0.4212, "step": 15873 }, { "epoch": 0.9524209515809684, "grad_norm": 1.359100580215454, "learning_rate": 4.147814765402252e-08, "loss": 0.376, "step": 15874 }, { "epoch": 0.9524809503809923, "grad_norm": 1.1939888000488281, "learning_rate": 4.137380936134721e-08, "loss": 0.3524, "step": 15875 }, { "epoch": 0.9525409491810164, "grad_norm": 1.2575500011444092, "learning_rate": 4.1269601685225534e-08, "loss": 0.3702, "step": 15876 }, { "epoch": 0.9526009479810403, "grad_norm": 1.222170352935791, "learning_rate": 4.1165524629593e-08, "loss": 0.3078, "step": 15877 }, { "epoch": 0.9526609467810644, "grad_norm": 1.2997276782989502, "learning_rate": 4.106157819838085e-08, "loss": 0.3924, "step": 15878 }, { "epoch": 0.9527209455810883, "grad_norm": 1.3556565046310425, "learning_rate": 4.095776239551335e-08, "loss": 0.3677, "step": 15879 }, { "epoch": 0.9527809443811124, "grad_norm": 1.3194873332977295, "learning_rate": 4.085407722491202e-08, "loss": 0.4056, "step": 15880 }, { "epoch": 0.9528409431811363, "grad_norm": 1.318433165550232, "learning_rate": 4.075052269049217e-08, "loss": 0.3571, "step": 15881 }, { "epoch": 0.9529009419811604, "grad_norm": 1.3641475439071655, "learning_rate": 4.064709879616485e-08, "loss": 0.3683, "step": 15882 }, { "epoch": 0.9529609407811844, "grad_norm": 1.42573881149292, "learning_rate": 4.054380554583603e-08, "loss": 0.3557, "step": 15883 }, { "epoch": 0.9530209395812084, "grad_norm": 1.3226382732391357, "learning_rate": 4.044064294340627e-08, "loss": 0.325, "step": 15884 }, { "epoch": 0.9530809383812324, "grad_norm": 1.3796180486679077, "learning_rate": 4.033761099277183e-08, "loss": 0.4063, "step": 15885 }, { "epoch": 0.9531409371812564, "grad_norm": 1.2551847696304321, "learning_rate": 4.0234709697823944e-08, "loss": 0.3571, "step": 15886 }, { "epoch": 0.9532009359812804, "grad_norm": 1.1988483667373657, "learning_rate": 4.0131939062447996e-08, "loss": 0.3642, "step": 15887 }, { "epoch": 0.9532609347813044, "grad_norm": 1.3086880445480347, "learning_rate": 4.0029299090525495e-08, "loss": 0.3465, "step": 15888 }, { "epoch": 0.9533209335813284, "grad_norm": 1.2768269777297974, "learning_rate": 3.9926789785932894e-08, "loss": 0.3371, "step": 15889 }, { "epoch": 0.9533809323813524, "grad_norm": 1.2777115106582642, "learning_rate": 3.9824411152541596e-08, "loss": 0.3853, "step": 15890 }, { "epoch": 0.9534409311813764, "grad_norm": 1.3538634777069092, "learning_rate": 3.972216319421756e-08, "loss": 0.4003, "step": 15891 }, { "epoch": 0.9535009299814003, "grad_norm": 1.3119875192642212, "learning_rate": 3.962004591482249e-08, "loss": 0.3272, "step": 15892 }, { "epoch": 0.9535609287814244, "grad_norm": 1.3172799348831177, "learning_rate": 3.9518059318212616e-08, "loss": 0.3572, "step": 15893 }, { "epoch": 0.9536209275814483, "grad_norm": 1.443834662437439, "learning_rate": 3.9416203408240316e-08, "loss": 0.3757, "step": 15894 }, { "epoch": 0.9536809263814724, "grad_norm": 1.3045724630355835, "learning_rate": 3.9314478188750947e-08, "loss": 0.3718, "step": 15895 }, { "epoch": 0.9537409251814963, "grad_norm": 1.4434527158737183, "learning_rate": 3.921288366358716e-08, "loss": 0.3914, "step": 15896 }, { "epoch": 0.9538009239815204, "grad_norm": 1.4885281324386597, "learning_rate": 3.9111419836585395e-08, "loss": 0.3583, "step": 15897 }, { "epoch": 0.9538609227815443, "grad_norm": 1.2149006128311157, "learning_rate": 3.90100867115778e-08, "loss": 0.3505, "step": 15898 }, { "epoch": 0.9539209215815684, "grad_norm": 1.299296498298645, "learning_rate": 3.89088842923907e-08, "loss": 0.4122, "step": 15899 }, { "epoch": 0.9539809203815923, "grad_norm": 1.3896000385284424, "learning_rate": 3.8807812582846546e-08, "loss": 0.3605, "step": 15900 }, { "epoch": 0.9540409191816164, "grad_norm": 1.168338418006897, "learning_rate": 3.870687158676195e-08, "loss": 0.3115, "step": 15901 }, { "epoch": 0.9541009179816403, "grad_norm": 1.217685341835022, "learning_rate": 3.860606130794963e-08, "loss": 0.3373, "step": 15902 }, { "epoch": 0.9541609167816644, "grad_norm": 1.5534133911132812, "learning_rate": 3.850538175021573e-08, "loss": 0.3767, "step": 15903 }, { "epoch": 0.9542209155816883, "grad_norm": 1.609676480293274, "learning_rate": 3.840483291736324e-08, "loss": 0.3972, "step": 15904 }, { "epoch": 0.9542809143817124, "grad_norm": 1.1559489965438843, "learning_rate": 3.830441481318936e-08, "loss": 0.3116, "step": 15905 }, { "epoch": 0.9543409131817364, "grad_norm": 1.2997701168060303, "learning_rate": 3.82041274414866e-08, "loss": 0.3815, "step": 15906 }, { "epoch": 0.9544009119817604, "grad_norm": 1.327760934829712, "learning_rate": 3.8103970806041276e-08, "loss": 0.3641, "step": 15907 }, { "epoch": 0.9544609107817844, "grad_norm": 1.3576077222824097, "learning_rate": 3.8003944910636965e-08, "loss": 0.3461, "step": 15908 }, { "epoch": 0.9545209095818084, "grad_norm": 1.384154200553894, "learning_rate": 3.790404975905104e-08, "loss": 0.3536, "step": 15909 }, { "epoch": 0.9545809083818324, "grad_norm": 1.2992429733276367, "learning_rate": 3.780428535505581e-08, "loss": 0.331, "step": 15910 }, { "epoch": 0.9546409071818563, "grad_norm": 1.2942206859588623, "learning_rate": 3.7704651702418936e-08, "loss": 0.3509, "step": 15911 }, { "epoch": 0.9547009059818804, "grad_norm": 1.4662752151489258, "learning_rate": 3.760514880490301e-08, "loss": 0.4242, "step": 15912 }, { "epoch": 0.9547609047819043, "grad_norm": 1.2035982608795166, "learning_rate": 3.750577666626598e-08, "loss": 0.3267, "step": 15913 }, { "epoch": 0.9548209035819284, "grad_norm": 1.442083716392517, "learning_rate": 3.740653529026111e-08, "loss": 0.3899, "step": 15914 }, { "epoch": 0.9548809023819523, "grad_norm": 1.3005049228668213, "learning_rate": 3.730742468063547e-08, "loss": 0.3595, "step": 15915 }, { "epoch": 0.9549409011819764, "grad_norm": 1.453360915184021, "learning_rate": 3.7208444841132225e-08, "loss": 0.3979, "step": 15916 }, { "epoch": 0.9550008999820003, "grad_norm": 1.3159416913986206, "learning_rate": 3.710959577549028e-08, "loss": 0.385, "step": 15917 }, { "epoch": 0.9550608987820244, "grad_norm": 1.3487695455551147, "learning_rate": 3.701087748744114e-08, "loss": 0.3378, "step": 15918 }, { "epoch": 0.9551208975820483, "grad_norm": 1.4331045150756836, "learning_rate": 3.691228998071438e-08, "loss": 0.3564, "step": 15919 }, { "epoch": 0.9551808963820724, "grad_norm": 1.2829620838165283, "learning_rate": 3.681383325903259e-08, "loss": 0.3872, "step": 15920 }, { "epoch": 0.9552408951820963, "grad_norm": 1.336897373199463, "learning_rate": 3.671550732611445e-08, "loss": 0.364, "step": 15921 }, { "epoch": 0.9553008939821204, "grad_norm": 1.4877023696899414, "learning_rate": 3.661731218567205e-08, "loss": 0.4496, "step": 15922 }, { "epoch": 0.9553608927821443, "grad_norm": 1.3542149066925049, "learning_rate": 3.6519247841415134e-08, "loss": 0.4071, "step": 15923 }, { "epoch": 0.9554208915821684, "grad_norm": 1.3364620208740234, "learning_rate": 3.642131429704648e-08, "loss": 0.3441, "step": 15924 }, { "epoch": 0.9554808903821923, "grad_norm": 1.3400673866271973, "learning_rate": 3.6323511556264954e-08, "loss": 0.3554, "step": 15925 }, { "epoch": 0.9555408891822164, "grad_norm": 1.245207667350769, "learning_rate": 3.622583962276399e-08, "loss": 0.3462, "step": 15926 }, { "epoch": 0.9556008879822404, "grad_norm": 1.1725795269012451, "learning_rate": 3.612829850023197e-08, "loss": 0.3297, "step": 15927 }, { "epoch": 0.9556608867822644, "grad_norm": 1.3920994997024536, "learning_rate": 3.603088819235262e-08, "loss": 0.3816, "step": 15928 }, { "epoch": 0.9557208855822884, "grad_norm": 1.2694122791290283, "learning_rate": 3.5933608702805375e-08, "loss": 0.315, "step": 15929 }, { "epoch": 0.9557808843823123, "grad_norm": 1.291894555091858, "learning_rate": 3.5836460035262695e-08, "loss": 0.3613, "step": 15930 }, { "epoch": 0.9558408831823364, "grad_norm": 1.4161051511764526, "learning_rate": 3.57394421933947e-08, "loss": 0.3712, "step": 15931 }, { "epoch": 0.9559008819823603, "grad_norm": 1.3915156126022339, "learning_rate": 3.56425551808649e-08, "loss": 0.3945, "step": 15932 }, { "epoch": 0.9559608807823844, "grad_norm": 1.3544888496398926, "learning_rate": 3.554579900133214e-08, "loss": 0.3916, "step": 15933 }, { "epoch": 0.9560208795824083, "grad_norm": 1.343001365661621, "learning_rate": 3.544917365845024e-08, "loss": 0.3523, "step": 15934 }, { "epoch": 0.9560808783824324, "grad_norm": 1.3876121044158936, "learning_rate": 3.535267915586909e-08, "loss": 0.3582, "step": 15935 }, { "epoch": 0.9561408771824563, "grad_norm": 1.2562987804412842, "learning_rate": 3.5256315497232017e-08, "loss": 0.345, "step": 15936 }, { "epoch": 0.9562008759824804, "grad_norm": 1.3555032014846802, "learning_rate": 3.5160082686178815e-08, "loss": 0.3568, "step": 15937 }, { "epoch": 0.9562608747825043, "grad_norm": 1.2975261211395264, "learning_rate": 3.506398072634309e-08, "loss": 0.4145, "step": 15938 }, { "epoch": 0.9563208735825284, "grad_norm": 1.4860494136810303, "learning_rate": 3.4968009621354934e-08, "loss": 0.3887, "step": 15939 }, { "epoch": 0.9563808723825523, "grad_norm": 1.3019620180130005, "learning_rate": 3.4872169374838616e-08, "loss": 0.3606, "step": 15940 }, { "epoch": 0.9564408711825764, "grad_norm": 1.3416296243667603, "learning_rate": 3.477645999041296e-08, "loss": 0.355, "step": 15941 }, { "epoch": 0.9565008699826003, "grad_norm": 1.2320853471755981, "learning_rate": 3.4680881471692905e-08, "loss": 0.3943, "step": 15942 }, { "epoch": 0.9565608687826244, "grad_norm": 1.366762399673462, "learning_rate": 3.4585433822288346e-08, "loss": 0.3307, "step": 15943 }, { "epoch": 0.9566208675826483, "grad_norm": 1.3760563135147095, "learning_rate": 3.449011704580335e-08, "loss": 0.3478, "step": 15944 }, { "epoch": 0.9566808663826724, "grad_norm": 1.4937331676483154, "learning_rate": 3.439493114583769e-08, "loss": 0.3388, "step": 15945 }, { "epoch": 0.9567408651826963, "grad_norm": 1.451637625694275, "learning_rate": 3.4299876125986115e-08, "loss": 0.4067, "step": 15946 }, { "epoch": 0.9568008639827204, "grad_norm": 1.3967632055282593, "learning_rate": 3.4204951989839084e-08, "loss": 0.3565, "step": 15947 }, { "epoch": 0.9568608627827443, "grad_norm": 1.3232412338256836, "learning_rate": 3.411015874098044e-08, "loss": 0.3317, "step": 15948 }, { "epoch": 0.9569208615827683, "grad_norm": 1.349554181098938, "learning_rate": 3.401549638299056e-08, "loss": 0.3628, "step": 15949 }, { "epoch": 0.9569808603827924, "grad_norm": 1.3493984937667847, "learning_rate": 3.392096491944474e-08, "loss": 0.4142, "step": 15950 }, { "epoch": 0.9570408591828163, "grad_norm": 1.501284122467041, "learning_rate": 3.382656435391246e-08, "loss": 0.3944, "step": 15951 }, { "epoch": 0.9571008579828404, "grad_norm": 1.2728509902954102, "learning_rate": 3.373229468995892e-08, "loss": 0.3641, "step": 15952 }, { "epoch": 0.9571608567828643, "grad_norm": 1.3692032098770142, "learning_rate": 3.363815593114428e-08, "loss": 0.3753, "step": 15953 }, { "epoch": 0.9572208555828884, "grad_norm": 1.437231421470642, "learning_rate": 3.3544148081023625e-08, "loss": 0.3719, "step": 15954 }, { "epoch": 0.9572808543829123, "grad_norm": 1.3978266716003418, "learning_rate": 3.345027114314819e-08, "loss": 0.3403, "step": 15955 }, { "epoch": 0.9573408531829364, "grad_norm": 1.296758770942688, "learning_rate": 3.3356525121061406e-08, "loss": 0.3624, "step": 15956 }, { "epoch": 0.9574008519829603, "grad_norm": 1.2932497262954712, "learning_rate": 3.3262910018305556e-08, "loss": 0.3624, "step": 15957 }, { "epoch": 0.9574608507829844, "grad_norm": 1.3694264888763428, "learning_rate": 3.3169425838414364e-08, "loss": 0.3718, "step": 15958 }, { "epoch": 0.9575208495830083, "grad_norm": 1.4650990962982178, "learning_rate": 3.307607258491962e-08, "loss": 0.3934, "step": 15959 }, { "epoch": 0.9575808483830324, "grad_norm": 1.261788249015808, "learning_rate": 3.298285026134651e-08, "loss": 0.3904, "step": 15960 }, { "epoch": 0.9576408471830563, "grad_norm": 1.194108247756958, "learning_rate": 3.288975887121514e-08, "loss": 0.3384, "step": 15961 }, { "epoch": 0.9577008459830804, "grad_norm": 1.3457276821136475, "learning_rate": 3.279679841804139e-08, "loss": 0.3562, "step": 15962 }, { "epoch": 0.9577608447831043, "grad_norm": 1.4111489057540894, "learning_rate": 3.2703968905336444e-08, "loss": 0.3624, "step": 15963 }, { "epoch": 0.9578208435831284, "grad_norm": 1.3571767807006836, "learning_rate": 3.261127033660527e-08, "loss": 0.351, "step": 15964 }, { "epoch": 0.9578808423831523, "grad_norm": 1.5308725833892822, "learning_rate": 3.2518702715348956e-08, "loss": 0.3892, "step": 15965 }, { "epoch": 0.9579408411831764, "grad_norm": 1.227292776107788, "learning_rate": 3.242626604506354e-08, "loss": 0.3947, "step": 15966 }, { "epoch": 0.9580008399832003, "grad_norm": 1.3011623620986938, "learning_rate": 3.2333960329240017e-08, "loss": 0.3868, "step": 15967 }, { "epoch": 0.9580608387832243, "grad_norm": 1.3710216283798218, "learning_rate": 3.2241785571363923e-08, "loss": 0.365, "step": 15968 }, { "epoch": 0.9581208375832483, "grad_norm": 1.2986431121826172, "learning_rate": 3.214974177491653e-08, "loss": 0.3767, "step": 15969 }, { "epoch": 0.9581808363832723, "grad_norm": 1.4401828050613403, "learning_rate": 3.205782894337406e-08, "loss": 0.377, "step": 15970 }, { "epoch": 0.9582408351832963, "grad_norm": 1.2871348857879639, "learning_rate": 3.1966047080207285e-08, "loss": 0.3533, "step": 15971 }, { "epoch": 0.9583008339833203, "grad_norm": 1.3064110279083252, "learning_rate": 3.1874396188882714e-08, "loss": 0.3499, "step": 15972 }, { "epoch": 0.9583608327833444, "grad_norm": 1.5498230457305908, "learning_rate": 3.1782876272861414e-08, "loss": 0.4378, "step": 15973 }, { "epoch": 0.9584208315833683, "grad_norm": 1.320760726928711, "learning_rate": 3.169148733559979e-08, "loss": 0.3796, "step": 15974 }, { "epoch": 0.9584808303833924, "grad_norm": 1.3590930700302124, "learning_rate": 3.160022938054918e-08, "loss": 0.3204, "step": 15975 }, { "epoch": 0.9585408291834163, "grad_norm": 1.4237357378005981, "learning_rate": 3.1509102411155894e-08, "loss": 0.3848, "step": 15976 }, { "epoch": 0.9586008279834404, "grad_norm": 1.2657737731933594, "learning_rate": 3.141810643086118e-08, "loss": 0.383, "step": 15977 }, { "epoch": 0.9586608267834643, "grad_norm": 1.3060224056243896, "learning_rate": 3.132724144310201e-08, "loss": 0.3468, "step": 15978 }, { "epoch": 0.9587208255834884, "grad_norm": 1.2674123048782349, "learning_rate": 3.12365074513099e-08, "loss": 0.3161, "step": 15979 }, { "epoch": 0.9587808243835123, "grad_norm": 1.3412314653396606, "learning_rate": 3.1145904458910575e-08, "loss": 0.3684, "step": 15980 }, { "epoch": 0.9588408231835364, "grad_norm": 1.3785123825073242, "learning_rate": 3.1055432469327014e-08, "loss": 0.4066, "step": 15981 }, { "epoch": 0.9589008219835603, "grad_norm": 1.364433765411377, "learning_rate": 3.0965091485975206e-08, "loss": 0.3633, "step": 15982 }, { "epoch": 0.9589608207835844, "grad_norm": 1.3836636543273926, "learning_rate": 3.087488151226647e-08, "loss": 0.3248, "step": 15983 }, { "epoch": 0.9590208195836083, "grad_norm": 1.3054633140563965, "learning_rate": 3.0784802551608644e-08, "loss": 0.3087, "step": 15984 }, { "epoch": 0.9590808183836323, "grad_norm": 1.3558056354522705, "learning_rate": 3.0694854607402957e-08, "loss": 0.3667, "step": 15985 }, { "epoch": 0.9591408171836563, "grad_norm": 1.1935327053070068, "learning_rate": 3.060503768304673e-08, "loss": 0.3489, "step": 15986 }, { "epoch": 0.9592008159836803, "grad_norm": 1.3526943922042847, "learning_rate": 3.0515351781931486e-08, "loss": 0.3752, "step": 15987 }, { "epoch": 0.9592608147837043, "grad_norm": 1.39609694480896, "learning_rate": 3.042579690744446e-08, "loss": 0.4245, "step": 15988 }, { "epoch": 0.9593208135837283, "grad_norm": 1.3591346740722656, "learning_rate": 3.0336373062967825e-08, "loss": 0.3492, "step": 15989 }, { "epoch": 0.9593808123837523, "grad_norm": 1.206977367401123, "learning_rate": 3.024708025187872e-08, "loss": 0.3646, "step": 15990 }, { "epoch": 0.9594408111837763, "grad_norm": 1.2403315305709839, "learning_rate": 3.0157918477549226e-08, "loss": 0.3754, "step": 15991 }, { "epoch": 0.9595008099838003, "grad_norm": 1.3370383977890015, "learning_rate": 3.006888774334637e-08, "loss": 0.3468, "step": 15992 }, { "epoch": 0.9595608087838243, "grad_norm": 1.3995031118392944, "learning_rate": 2.997998805263291e-08, "loss": 0.3611, "step": 15993 }, { "epoch": 0.9596208075838483, "grad_norm": 1.266993522644043, "learning_rate": 2.9891219408765765e-08, "loss": 0.3416, "step": 15994 }, { "epoch": 0.9596808063838723, "grad_norm": 1.4038153886795044, "learning_rate": 2.9802581815097206e-08, "loss": 0.351, "step": 15995 }, { "epoch": 0.9597408051838964, "grad_norm": 1.421243667602539, "learning_rate": 2.9714075274975602e-08, "loss": 0.3487, "step": 15996 }, { "epoch": 0.9598008039839203, "grad_norm": 1.3260172605514526, "learning_rate": 2.9625699791742342e-08, "loss": 0.3744, "step": 15997 }, { "epoch": 0.9598608027839444, "grad_norm": 1.415547251701355, "learning_rate": 2.9537455368735698e-08, "loss": 0.3418, "step": 15998 }, { "epoch": 0.9599208015839683, "grad_norm": 1.3223631381988525, "learning_rate": 2.9449342009287727e-08, "loss": 0.3662, "step": 15999 }, { "epoch": 0.9599808003839924, "grad_norm": 1.3004200458526611, "learning_rate": 2.936135971672621e-08, "loss": 0.3964, "step": 16000 } ], "logging_steps": 1.0, "max_steps": 16667, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.752510618796032e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }