{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 3251, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00030759766225776686, "grad_norm": 0.5577891701246337, "learning_rate": 1.020408163265306e-06, "loss": 1.0798, "step": 1 }, { "epoch": 0.0006151953245155337, "grad_norm": 0.5779764580408586, "learning_rate": 2.040816326530612e-06, "loss": 1.0804, "step": 2 }, { "epoch": 0.0009227929867733005, "grad_norm": 0.5798437323526674, "learning_rate": 3.0612244897959185e-06, "loss": 1.026, "step": 3 }, { "epoch": 0.0012303906490310674, "grad_norm": 0.449357295379226, "learning_rate": 4.081632653061224e-06, "loss": 1.0361, "step": 4 }, { "epoch": 0.0015379883112888342, "grad_norm": 0.7044276595609955, "learning_rate": 5.102040816326531e-06, "loss": 1.0984, "step": 5 }, { "epoch": 0.001845585973546601, "grad_norm": 0.6060813569369938, "learning_rate": 6.122448979591837e-06, "loss": 1.0356, "step": 6 }, { "epoch": 0.002153183635804368, "grad_norm": 0.46315574692623007, "learning_rate": 7.142857142857143e-06, "loss": 1.0255, "step": 7 }, { "epoch": 0.002460781298062135, "grad_norm": 0.542230604538605, "learning_rate": 8.163265306122448e-06, "loss": 1.1513, "step": 8 }, { "epoch": 0.0027683789603199014, "grad_norm": 0.6113580171272444, "learning_rate": 9.183673469387756e-06, "loss": 1.121, "step": 9 }, { "epoch": 0.0030759766225776685, "grad_norm": 0.5751928975106643, "learning_rate": 1.0204081632653061e-05, "loss": 0.957, "step": 10 }, { "epoch": 0.003383574284835435, "grad_norm": 0.5820091965470482, "learning_rate": 1.1224489795918369e-05, "loss": 0.9904, "step": 11 }, { "epoch": 0.003691171947093202, "grad_norm": 0.7354110005324208, "learning_rate": 1.2244897959183674e-05, "loss": 1.0247, "step": 12 }, { "epoch": 0.003998769609350969, "grad_norm": 0.7798512889943024, "learning_rate": 1.3265306122448982e-05, "loss": 1.1354, "step": 13 }, { "epoch": 0.004306367271608736, "grad_norm": 0.4898079160543595, "learning_rate": 1.4285714285714285e-05, "loss": 1.0357, "step": 14 }, { "epoch": 0.004613964933866502, "grad_norm": 0.5734944344826018, "learning_rate": 1.5306122448979594e-05, "loss": 1.1152, "step": 15 }, { "epoch": 0.00492156259612427, "grad_norm": 0.6555247527083641, "learning_rate": 1.6326530612244897e-05, "loss": 1.1787, "step": 16 }, { "epoch": 0.005229160258382036, "grad_norm": 0.5223166924996843, "learning_rate": 1.7346938775510206e-05, "loss": 0.9246, "step": 17 }, { "epoch": 0.005536757920639803, "grad_norm": 0.46675704908779214, "learning_rate": 1.836734693877551e-05, "loss": 0.9333, "step": 18 }, { "epoch": 0.00584435558289757, "grad_norm": 0.5973277503716244, "learning_rate": 1.9387755102040817e-05, "loss": 1.2447, "step": 19 }, { "epoch": 0.006151953245155337, "grad_norm": 0.8661503080838212, "learning_rate": 2.0408163265306123e-05, "loss": 1.0647, "step": 20 }, { "epoch": 0.0064595509074131035, "grad_norm": 0.7941594705161089, "learning_rate": 2.1428571428571428e-05, "loss": 0.8933, "step": 21 }, { "epoch": 0.00676714856967087, "grad_norm": 0.4074823621791599, "learning_rate": 2.2448979591836737e-05, "loss": 0.788, "step": 22 }, { "epoch": 0.0070747462319286376, "grad_norm": 0.420633833549099, "learning_rate": 2.3469387755102043e-05, "loss": 0.811, "step": 23 }, { "epoch": 0.007382343894186404, "grad_norm": 0.5310584631124889, "learning_rate": 2.448979591836735e-05, "loss": 0.9631, "step": 24 }, { "epoch": 0.007689941556444171, "grad_norm": 1.0913600487237682, "learning_rate": 2.5510204081632654e-05, "loss": 1.0922, "step": 25 }, { "epoch": 0.007997539218701938, "grad_norm": 0.4996132449165011, "learning_rate": 2.6530612244897963e-05, "loss": 0.8244, "step": 26 }, { "epoch": 0.008305136880959704, "grad_norm": 0.6163186893292472, "learning_rate": 2.7551020408163265e-05, "loss": 0.927, "step": 27 }, { "epoch": 0.008612734543217471, "grad_norm": 0.9057748075716977, "learning_rate": 2.857142857142857e-05, "loss": 1.0989, "step": 28 }, { "epoch": 0.008920332205475239, "grad_norm": 0.4119266817655138, "learning_rate": 2.959183673469388e-05, "loss": 0.9225, "step": 29 }, { "epoch": 0.009227929867733005, "grad_norm": 0.5770873733595778, "learning_rate": 3.061224489795919e-05, "loss": 0.9584, "step": 30 }, { "epoch": 0.009535527529990772, "grad_norm": 0.7081276106272124, "learning_rate": 3.1632653061224494e-05, "loss": 0.9284, "step": 31 }, { "epoch": 0.00984312519224854, "grad_norm": 0.5236207954759156, "learning_rate": 3.265306122448979e-05, "loss": 0.9616, "step": 32 }, { "epoch": 0.010150722854506305, "grad_norm": 0.6566473304005301, "learning_rate": 3.36734693877551e-05, "loss": 1.0586, "step": 33 }, { "epoch": 0.010458320516764073, "grad_norm": 0.6906103494743673, "learning_rate": 3.469387755102041e-05, "loss": 0.9036, "step": 34 }, { "epoch": 0.01076591817902184, "grad_norm": 0.5887834275035978, "learning_rate": 3.571428571428572e-05, "loss": 0.9716, "step": 35 }, { "epoch": 0.011073515841279606, "grad_norm": 0.7537885616185995, "learning_rate": 3.673469387755102e-05, "loss": 1.1181, "step": 36 }, { "epoch": 0.011381113503537373, "grad_norm": 0.4014965420625961, "learning_rate": 3.775510204081633e-05, "loss": 0.8664, "step": 37 }, { "epoch": 0.01168871116579514, "grad_norm": 0.8875318358973115, "learning_rate": 3.8775510204081634e-05, "loss": 1.0479, "step": 38 }, { "epoch": 0.011996308828052906, "grad_norm": 0.830321981109944, "learning_rate": 3.979591836734694e-05, "loss": 1.0209, "step": 39 }, { "epoch": 0.012303906490310674, "grad_norm": 0.837232341127796, "learning_rate": 4.0816326530612245e-05, "loss": 0.8882, "step": 40 }, { "epoch": 0.012611504152568441, "grad_norm": 0.46601564146218594, "learning_rate": 4.183673469387756e-05, "loss": 0.7841, "step": 41 }, { "epoch": 0.012919101814826207, "grad_norm": 0.4923467455982663, "learning_rate": 4.2857142857142856e-05, "loss": 0.9687, "step": 42 }, { "epoch": 0.013226699477083975, "grad_norm": 0.5149347764837889, "learning_rate": 4.387755102040816e-05, "loss": 0.9569, "step": 43 }, { "epoch": 0.01353429713934174, "grad_norm": 0.6269342039753788, "learning_rate": 4.4897959183673474e-05, "loss": 0.8589, "step": 44 }, { "epoch": 0.013841894801599508, "grad_norm": 0.9853376287060243, "learning_rate": 4.591836734693878e-05, "loss": 1.0885, "step": 45 }, { "epoch": 0.014149492463857275, "grad_norm": 0.5421466370379916, "learning_rate": 4.6938775510204086e-05, "loss": 0.7629, "step": 46 }, { "epoch": 0.01445709012611504, "grad_norm": 0.6436295038955867, "learning_rate": 4.795918367346939e-05, "loss": 0.9695, "step": 47 }, { "epoch": 0.014764687788372808, "grad_norm": 0.5866440505573596, "learning_rate": 4.89795918367347e-05, "loss": 1.1372, "step": 48 }, { "epoch": 0.015072285450630576, "grad_norm": 1.131291013104062, "learning_rate": 5e-05, "loss": 1.0584, "step": 49 }, { "epoch": 0.015379883112888341, "grad_norm": 0.9687776644179933, "learning_rate": 5.102040816326531e-05, "loss": 1.1506, "step": 50 }, { "epoch": 0.015687480775146107, "grad_norm": 0.5374349371775545, "learning_rate": 5.2040816326530614e-05, "loss": 1.0897, "step": 51 }, { "epoch": 0.015995078437403876, "grad_norm": 0.6359898727055687, "learning_rate": 5.3061224489795926e-05, "loss": 0.8774, "step": 52 }, { "epoch": 0.016302676099661642, "grad_norm": 0.6824586835091433, "learning_rate": 5.408163265306123e-05, "loss": 0.8654, "step": 53 }, { "epoch": 0.016610273761919408, "grad_norm": 0.7667743632798805, "learning_rate": 5.510204081632653e-05, "loss": 0.9537, "step": 54 }, { "epoch": 0.016917871424177177, "grad_norm": 0.4588751634259903, "learning_rate": 5.6122448979591836e-05, "loss": 0.8961, "step": 55 }, { "epoch": 0.017225469086434943, "grad_norm": 0.659784919469143, "learning_rate": 5.714285714285714e-05, "loss": 0.9576, "step": 56 }, { "epoch": 0.01753306674869271, "grad_norm": 0.6388780029870741, "learning_rate": 5.816326530612245e-05, "loss": 1.1789, "step": 57 }, { "epoch": 0.017840664410950478, "grad_norm": 0.5818354596165453, "learning_rate": 5.918367346938776e-05, "loss": 0.9075, "step": 58 }, { "epoch": 0.018148262073208243, "grad_norm": 0.9208348058968859, "learning_rate": 6.0204081632653065e-05, "loss": 1.0758, "step": 59 }, { "epoch": 0.01845585973546601, "grad_norm": 0.5813988427792706, "learning_rate": 6.122448979591838e-05, "loss": 0.758, "step": 60 }, { "epoch": 0.01876345739772378, "grad_norm": 0.4688389182063472, "learning_rate": 6.224489795918368e-05, "loss": 0.8563, "step": 61 }, { "epoch": 0.019071055059981544, "grad_norm": 0.5028416380121905, "learning_rate": 6.326530612244899e-05, "loss": 0.8867, "step": 62 }, { "epoch": 0.01937865272223931, "grad_norm": 0.5038012932702003, "learning_rate": 6.428571428571429e-05, "loss": 0.8913, "step": 63 }, { "epoch": 0.01968625038449708, "grad_norm": 0.6274384294231521, "learning_rate": 6.530612244897959e-05, "loss": 1.0451, "step": 64 }, { "epoch": 0.019993848046754845, "grad_norm": 0.5194936735159731, "learning_rate": 6.63265306122449e-05, "loss": 0.8667, "step": 65 }, { "epoch": 0.02030144570901261, "grad_norm": 0.47449190142696734, "learning_rate": 6.73469387755102e-05, "loss": 1.0155, "step": 66 }, { "epoch": 0.02060904337127038, "grad_norm": 0.8733642768698384, "learning_rate": 6.836734693877551e-05, "loss": 0.9182, "step": 67 }, { "epoch": 0.020916641033528145, "grad_norm": 0.5884310355737608, "learning_rate": 6.938775510204082e-05, "loss": 0.9779, "step": 68 }, { "epoch": 0.02122423869578591, "grad_norm": 0.49643265802166026, "learning_rate": 7.040816326530612e-05, "loss": 0.9673, "step": 69 }, { "epoch": 0.02153183635804368, "grad_norm": 0.597376530956216, "learning_rate": 7.142857142857143e-05, "loss": 0.8982, "step": 70 }, { "epoch": 0.021839434020301446, "grad_norm": 0.4363329430050833, "learning_rate": 7.244897959183675e-05, "loss": 0.8169, "step": 71 }, { "epoch": 0.02214703168255921, "grad_norm": 0.9894153167756689, "learning_rate": 7.346938775510205e-05, "loss": 1.1401, "step": 72 }, { "epoch": 0.02245462934481698, "grad_norm": 0.4492875559340862, "learning_rate": 7.448979591836736e-05, "loss": 0.8434, "step": 73 }, { "epoch": 0.022762227007074746, "grad_norm": 0.7304604403473247, "learning_rate": 7.551020408163266e-05, "loss": 1.0202, "step": 74 }, { "epoch": 0.023069824669332512, "grad_norm": 0.6950037239295094, "learning_rate": 7.653061224489796e-05, "loss": 1.0472, "step": 75 }, { "epoch": 0.02337742233159028, "grad_norm": 0.5040925020539991, "learning_rate": 7.755102040816327e-05, "loss": 0.9203, "step": 76 }, { "epoch": 0.023685019993848047, "grad_norm": 1.0657502275576043, "learning_rate": 7.857142857142858e-05, "loss": 1.0644, "step": 77 }, { "epoch": 0.023992617656105813, "grad_norm": 0.6734223577821025, "learning_rate": 7.959183673469388e-05, "loss": 0.9636, "step": 78 }, { "epoch": 0.024300215318363582, "grad_norm": 0.6683424574139794, "learning_rate": 8.061224489795919e-05, "loss": 0.9157, "step": 79 }, { "epoch": 0.024607812980621348, "grad_norm": 0.8113981249329141, "learning_rate": 8.163265306122449e-05, "loss": 0.9711, "step": 80 }, { "epoch": 0.024915410642879113, "grad_norm": 0.6695015030419523, "learning_rate": 8.26530612244898e-05, "loss": 1.0935, "step": 81 }, { "epoch": 0.025223008305136883, "grad_norm": 0.9718459258506997, "learning_rate": 8.367346938775511e-05, "loss": 0.9026, "step": 82 }, { "epoch": 0.02553060596739465, "grad_norm": 0.563710966554852, "learning_rate": 8.469387755102041e-05, "loss": 0.9657, "step": 83 }, { "epoch": 0.025838203629652414, "grad_norm": 0.5656450360604616, "learning_rate": 8.571428571428571e-05, "loss": 0.7891, "step": 84 }, { "epoch": 0.02614580129191018, "grad_norm": 0.5256264185136631, "learning_rate": 8.673469387755102e-05, "loss": 0.9582, "step": 85 }, { "epoch": 0.02645339895416795, "grad_norm": 0.48021959536149195, "learning_rate": 8.775510204081632e-05, "loss": 0.9261, "step": 86 }, { "epoch": 0.026760996616425715, "grad_norm": 0.5348691702506266, "learning_rate": 8.877551020408164e-05, "loss": 1.119, "step": 87 }, { "epoch": 0.02706859427868348, "grad_norm": 0.4494230778444036, "learning_rate": 8.979591836734695e-05, "loss": 0.8309, "step": 88 }, { "epoch": 0.02737619194094125, "grad_norm": 0.6846739982185379, "learning_rate": 9.081632653061225e-05, "loss": 0.9042, "step": 89 }, { "epoch": 0.027683789603199015, "grad_norm": 0.7642663693477223, "learning_rate": 9.183673469387756e-05, "loss": 0.8458, "step": 90 }, { "epoch": 0.02799138726545678, "grad_norm": 0.6982189400227046, "learning_rate": 9.285714285714286e-05, "loss": 0.9062, "step": 91 }, { "epoch": 0.02829898492771455, "grad_norm": 0.5809823170209728, "learning_rate": 9.387755102040817e-05, "loss": 0.9434, "step": 92 }, { "epoch": 0.028606582589972316, "grad_norm": 0.5644127093007668, "learning_rate": 9.489795918367348e-05, "loss": 0.9854, "step": 93 }, { "epoch": 0.02891418025223008, "grad_norm": 0.5925407390942712, "learning_rate": 9.591836734693878e-05, "loss": 0.84, "step": 94 }, { "epoch": 0.02922177791448785, "grad_norm": 0.7341573635107181, "learning_rate": 9.693877551020408e-05, "loss": 1.1187, "step": 95 }, { "epoch": 0.029529375576745617, "grad_norm": 0.6018951040648721, "learning_rate": 9.79591836734694e-05, "loss": 0.791, "step": 96 }, { "epoch": 0.029836973239003382, "grad_norm": 0.5505414741749136, "learning_rate": 9.897959183673469e-05, "loss": 0.9852, "step": 97 }, { "epoch": 0.03014457090126115, "grad_norm": 0.5031295575066129, "learning_rate": 0.0001, "loss": 0.9738, "step": 98 }, { "epoch": 0.030452168563518917, "grad_norm": 0.7230071902793476, "learning_rate": 9.999997518057154e-05, "loss": 0.8606, "step": 99 }, { "epoch": 0.030759766225776683, "grad_norm": 0.5341094812505756, "learning_rate": 9.999990072231075e-05, "loss": 0.8854, "step": 100 }, { "epoch": 0.031067363888034452, "grad_norm": 0.45503400347227185, "learning_rate": 9.999977662529157e-05, "loss": 0.9946, "step": 101 }, { "epoch": 0.031374961550292214, "grad_norm": 0.5126524007417529, "learning_rate": 9.999960288963721e-05, "loss": 0.9734, "step": 102 }, { "epoch": 0.031682559212549984, "grad_norm": 0.9554864190653168, "learning_rate": 9.999937951552014e-05, "loss": 1.1145, "step": 103 }, { "epoch": 0.03199015687480775, "grad_norm": 0.4580458954809431, "learning_rate": 9.999910650316213e-05, "loss": 0.7829, "step": 104 }, { "epoch": 0.032297754537065515, "grad_norm": 0.4580458954809431, "learning_rate": 9.999910650316213e-05, "loss": 1.2124, "step": 105 }, { "epoch": 0.032605352199323284, "grad_norm": 0.3500082913195441, "learning_rate": 9.99987838528342e-05, "loss": 0.8049, "step": 106 }, { "epoch": 0.03291294986158105, "grad_norm": 0.4942296620770744, "learning_rate": 9.99984115648567e-05, "loss": 0.725, "step": 107 }, { "epoch": 0.033220547523838816, "grad_norm": 0.5181059276772563, "learning_rate": 9.99979896395992e-05, "loss": 0.8642, "step": 108 }, { "epoch": 0.033528145186096585, "grad_norm": 0.7190369891469014, "learning_rate": 9.999751807748059e-05, "loss": 0.9692, "step": 109 }, { "epoch": 0.033835742848354354, "grad_norm": 0.7886361381528014, "learning_rate": 9.999699687896904e-05, "loss": 0.9188, "step": 110 }, { "epoch": 0.034143340510612116, "grad_norm": 0.4987615930350996, "learning_rate": 9.999642604458196e-05, "loss": 0.8131, "step": 111 }, { "epoch": 0.034450938172869885, "grad_norm": 0.756996689520604, "learning_rate": 9.999580557488608e-05, "loss": 1.0506, "step": 112 }, { "epoch": 0.034758535835127655, "grad_norm": 0.4776435960799518, "learning_rate": 9.999513547049738e-05, "loss": 0.893, "step": 113 }, { "epoch": 0.03506613349738542, "grad_norm": 0.5772255963473967, "learning_rate": 9.99944157320811e-05, "loss": 0.9415, "step": 114 }, { "epoch": 0.035373731159643186, "grad_norm": 0.49121070493149105, "learning_rate": 9.999364636035183e-05, "loss": 0.9467, "step": 115 }, { "epoch": 0.035681328821900955, "grad_norm": 0.5650139104589449, "learning_rate": 9.999282735607334e-05, "loss": 0.7092, "step": 116 }, { "epoch": 0.03598892648415872, "grad_norm": 0.7122002484770324, "learning_rate": 9.999195872005874e-05, "loss": 0.9047, "step": 117 }, { "epoch": 0.03629652414641649, "grad_norm": 0.47568636405807163, "learning_rate": 9.999104045317041e-05, "loss": 0.8844, "step": 118 }, { "epoch": 0.036604121808674256, "grad_norm": 0.8715722155309008, "learning_rate": 9.999007255631993e-05, "loss": 1.0623, "step": 119 }, { "epoch": 0.03691171947093202, "grad_norm": 1.2292174208307345, "learning_rate": 9.998905503046827e-05, "loss": 1.1876, "step": 120 }, { "epoch": 0.03721931713318979, "grad_norm": 0.5400974567938481, "learning_rate": 9.998798787662557e-05, "loss": 1.0426, "step": 121 }, { "epoch": 0.03752691479544756, "grad_norm": 0.5453831466841583, "learning_rate": 9.998687109585128e-05, "loss": 1.0107, "step": 122 }, { "epoch": 0.03783451245770532, "grad_norm": 0.562852242493144, "learning_rate": 9.99857046892541e-05, "loss": 0.799, "step": 123 }, { "epoch": 0.03814211011996309, "grad_norm": 0.5264503332566274, "learning_rate": 9.998448865799205e-05, "loss": 0.8097, "step": 124 }, { "epoch": 0.03844970778222086, "grad_norm": 0.4389645811558664, "learning_rate": 9.998322300327234e-05, "loss": 0.9325, "step": 125 }, { "epoch": 0.03875730544447862, "grad_norm": 1.1719568300527785, "learning_rate": 9.998190772635151e-05, "loss": 1.4528, "step": 126 }, { "epoch": 0.03906490310673639, "grad_norm": 0.5870994062176509, "learning_rate": 9.998054282853532e-05, "loss": 0.9356, "step": 127 }, { "epoch": 0.03937250076899416, "grad_norm": 0.692828736338617, "learning_rate": 9.997912831117882e-05, "loss": 0.9178, "step": 128 }, { "epoch": 0.03968009843125192, "grad_norm": 0.5960212504167832, "learning_rate": 9.99776641756863e-05, "loss": 0.8661, "step": 129 }, { "epoch": 0.03998769609350969, "grad_norm": 1.3034709320922777, "learning_rate": 9.997615042351134e-05, "loss": 1.1842, "step": 130 }, { "epoch": 0.04029529375576746, "grad_norm": 0.9675896915594332, "learning_rate": 9.997458705615673e-05, "loss": 1.1895, "step": 131 }, { "epoch": 0.04060289141802522, "grad_norm": 0.6618954686386262, "learning_rate": 9.997297407517456e-05, "loss": 0.9353, "step": 132 }, { "epoch": 0.04091048908028299, "grad_norm": 1.55603914162057, "learning_rate": 9.997131148216617e-05, "loss": 1.1056, "step": 133 }, { "epoch": 0.04121808674254076, "grad_norm": 0.49914698099404037, "learning_rate": 9.996959927878213e-05, "loss": 0.9971, "step": 134 }, { "epoch": 0.04152568440479852, "grad_norm": 0.5272463842118625, "learning_rate": 9.996783746672229e-05, "loss": 0.9234, "step": 135 }, { "epoch": 0.04183328206705629, "grad_norm": 0.6584317502261908, "learning_rate": 9.996602604773573e-05, "loss": 0.9433, "step": 136 }, { "epoch": 0.04214087972931406, "grad_norm": 0.8247519485744463, "learning_rate": 9.996416502362077e-05, "loss": 1.0263, "step": 137 }, { "epoch": 0.04244847739157182, "grad_norm": 0.5758027147806282, "learning_rate": 9.996225439622501e-05, "loss": 0.9582, "step": 138 }, { "epoch": 0.04275607505382959, "grad_norm": 0.757137857951376, "learning_rate": 9.996029416744527e-05, "loss": 1.0458, "step": 139 }, { "epoch": 0.04306367271608736, "grad_norm": 0.5376672130611919, "learning_rate": 9.995828433922763e-05, "loss": 0.7818, "step": 140 }, { "epoch": 0.04337127037834512, "grad_norm": 0.5964528697823507, "learning_rate": 9.995622491356739e-05, "loss": 0.9737, "step": 141 }, { "epoch": 0.04367886804060289, "grad_norm": 0.5514324268406536, "learning_rate": 9.99541158925091e-05, "loss": 0.9249, "step": 142 }, { "epoch": 0.04398646570286066, "grad_norm": 0.4366290183039951, "learning_rate": 9.995195727814657e-05, "loss": 0.8848, "step": 143 }, { "epoch": 0.04429406336511842, "grad_norm": 0.4415735794210885, "learning_rate": 9.994974907262279e-05, "loss": 1.0469, "step": 144 }, { "epoch": 0.04460166102737619, "grad_norm": 0.5244648426084043, "learning_rate": 9.994749127813003e-05, "loss": 1.0426, "step": 145 }, { "epoch": 0.04490925868963396, "grad_norm": 0.7995442966523959, "learning_rate": 9.99451838969098e-05, "loss": 0.8321, "step": 146 }, { "epoch": 0.045216856351891724, "grad_norm": 0.4211232821278039, "learning_rate": 9.994282693125279e-05, "loss": 0.6938, "step": 147 }, { "epoch": 0.04552445401414949, "grad_norm": 0.688169286556489, "learning_rate": 9.994042038349893e-05, "loss": 1.0685, "step": 148 }, { "epoch": 0.04583205167640726, "grad_norm": 0.5910167326818927, "learning_rate": 9.99379642560374e-05, "loss": 0.817, "step": 149 }, { "epoch": 0.046139649338665024, "grad_norm": 0.45266446299081664, "learning_rate": 9.993545855130661e-05, "loss": 0.8178, "step": 150 }, { "epoch": 0.046447247000922794, "grad_norm": 0.3998387863235034, "learning_rate": 9.993290327179413e-05, "loss": 0.8912, "step": 151 }, { "epoch": 0.04675484466318056, "grad_norm": 0.5719044881609879, "learning_rate": 9.993029842003681e-05, "loss": 0.8175, "step": 152 }, { "epoch": 0.047062442325438325, "grad_norm": 0.5891070180431673, "learning_rate": 9.992764399862067e-05, "loss": 0.9911, "step": 153 }, { "epoch": 0.047370039987696094, "grad_norm": 0.5522572111316366, "learning_rate": 9.992494001018096e-05, "loss": 0.8722, "step": 154 }, { "epoch": 0.04767763764995386, "grad_norm": 0.6657519602950648, "learning_rate": 9.992218645740214e-05, "loss": 1.1011, "step": 155 }, { "epoch": 0.047985235312211626, "grad_norm": 0.6918867698615929, "learning_rate": 9.991938334301788e-05, "loss": 0.7481, "step": 156 }, { "epoch": 0.048292832974469395, "grad_norm": 0.5799552173794102, "learning_rate": 9.991653066981104e-05, "loss": 0.9027, "step": 157 }, { "epoch": 0.048600430636727164, "grad_norm": 0.6480955538447782, "learning_rate": 9.991362844061372e-05, "loss": 0.953, "step": 158 }, { "epoch": 0.048908028298984926, "grad_norm": 0.5031322993511298, "learning_rate": 9.991067665830713e-05, "loss": 1.0968, "step": 159 }, { "epoch": 0.049215625961242696, "grad_norm": 0.4811175673906853, "learning_rate": 9.990767532582177e-05, "loss": 0.6914, "step": 160 }, { "epoch": 0.049523223623500465, "grad_norm": 0.6148368055116926, "learning_rate": 9.990462444613728e-05, "loss": 0.9511, "step": 161 }, { "epoch": 0.04983082128575823, "grad_norm": 0.5685778763394168, "learning_rate": 9.990152402228252e-05, "loss": 0.8309, "step": 162 }, { "epoch": 0.050138418948015996, "grad_norm": 0.45435386036967446, "learning_rate": 9.98983740573355e-05, "loss": 0.889, "step": 163 }, { "epoch": 0.050446016610273765, "grad_norm": 0.5697485436722423, "learning_rate": 9.989517455442345e-05, "loss": 0.8656, "step": 164 }, { "epoch": 0.05075361427253153, "grad_norm": 0.4987515393738844, "learning_rate": 9.989192551672277e-05, "loss": 0.8734, "step": 165 }, { "epoch": 0.0510612119347893, "grad_norm": 0.6259048597359823, "learning_rate": 9.9888626947459e-05, "loss": 0.7758, "step": 166 }, { "epoch": 0.05136880959704706, "grad_norm": 0.5737373295404747, "learning_rate": 9.98852788499069e-05, "loss": 0.9664, "step": 167 }, { "epoch": 0.05167640725930483, "grad_norm": 0.49335161620665036, "learning_rate": 9.988188122739039e-05, "loss": 0.8682, "step": 168 }, { "epoch": 0.0519840049215626, "grad_norm": 0.4970616773008988, "learning_rate": 9.987843408328255e-05, "loss": 0.9441, "step": 169 }, { "epoch": 0.05229160258382036, "grad_norm": 0.44807912485448953, "learning_rate": 9.987493742100562e-05, "loss": 0.7695, "step": 170 }, { "epoch": 0.05259920024607813, "grad_norm": 0.5680523926814929, "learning_rate": 9.987139124403102e-05, "loss": 0.8846, "step": 171 }, { "epoch": 0.0529067979083359, "grad_norm": 0.9636800782024249, "learning_rate": 9.986779555587928e-05, "loss": 0.9654, "step": 172 }, { "epoch": 0.05321439557059366, "grad_norm": 0.47031513730583707, "learning_rate": 9.986415036012017e-05, "loss": 1.059, "step": 173 }, { "epoch": 0.05352199323285143, "grad_norm": 0.5848954335724252, "learning_rate": 9.986045566037251e-05, "loss": 0.826, "step": 174 }, { "epoch": 0.0538295908951092, "grad_norm": 0.6160876910992421, "learning_rate": 9.985671146030433e-05, "loss": 0.7901, "step": 175 }, { "epoch": 0.05413718855736696, "grad_norm": 0.45386004794225787, "learning_rate": 9.98529177636328e-05, "loss": 0.9684, "step": 176 }, { "epoch": 0.05444478621962473, "grad_norm": 0.49479861048558943, "learning_rate": 9.984907457412418e-05, "loss": 0.8543, "step": 177 }, { "epoch": 0.0547523838818825, "grad_norm": 0.4046306761168245, "learning_rate": 9.984518189559393e-05, "loss": 0.8966, "step": 178 }, { "epoch": 0.05505998154414026, "grad_norm": 0.5569953062929219, "learning_rate": 9.984123973190662e-05, "loss": 0.8757, "step": 179 }, { "epoch": 0.05536757920639803, "grad_norm": 0.6161296687267375, "learning_rate": 9.983724808697592e-05, "loss": 1.0977, "step": 180 }, { "epoch": 0.0556751768686558, "grad_norm": 0.43831576383414966, "learning_rate": 9.983320696476464e-05, "loss": 0.7064, "step": 181 }, { "epoch": 0.05598277453091356, "grad_norm": 0.41832659397314254, "learning_rate": 9.982911636928471e-05, "loss": 0.8037, "step": 182 }, { "epoch": 0.05629037219317133, "grad_norm": 0.4359273043618498, "learning_rate": 9.982497630459722e-05, "loss": 0.9649, "step": 183 }, { "epoch": 0.0565979698554291, "grad_norm": 0.5078476561312043, "learning_rate": 9.98207867748123e-05, "loss": 1.0191, "step": 184 }, { "epoch": 0.05690556751768686, "grad_norm": 0.6915474840031881, "learning_rate": 9.981654778408921e-05, "loss": 1.0612, "step": 185 }, { "epoch": 0.05721316517994463, "grad_norm": 0.3876097225616103, "learning_rate": 9.981225933663634e-05, "loss": 0.9622, "step": 186 }, { "epoch": 0.0575207628422024, "grad_norm": 1.1280642078518621, "learning_rate": 9.980792143671115e-05, "loss": 1.025, "step": 187 }, { "epoch": 0.05782836050446016, "grad_norm": 0.5666848431859418, "learning_rate": 9.980353408862025e-05, "loss": 1.0166, "step": 188 }, { "epoch": 0.05813595816671793, "grad_norm": 0.47970357802326047, "learning_rate": 9.979909729671924e-05, "loss": 1.058, "step": 189 }, { "epoch": 0.0584435558289757, "grad_norm": 0.4219202688612301, "learning_rate": 9.97946110654129e-05, "loss": 0.9357, "step": 190 }, { "epoch": 0.058751153491233464, "grad_norm": 0.728372050487061, "learning_rate": 9.979007539915504e-05, "loss": 0.9566, "step": 191 }, { "epoch": 0.05905875115349123, "grad_norm": 1.0144208394744743, "learning_rate": 9.978549030244858e-05, "loss": 1.0209, "step": 192 }, { "epoch": 0.059366348815749, "grad_norm": 0.6321171188828572, "learning_rate": 9.978085577984549e-05, "loss": 1.0539, "step": 193 }, { "epoch": 0.059673946478006765, "grad_norm": 0.4974836326640019, "learning_rate": 9.977617183594683e-05, "loss": 1.0407, "step": 194 }, { "epoch": 0.059981544140264534, "grad_norm": 0.49813622087963855, "learning_rate": 9.97714384754027e-05, "loss": 0.8541, "step": 195 }, { "epoch": 0.0602891418025223, "grad_norm": 0.46096923446185606, "learning_rate": 9.976665570291228e-05, "loss": 0.9491, "step": 196 }, { "epoch": 0.060596739464780065, "grad_norm": 0.5811553007522179, "learning_rate": 9.97618235232238e-05, "loss": 1.0795, "step": 197 }, { "epoch": 0.060904337127037834, "grad_norm": 0.5459256153365442, "learning_rate": 9.975694194113452e-05, "loss": 0.863, "step": 198 }, { "epoch": 0.061211934789295604, "grad_norm": 0.38003518344407006, "learning_rate": 9.975201096149078e-05, "loss": 0.9766, "step": 199 }, { "epoch": 0.061519532451553366, "grad_norm": 0.5154645022064231, "learning_rate": 9.974703058918794e-05, "loss": 0.8205, "step": 200 }, { "epoch": 0.061827130113811135, "grad_norm": 0.46757656823599214, "learning_rate": 9.97420008291704e-05, "loss": 0.8183, "step": 201 }, { "epoch": 0.062134727776068904, "grad_norm": 0.41303863904308463, "learning_rate": 9.973692168643159e-05, "loss": 0.8856, "step": 202 }, { "epoch": 0.06244232543832667, "grad_norm": 0.6819734575084967, "learning_rate": 9.973179316601396e-05, "loss": 0.7925, "step": 203 }, { "epoch": 0.06274992310058443, "grad_norm": 0.9989512419393756, "learning_rate": 9.972661527300899e-05, "loss": 1.0246, "step": 204 }, { "epoch": 0.0630575207628422, "grad_norm": 0.7349685551918532, "learning_rate": 9.972138801255719e-05, "loss": 0.8687, "step": 205 }, { "epoch": 0.06336511842509997, "grad_norm": 0.5153659658821848, "learning_rate": 9.971611138984803e-05, "loss": 0.8822, "step": 206 }, { "epoch": 0.06367271608735774, "grad_norm": 0.4664017814032727, "learning_rate": 9.971078541012007e-05, "loss": 0.8211, "step": 207 }, { "epoch": 0.0639803137496155, "grad_norm": 0.4888092696995329, "learning_rate": 9.970541007866076e-05, "loss": 0.9216, "step": 208 }, { "epoch": 0.06428791141187327, "grad_norm": 0.5629845853389912, "learning_rate": 9.969998540080667e-05, "loss": 0.8449, "step": 209 }, { "epoch": 0.06459550907413103, "grad_norm": 0.45450355129721853, "learning_rate": 9.969451138194326e-05, "loss": 1.0742, "step": 210 }, { "epoch": 0.0649031067363888, "grad_norm": 0.5200910742412174, "learning_rate": 9.968898802750501e-05, "loss": 0.8615, "step": 211 }, { "epoch": 0.06521070439864657, "grad_norm": 0.4845410863032798, "learning_rate": 9.96834153429754e-05, "loss": 0.9494, "step": 212 }, { "epoch": 0.06551830206090434, "grad_norm": 0.5604662140442985, "learning_rate": 9.967779333388683e-05, "loss": 0.9197, "step": 213 }, { "epoch": 0.0658258997231621, "grad_norm": 0.5322516273201185, "learning_rate": 9.967212200582073e-05, "loss": 1.1627, "step": 214 }, { "epoch": 0.06613349738541988, "grad_norm": 0.4444702567986962, "learning_rate": 9.966640136440747e-05, "loss": 0.9602, "step": 215 }, { "epoch": 0.06644109504767763, "grad_norm": 0.6909867540119452, "learning_rate": 9.966063141532635e-05, "loss": 0.9427, "step": 216 }, { "epoch": 0.0667486927099354, "grad_norm": 0.49536336304184464, "learning_rate": 9.965481216430566e-05, "loss": 0.9157, "step": 217 }, { "epoch": 0.06705629037219317, "grad_norm": 0.8786297563743781, "learning_rate": 9.96489436171226e-05, "loss": 0.8541, "step": 218 }, { "epoch": 0.06736388803445094, "grad_norm": 0.47173139959826094, "learning_rate": 9.964302577960334e-05, "loss": 0.8543, "step": 219 }, { "epoch": 0.06767148569670871, "grad_norm": 0.5166969669932514, "learning_rate": 9.963705865762298e-05, "loss": 0.9104, "step": 220 }, { "epoch": 0.06797908335896648, "grad_norm": 0.4273639203473428, "learning_rate": 9.963104225710554e-05, "loss": 0.9305, "step": 221 }, { "epoch": 0.06828668102122423, "grad_norm": 0.49126208049766074, "learning_rate": 9.962497658402397e-05, "loss": 0.8398, "step": 222 }, { "epoch": 0.068594278683482, "grad_norm": 0.4977418957450673, "learning_rate": 9.961886164440011e-05, "loss": 0.8819, "step": 223 }, { "epoch": 0.06890187634573977, "grad_norm": 0.639375646684987, "learning_rate": 9.961269744430476e-05, "loss": 0.736, "step": 224 }, { "epoch": 0.06920947400799754, "grad_norm": 0.47612144146988816, "learning_rate": 9.960648398985759e-05, "loss": 0.7985, "step": 225 }, { "epoch": 0.06951707167025531, "grad_norm": 0.5507955722677349, "learning_rate": 9.960022128722714e-05, "loss": 1.0643, "step": 226 }, { "epoch": 0.06982466933251308, "grad_norm": 0.7588019526933709, "learning_rate": 9.959390934263093e-05, "loss": 0.9028, "step": 227 }, { "epoch": 0.07013226699477083, "grad_norm": 0.5183374975575771, "learning_rate": 9.958754816233529e-05, "loss": 1.0002, "step": 228 }, { "epoch": 0.0704398646570286, "grad_norm": 0.46243542951115574, "learning_rate": 9.958113775265545e-05, "loss": 0.8799, "step": 229 }, { "epoch": 0.07074746231928637, "grad_norm": 0.5539595143299268, "learning_rate": 9.957467811995553e-05, "loss": 1.0118, "step": 230 }, { "epoch": 0.07105505998154414, "grad_norm": 1.0264460875372943, "learning_rate": 9.956816927064849e-05, "loss": 1.1442, "step": 231 }, { "epoch": 0.07136265764380191, "grad_norm": 0.40118877604141595, "learning_rate": 9.956161121119618e-05, "loss": 0.93, "step": 232 }, { "epoch": 0.07167025530605968, "grad_norm": 0.6257549225165372, "learning_rate": 9.955500394810929e-05, "loss": 0.9051, "step": 233 }, { "epoch": 0.07197785296831744, "grad_norm": 0.4841160532741648, "learning_rate": 9.954834748794735e-05, "loss": 0.7929, "step": 234 }, { "epoch": 0.0722854506305752, "grad_norm": 0.8131122579309631, "learning_rate": 9.954164183731875e-05, "loss": 1.1089, "step": 235 }, { "epoch": 0.07259304829283297, "grad_norm": 0.6678192804518168, "learning_rate": 9.953488700288073e-05, "loss": 0.9652, "step": 236 }, { "epoch": 0.07290064595509074, "grad_norm": 0.6044429345380323, "learning_rate": 9.952808299133929e-05, "loss": 1.013, "step": 237 }, { "epoch": 0.07320824361734851, "grad_norm": 0.5337885747556621, "learning_rate": 9.952122980944931e-05, "loss": 0.9026, "step": 238 }, { "epoch": 0.07351584127960628, "grad_norm": 0.49504239369326686, "learning_rate": 9.95143274640145e-05, "loss": 0.7834, "step": 239 }, { "epoch": 0.07382343894186404, "grad_norm": 0.5263537153545228, "learning_rate": 9.950737596188732e-05, "loss": 0.9806, "step": 240 }, { "epoch": 0.0741310366041218, "grad_norm": 0.742655166004385, "learning_rate": 9.950037530996908e-05, "loss": 1.0295, "step": 241 }, { "epoch": 0.07443863426637957, "grad_norm": 0.6369568673589012, "learning_rate": 9.949332551520986e-05, "loss": 0.8134, "step": 242 }, { "epoch": 0.07474623192863734, "grad_norm": 0.5623606702430118, "learning_rate": 9.948622658460853e-05, "loss": 0.8201, "step": 243 }, { "epoch": 0.07505382959089511, "grad_norm": 0.5804771241073865, "learning_rate": 9.947907852521277e-05, "loss": 1.0674, "step": 244 }, { "epoch": 0.07536142725315288, "grad_norm": 0.5235916104382192, "learning_rate": 9.947188134411897e-05, "loss": 0.8075, "step": 245 }, { "epoch": 0.07566902491541064, "grad_norm": 0.4346155467600669, "learning_rate": 9.946463504847234e-05, "loss": 1.0901, "step": 246 }, { "epoch": 0.0759766225776684, "grad_norm": 0.6209457719815343, "learning_rate": 9.945733964546687e-05, "loss": 0.9773, "step": 247 }, { "epoch": 0.07628422023992618, "grad_norm": 0.591292136826643, "learning_rate": 9.944999514234523e-05, "loss": 0.847, "step": 248 }, { "epoch": 0.07659181790218395, "grad_norm": 0.41833272419520506, "learning_rate": 9.94426015463989e-05, "loss": 0.8296, "step": 249 }, { "epoch": 0.07689941556444171, "grad_norm": 0.5124551763703769, "learning_rate": 9.943515886496805e-05, "loss": 0.8946, "step": 250 }, { "epoch": 0.07720701322669948, "grad_norm": 0.51824439172996, "learning_rate": 9.942766710544163e-05, "loss": 0.9317, "step": 251 }, { "epoch": 0.07751461088895724, "grad_norm": 0.7295966093666857, "learning_rate": 9.942012627525728e-05, "loss": 1.0153, "step": 252 }, { "epoch": 0.07782220855121501, "grad_norm": 0.7008034122471057, "learning_rate": 9.941253638190133e-05, "loss": 0.9176, "step": 253 }, { "epoch": 0.07812980621347278, "grad_norm": 0.4567118451948978, "learning_rate": 9.940489743290891e-05, "loss": 0.883, "step": 254 }, { "epoch": 0.07843740387573055, "grad_norm": 0.5750310138493478, "learning_rate": 9.939720943586375e-05, "loss": 1.013, "step": 255 }, { "epoch": 0.07874500153798832, "grad_norm": 0.49270480386462767, "learning_rate": 9.938947239839833e-05, "loss": 0.7831, "step": 256 }, { "epoch": 0.07905259920024608, "grad_norm": 0.7466035270222388, "learning_rate": 9.93816863281938e-05, "loss": 0.9033, "step": 257 }, { "epoch": 0.07936019686250384, "grad_norm": 0.5688353389331606, "learning_rate": 9.937385123298002e-05, "loss": 0.8393, "step": 258 }, { "epoch": 0.07966779452476161, "grad_norm": 0.3800963451491563, "learning_rate": 9.936596712053545e-05, "loss": 0.7982, "step": 259 }, { "epoch": 0.07997539218701938, "grad_norm": 0.6723969396464934, "learning_rate": 9.935803399868728e-05, "loss": 1.0578, "step": 260 }, { "epoch": 0.08028298984927715, "grad_norm": 0.6723969396464934, "learning_rate": 9.935803399868728e-05, "loss": 0.8872, "step": 261 }, { "epoch": 0.08059058751153492, "grad_norm": 0.5872958075118958, "learning_rate": 9.935005187531134e-05, "loss": 1.0649, "step": 262 }, { "epoch": 0.08089818517379269, "grad_norm": 0.7281730481904186, "learning_rate": 9.93420207583321e-05, "loss": 0.9605, "step": 263 }, { "epoch": 0.08120578283605044, "grad_norm": 0.7070863957537322, "learning_rate": 9.933394065572265e-05, "loss": 0.9699, "step": 264 }, { "epoch": 0.08151338049830821, "grad_norm": 0.7070863957537322, "learning_rate": 9.933394065572265e-05, "loss": 1.1192, "step": 265 }, { "epoch": 0.08182097816056598, "grad_norm": 0.4996154342330065, "learning_rate": 9.932581157550473e-05, "loss": 0.9468, "step": 266 }, { "epoch": 0.08212857582282375, "grad_norm": 0.5684862829211726, "learning_rate": 9.931763352574872e-05, "loss": 0.8647, "step": 267 }, { "epoch": 0.08243617348508152, "grad_norm": 0.804667654120429, "learning_rate": 9.930940651457361e-05, "loss": 0.9304, "step": 268 }, { "epoch": 0.08274377114733927, "grad_norm": 0.7324004470117025, "learning_rate": 9.930113055014696e-05, "loss": 0.9353, "step": 269 }, { "epoch": 0.08305136880959704, "grad_norm": 0.510319374307989, "learning_rate": 9.929280564068498e-05, "loss": 0.8696, "step": 270 }, { "epoch": 0.08335896647185481, "grad_norm": 0.5776599177972874, "learning_rate": 9.928443179445245e-05, "loss": 0.9933, "step": 271 }, { "epoch": 0.08366656413411258, "grad_norm": 0.6472824384999776, "learning_rate": 9.927600901976272e-05, "loss": 1.0196, "step": 272 }, { "epoch": 0.08397416179637035, "grad_norm": 0.49597159762038845, "learning_rate": 9.926753732497774e-05, "loss": 0.903, "step": 273 }, { "epoch": 0.08428175945862812, "grad_norm": 0.5688938155968704, "learning_rate": 9.925901671850799e-05, "loss": 0.9677, "step": 274 }, { "epoch": 0.08458935712088587, "grad_norm": 0.4728928529906866, "learning_rate": 9.925044720881257e-05, "loss": 0.9949, "step": 275 }, { "epoch": 0.08489695478314364, "grad_norm": 0.5247928224487225, "learning_rate": 9.924182880439907e-05, "loss": 1.003, "step": 276 }, { "epoch": 0.08520455244540141, "grad_norm": 0.518028909539051, "learning_rate": 9.923316151382364e-05, "loss": 0.8826, "step": 277 }, { "epoch": 0.08551215010765918, "grad_norm": 0.5917849623339221, "learning_rate": 9.9224445345691e-05, "loss": 0.8627, "step": 278 }, { "epoch": 0.08581974776991695, "grad_norm": 0.6004336659927695, "learning_rate": 9.921568030865433e-05, "loss": 0.8674, "step": 279 }, { "epoch": 0.08612734543217472, "grad_norm": 0.4469560992696198, "learning_rate": 9.920686641141536e-05, "loss": 0.946, "step": 280 }, { "epoch": 0.08643494309443248, "grad_norm": 0.39027648454648894, "learning_rate": 9.919800366272436e-05, "loss": 0.8222, "step": 281 }, { "epoch": 0.08674254075669025, "grad_norm": 0.47745056542601927, "learning_rate": 9.918909207138004e-05, "loss": 0.8968, "step": 282 }, { "epoch": 0.08705013841894801, "grad_norm": 0.6280268487293255, "learning_rate": 9.91801316462296e-05, "loss": 0.9807, "step": 283 }, { "epoch": 0.08735773608120578, "grad_norm": 0.9501825111888493, "learning_rate": 9.917112239616878e-05, "loss": 0.9991, "step": 284 }, { "epoch": 0.08766533374346355, "grad_norm": 0.7273503102000914, "learning_rate": 9.916206433014174e-05, "loss": 1.0748, "step": 285 }, { "epoch": 0.08797293140572132, "grad_norm": 0.5803737700431069, "learning_rate": 9.915295745714113e-05, "loss": 0.9856, "step": 286 }, { "epoch": 0.08828052906797908, "grad_norm": 0.8240658536954664, "learning_rate": 9.914380178620806e-05, "loss": 1.1836, "step": 287 }, { "epoch": 0.08858812673023685, "grad_norm": 0.6512562224764294, "learning_rate": 9.913459732643203e-05, "loss": 0.8134, "step": 288 }, { "epoch": 0.08889572439249462, "grad_norm": 0.6069021663221178, "learning_rate": 9.912534408695105e-05, "loss": 0.7816, "step": 289 }, { "epoch": 0.08920332205475238, "grad_norm": 0.6055481751894325, "learning_rate": 9.911604207695152e-05, "loss": 0.8752, "step": 290 }, { "epoch": 0.08951091971701015, "grad_norm": 0.43719455569074056, "learning_rate": 9.910669130566825e-05, "loss": 0.8692, "step": 291 }, { "epoch": 0.08981851737926792, "grad_norm": 0.4875365280076059, "learning_rate": 9.90972917823845e-05, "loss": 0.913, "step": 292 }, { "epoch": 0.09012611504152568, "grad_norm": 0.5388670069984749, "learning_rate": 9.908784351643186e-05, "loss": 0.7547, "step": 293 }, { "epoch": 0.09043371270378345, "grad_norm": 0.6412967546755158, "learning_rate": 9.907834651719038e-05, "loss": 0.9908, "step": 294 }, { "epoch": 0.09074131036604122, "grad_norm": 0.509683145254243, "learning_rate": 9.906880079408845e-05, "loss": 1.0091, "step": 295 }, { "epoch": 0.09104890802829899, "grad_norm": 0.6304739126396014, "learning_rate": 9.905920635660287e-05, "loss": 1.0577, "step": 296 }, { "epoch": 0.09135650569055676, "grad_norm": 0.6582505264889296, "learning_rate": 9.904956321425875e-05, "loss": 1.0744, "step": 297 }, { "epoch": 0.09166410335281452, "grad_norm": 0.4335242227388908, "learning_rate": 9.90398713766296e-05, "loss": 0.9951, "step": 298 }, { "epoch": 0.09197170101507228, "grad_norm": 0.6267749813727798, "learning_rate": 9.903013085333726e-05, "loss": 0.9317, "step": 299 }, { "epoch": 0.09227929867733005, "grad_norm": 0.3929346199752018, "learning_rate": 9.902034165405188e-05, "loss": 0.9875, "step": 300 }, { "epoch": 0.09258689633958782, "grad_norm": 0.6281552493839972, "learning_rate": 9.901050378849194e-05, "loss": 1.0594, "step": 301 }, { "epoch": 0.09289449400184559, "grad_norm": 0.5335532044627245, "learning_rate": 9.900061726642429e-05, "loss": 0.9645, "step": 302 }, { "epoch": 0.09320209166410336, "grad_norm": 0.4902684369441553, "learning_rate": 9.8990682097664e-05, "loss": 0.7985, "step": 303 }, { "epoch": 0.09350968932636113, "grad_norm": 0.47924691189186325, "learning_rate": 9.898069829207453e-05, "loss": 1.0214, "step": 304 }, { "epoch": 0.09381728698861888, "grad_norm": 0.594978148205919, "learning_rate": 9.897066585956751e-05, "loss": 0.8314, "step": 305 }, { "epoch": 0.09412488465087665, "grad_norm": 0.3805645863920452, "learning_rate": 9.896058481010297e-05, "loss": 0.9056, "step": 306 }, { "epoch": 0.09443248231313442, "grad_norm": 0.6236822540385285, "learning_rate": 9.89504551536891e-05, "loss": 1.0849, "step": 307 }, { "epoch": 0.09474007997539219, "grad_norm": 0.5266220517951483, "learning_rate": 9.894027690038244e-05, "loss": 0.9118, "step": 308 }, { "epoch": 0.09504767763764996, "grad_norm": 0.4440716558376362, "learning_rate": 9.893005006028766e-05, "loss": 1.0364, "step": 309 }, { "epoch": 0.09535527529990773, "grad_norm": 0.6753150055850206, "learning_rate": 9.891977464355779e-05, "loss": 0.9205, "step": 310 }, { "epoch": 0.09566287296216548, "grad_norm": 0.5690591115513413, "learning_rate": 9.890945066039401e-05, "loss": 0.9151, "step": 311 }, { "epoch": 0.09597047062442325, "grad_norm": 0.6532069164076945, "learning_rate": 9.889907812104573e-05, "loss": 0.8568, "step": 312 }, { "epoch": 0.09627806828668102, "grad_norm": 0.39979659459921985, "learning_rate": 9.888865703581058e-05, "loss": 0.7903, "step": 313 }, { "epoch": 0.09658566594893879, "grad_norm": 0.4739538201689892, "learning_rate": 9.887818741503436e-05, "loss": 0.818, "step": 314 }, { "epoch": 0.09689326361119656, "grad_norm": 0.42186047709662633, "learning_rate": 9.886766926911108e-05, "loss": 1.0092, "step": 315 }, { "epoch": 0.09720086127345433, "grad_norm": 0.8447370419612121, "learning_rate": 9.885710260848293e-05, "loss": 0.7761, "step": 316 }, { "epoch": 0.09750845893571208, "grad_norm": 0.42205337230310147, "learning_rate": 9.884648744364022e-05, "loss": 0.9257, "step": 317 }, { "epoch": 0.09781605659796985, "grad_norm": 0.5824443602834553, "learning_rate": 9.883582378512144e-05, "loss": 0.8977, "step": 318 }, { "epoch": 0.09812365426022762, "grad_norm": 0.5413068156636068, "learning_rate": 9.882511164351327e-05, "loss": 0.7838, "step": 319 }, { "epoch": 0.09843125192248539, "grad_norm": 0.5584395451307246, "learning_rate": 9.881435102945043e-05, "loss": 0.7357, "step": 320 }, { "epoch": 0.09873884958474316, "grad_norm": 0.6344115648098635, "learning_rate": 9.880354195361585e-05, "loss": 0.9589, "step": 321 }, { "epoch": 0.09904644724700093, "grad_norm": 0.5308566491149843, "learning_rate": 9.87926844267405e-05, "loss": 0.8917, "step": 322 }, { "epoch": 0.09935404490925868, "grad_norm": 0.6867831002659396, "learning_rate": 9.878177845960352e-05, "loss": 1.0095, "step": 323 }, { "epoch": 0.09966164257151645, "grad_norm": 0.5221669711147658, "learning_rate": 9.877082406303206e-05, "loss": 0.859, "step": 324 }, { "epoch": 0.09996924023377422, "grad_norm": 0.5806817962111172, "learning_rate": 9.875982124790144e-05, "loss": 0.9868, "step": 325 }, { "epoch": 0.10027683789603199, "grad_norm": 0.8240845877112476, "learning_rate": 9.874877002513499e-05, "loss": 1.0589, "step": 326 }, { "epoch": 0.10058443555828976, "grad_norm": 0.5760510619549615, "learning_rate": 9.87376704057041e-05, "loss": 0.85, "step": 327 }, { "epoch": 0.10089203322054753, "grad_norm": 0.40357434295781525, "learning_rate": 9.872652240062821e-05, "loss": 0.893, "step": 328 }, { "epoch": 0.10119963088280529, "grad_norm": 0.6318486752786399, "learning_rate": 9.871532602097483e-05, "loss": 0.8915, "step": 329 }, { "epoch": 0.10150722854506306, "grad_norm": 0.5373026247812862, "learning_rate": 9.870408127785947e-05, "loss": 0.9389, "step": 330 }, { "epoch": 0.10181482620732082, "grad_norm": 0.4519829732143563, "learning_rate": 9.86927881824456e-05, "loss": 0.836, "step": 331 }, { "epoch": 0.1021224238695786, "grad_norm": 0.5365823102345553, "learning_rate": 9.868144674594483e-05, "loss": 0.968, "step": 332 }, { "epoch": 0.10243002153183636, "grad_norm": 0.5215485099079162, "learning_rate": 9.867005697961662e-05, "loss": 0.8479, "step": 333 }, { "epoch": 0.10273761919409412, "grad_norm": 0.5353671013278603, "learning_rate": 9.865861889476849e-05, "loss": 0.8714, "step": 334 }, { "epoch": 0.10304521685635189, "grad_norm": 0.489519032063143, "learning_rate": 9.86471325027559e-05, "loss": 0.8626, "step": 335 }, { "epoch": 0.10335281451860966, "grad_norm": 0.4667554273193375, "learning_rate": 9.863559781498231e-05, "loss": 0.8567, "step": 336 }, { "epoch": 0.10366041218086743, "grad_norm": 0.4450407264568858, "learning_rate": 9.862401484289904e-05, "loss": 0.9304, "step": 337 }, { "epoch": 0.1039680098431252, "grad_norm": 0.4310433354042716, "learning_rate": 9.861238359800544e-05, "loss": 1.0306, "step": 338 }, { "epoch": 0.10427560750538296, "grad_norm": 0.4805310394617585, "learning_rate": 9.860070409184872e-05, "loss": 0.9194, "step": 339 }, { "epoch": 0.10458320516764072, "grad_norm": 0.5523440145360943, "learning_rate": 9.858897633602404e-05, "loss": 0.902, "step": 340 }, { "epoch": 0.10489080282989849, "grad_norm": 0.5019600049267624, "learning_rate": 9.857720034217445e-05, "loss": 0.9489, "step": 341 }, { "epoch": 0.10519840049215626, "grad_norm": 0.6758759562247958, "learning_rate": 9.856537612199091e-05, "loss": 1.0526, "step": 342 }, { "epoch": 0.10550599815441403, "grad_norm": 0.7079566363389402, "learning_rate": 9.855350368721218e-05, "loss": 0.8833, "step": 343 }, { "epoch": 0.1058135958166718, "grad_norm": 0.568676760534386, "learning_rate": 9.854158304962497e-05, "loss": 0.8938, "step": 344 }, { "epoch": 0.10612119347892957, "grad_norm": 0.7415369300487349, "learning_rate": 9.852961422106384e-05, "loss": 1.0843, "step": 345 }, { "epoch": 0.10642879114118732, "grad_norm": 0.5389945547575693, "learning_rate": 9.851759721341112e-05, "loss": 0.9051, "step": 346 }, { "epoch": 0.10673638880344509, "grad_norm": 0.40340297706024286, "learning_rate": 9.850553203859706e-05, "loss": 0.8696, "step": 347 }, { "epoch": 0.10704398646570286, "grad_norm": 0.44352931770513576, "learning_rate": 9.84934187085997e-05, "loss": 0.8372, "step": 348 }, { "epoch": 0.10735158412796063, "grad_norm": 0.47806229129079986, "learning_rate": 9.848125723544483e-05, "loss": 0.9451, "step": 349 }, { "epoch": 0.1076591817902184, "grad_norm": 0.37634014394953386, "learning_rate": 9.84690476312061e-05, "loss": 0.8097, "step": 350 }, { "epoch": 0.10796677945247617, "grad_norm": 0.6071641851349969, "learning_rate": 9.845678990800495e-05, "loss": 0.9624, "step": 351 }, { "epoch": 0.10827437711473392, "grad_norm": 0.6801294247228366, "learning_rate": 9.844448407801054e-05, "loss": 1.0195, "step": 352 }, { "epoch": 0.10858197477699169, "grad_norm": 0.7458252803210295, "learning_rate": 9.843213015343983e-05, "loss": 1.0079, "step": 353 }, { "epoch": 0.10888957243924946, "grad_norm": 0.6067568884408963, "learning_rate": 9.841972814655752e-05, "loss": 0.8607, "step": 354 }, { "epoch": 0.10919717010150723, "grad_norm": 0.5347434589691976, "learning_rate": 9.840727806967602e-05, "loss": 0.9773, "step": 355 }, { "epoch": 0.109504767763765, "grad_norm": 0.6028255819919122, "learning_rate": 9.839477993515549e-05, "loss": 0.8299, "step": 356 }, { "epoch": 0.10981236542602277, "grad_norm": 0.5029297052538465, "learning_rate": 9.838223375540381e-05, "loss": 0.9169, "step": 357 }, { "epoch": 0.11011996308828052, "grad_norm": 0.4507626236492062, "learning_rate": 9.83696395428765e-05, "loss": 0.7632, "step": 358 }, { "epoch": 0.11042756075053829, "grad_norm": 0.667146647171421, "learning_rate": 9.835699731007685e-05, "loss": 1.0559, "step": 359 }, { "epoch": 0.11073515841279606, "grad_norm": 0.656517761880081, "learning_rate": 9.834430706955576e-05, "loss": 0.9724, "step": 360 }, { "epoch": 0.11104275607505383, "grad_norm": 0.5382197448328123, "learning_rate": 9.83315688339118e-05, "loss": 0.6874, "step": 361 }, { "epoch": 0.1113503537373116, "grad_norm": 0.49466430198900374, "learning_rate": 9.831878261579121e-05, "loss": 1.0385, "step": 362 }, { "epoch": 0.11165795139956937, "grad_norm": 0.5419851893105792, "learning_rate": 9.830594842788786e-05, "loss": 0.9529, "step": 363 }, { "epoch": 0.11196554906182712, "grad_norm": 0.7938281403762939, "learning_rate": 9.829306628294323e-05, "loss": 1.1033, "step": 364 }, { "epoch": 0.1122731467240849, "grad_norm": 0.47709166469868197, "learning_rate": 9.828013619374643e-05, "loss": 0.8147, "step": 365 }, { "epoch": 0.11258074438634266, "grad_norm": 0.45279209692978867, "learning_rate": 9.826715817313415e-05, "loss": 0.9392, "step": 366 }, { "epoch": 0.11288834204860043, "grad_norm": 0.3812277306074597, "learning_rate": 9.825413223399066e-05, "loss": 0.8137, "step": 367 }, { "epoch": 0.1131959397108582, "grad_norm": 0.43905264458516113, "learning_rate": 9.824105838924783e-05, "loss": 0.8004, "step": 368 }, { "epoch": 0.11350353737311597, "grad_norm": 0.559643781744488, "learning_rate": 9.822793665188507e-05, "loss": 0.9613, "step": 369 }, { "epoch": 0.11381113503537373, "grad_norm": 0.5287658991178541, "learning_rate": 9.821476703492935e-05, "loss": 0.9759, "step": 370 }, { "epoch": 0.1141187326976315, "grad_norm": 0.4850125830895025, "learning_rate": 9.820154955145515e-05, "loss": 0.8399, "step": 371 }, { "epoch": 0.11442633035988926, "grad_norm": 0.4797500844502075, "learning_rate": 9.818828421458449e-05, "loss": 0.8542, "step": 372 }, { "epoch": 0.11473392802214703, "grad_norm": 0.5062298088404634, "learning_rate": 9.81749710374869e-05, "loss": 0.9114, "step": 373 }, { "epoch": 0.1150415256844048, "grad_norm": 0.5806356314306944, "learning_rate": 9.816161003337939e-05, "loss": 1.0479, "step": 374 }, { "epoch": 0.11534912334666257, "grad_norm": 0.7459282081056069, "learning_rate": 9.814820121552646e-05, "loss": 0.6972, "step": 375 }, { "epoch": 0.11565672100892033, "grad_norm": 0.4242417868009228, "learning_rate": 9.813474459724009e-05, "loss": 0.8385, "step": 376 }, { "epoch": 0.1159643186711781, "grad_norm": 0.7734749386468672, "learning_rate": 9.812124019187967e-05, "loss": 1.0849, "step": 377 }, { "epoch": 0.11627191633343587, "grad_norm": 0.45774986376528537, "learning_rate": 9.810768801285211e-05, "loss": 0.974, "step": 378 }, { "epoch": 0.11657951399569363, "grad_norm": 0.45774986376528537, "learning_rate": 9.810768801285211e-05, "loss": 0.9444, "step": 379 }, { "epoch": 0.1168871116579514, "grad_norm": 0.4995929230928023, "learning_rate": 9.809408807361166e-05, "loss": 1.0169, "step": 380 }, { "epoch": 0.11719470932020917, "grad_norm": 0.46666500001969136, "learning_rate": 9.808044038766006e-05, "loss": 1.0748, "step": 381 }, { "epoch": 0.11750230698246693, "grad_norm": 0.44953154189100103, "learning_rate": 9.806674496854642e-05, "loss": 1.0218, "step": 382 }, { "epoch": 0.1178099046447247, "grad_norm": 0.5873426235917734, "learning_rate": 9.805300182986722e-05, "loss": 0.7757, "step": 383 }, { "epoch": 0.11811750230698247, "grad_norm": 1.4740944958873174, "learning_rate": 9.803921098526634e-05, "loss": 1.032, "step": 384 }, { "epoch": 0.11842509996924024, "grad_norm": 0.5411778535669716, "learning_rate": 9.802537244843503e-05, "loss": 1.0261, "step": 385 }, { "epoch": 0.118732697631498, "grad_norm": 0.509641487107193, "learning_rate": 9.801148623311185e-05, "loss": 0.7777, "step": 386 }, { "epoch": 0.11904029529375577, "grad_norm": 0.6624832339532679, "learning_rate": 9.799755235308275e-05, "loss": 1.1455, "step": 387 }, { "epoch": 0.11934789295601353, "grad_norm": 0.5533724720149226, "learning_rate": 9.798357082218092e-05, "loss": 0.9059, "step": 388 }, { "epoch": 0.1196554906182713, "grad_norm": 0.4437095276638631, "learning_rate": 9.796954165428693e-05, "loss": 0.7747, "step": 389 }, { "epoch": 0.11996308828052907, "grad_norm": 0.7329221954786337, "learning_rate": 9.795546486332864e-05, "loss": 1.0505, "step": 390 }, { "epoch": 0.12027068594278684, "grad_norm": 0.5496193513289316, "learning_rate": 9.794134046328113e-05, "loss": 1.0588, "step": 391 }, { "epoch": 0.1205782836050446, "grad_norm": 0.49164435692170394, "learning_rate": 9.79271684681668e-05, "loss": 0.9799, "step": 392 }, { "epoch": 0.12088588126730238, "grad_norm": 0.7409815230341003, "learning_rate": 9.791294889205527e-05, "loss": 0.9037, "step": 393 }, { "epoch": 0.12119347892956013, "grad_norm": 1.1271539583121861, "learning_rate": 9.789868174906343e-05, "loss": 1.0414, "step": 394 }, { "epoch": 0.1215010765918179, "grad_norm": 0.520098042329394, "learning_rate": 9.788436705335535e-05, "loss": 0.915, "step": 395 }, { "epoch": 0.12180867425407567, "grad_norm": 0.4789467369837577, "learning_rate": 9.787000481914234e-05, "loss": 0.8494, "step": 396 }, { "epoch": 0.12211627191633344, "grad_norm": 0.5886454575814745, "learning_rate": 9.785559506068293e-05, "loss": 1.063, "step": 397 }, { "epoch": 0.12242386957859121, "grad_norm": 0.4388480166857076, "learning_rate": 9.784113779228274e-05, "loss": 0.8852, "step": 398 }, { "epoch": 0.12273146724084896, "grad_norm": 0.5971523761270808, "learning_rate": 9.782663302829467e-05, "loss": 0.9145, "step": 399 }, { "epoch": 0.12303906490310673, "grad_norm": 0.5240819358748007, "learning_rate": 9.781208078311868e-05, "loss": 0.931, "step": 400 }, { "epoch": 0.1233466625653645, "grad_norm": 0.6281653717004211, "learning_rate": 9.779748107120193e-05, "loss": 0.9535, "step": 401 }, { "epoch": 0.12365426022762227, "grad_norm": 0.48195197618795416, "learning_rate": 9.778283390703866e-05, "loss": 0.7576, "step": 402 }, { "epoch": 0.12396185788988004, "grad_norm": 0.5436115637086535, "learning_rate": 9.776813930517025e-05, "loss": 0.9367, "step": 403 }, { "epoch": 0.12426945555213781, "grad_norm": 0.6512888121457896, "learning_rate": 9.775339728018519e-05, "loss": 1.0265, "step": 404 }, { "epoch": 0.12457705321439556, "grad_norm": 0.5428547656088007, "learning_rate": 9.773860784671898e-05, "loss": 0.9896, "step": 405 }, { "epoch": 0.12488465087665333, "grad_norm": 0.5168185834618915, "learning_rate": 9.772377101945425e-05, "loss": 0.9366, "step": 406 }, { "epoch": 0.12519224853891112, "grad_norm": 0.42683091708173293, "learning_rate": 9.770888681312068e-05, "loss": 0.8703, "step": 407 }, { "epoch": 0.12549984620116886, "grad_norm": 0.5660354656101034, "learning_rate": 9.769395524249496e-05, "loss": 1.0103, "step": 408 }, { "epoch": 0.12580744386342663, "grad_norm": 0.7544428422353587, "learning_rate": 9.767897632240078e-05, "loss": 1.152, "step": 409 }, { "epoch": 0.1261150415256844, "grad_norm": 0.5191803143571235, "learning_rate": 9.766395006770892e-05, "loss": 0.8397, "step": 410 }, { "epoch": 0.12642263918794217, "grad_norm": 0.6761468536457538, "learning_rate": 9.764887649333707e-05, "loss": 1.0614, "step": 411 }, { "epoch": 0.12673023685019993, "grad_norm": 0.506029042110719, "learning_rate": 9.763375561424993e-05, "loss": 0.8594, "step": 412 }, { "epoch": 0.1270378345124577, "grad_norm": 0.5133094887774973, "learning_rate": 9.761858744545919e-05, "loss": 0.9003, "step": 413 }, { "epoch": 0.12734543217471547, "grad_norm": 0.5318307078405642, "learning_rate": 9.760337200202343e-05, "loss": 1.0012, "step": 414 }, { "epoch": 0.12765302983697324, "grad_norm": 0.6835203302615059, "learning_rate": 9.758810929904821e-05, "loss": 0.8276, "step": 415 }, { "epoch": 0.127960627499231, "grad_norm": 0.710473955612664, "learning_rate": 9.757279935168599e-05, "loss": 0.9335, "step": 416 }, { "epoch": 0.12826822516148878, "grad_norm": 0.5735604387130525, "learning_rate": 9.755744217513613e-05, "loss": 0.8047, "step": 417 }, { "epoch": 0.12857582282374655, "grad_norm": 0.6736643356268602, "learning_rate": 9.75420377846449e-05, "loss": 0.9761, "step": 418 }, { "epoch": 0.12888342048600432, "grad_norm": 0.5515380263279844, "learning_rate": 9.752658619550543e-05, "loss": 0.9193, "step": 419 }, { "epoch": 0.12919101814826206, "grad_norm": 0.4389191772482329, "learning_rate": 9.751108742305765e-05, "loss": 0.7708, "step": 420 }, { "epoch": 0.12949861581051983, "grad_norm": 0.5275961649500277, "learning_rate": 9.749554148268847e-05, "loss": 1.0343, "step": 421 }, { "epoch": 0.1298062134727776, "grad_norm": 0.6490424882367767, "learning_rate": 9.747994838983148e-05, "loss": 0.9402, "step": 422 }, { "epoch": 0.13011381113503537, "grad_norm": 1.716292569617628, "learning_rate": 9.746430815996716e-05, "loss": 1.1193, "step": 423 }, { "epoch": 0.13042140879729314, "grad_norm": 0.5563801417310668, "learning_rate": 9.74486208086228e-05, "loss": 0.8677, "step": 424 }, { "epoch": 0.1307290064595509, "grad_norm": 0.9120609135903618, "learning_rate": 9.743288635137242e-05, "loss": 1.1114, "step": 425 }, { "epoch": 0.13103660412180868, "grad_norm": 0.5153543028923092, "learning_rate": 9.741710480383684e-05, "loss": 0.9328, "step": 426 }, { "epoch": 0.13134420178406644, "grad_norm": 0.9271297733415661, "learning_rate": 9.74012761816836e-05, "loss": 0.9233, "step": 427 }, { "epoch": 0.1316517994463242, "grad_norm": 0.7810235651874855, "learning_rate": 9.738540050062701e-05, "loss": 1.0117, "step": 428 }, { "epoch": 0.13195939710858198, "grad_norm": 0.4275007809696667, "learning_rate": 9.736947777642809e-05, "loss": 0.8151, "step": 429 }, { "epoch": 0.13226699477083975, "grad_norm": 0.6701034791119228, "learning_rate": 9.735350802489453e-05, "loss": 1.0329, "step": 430 }, { "epoch": 0.13257459243309752, "grad_norm": 0.5071626069665283, "learning_rate": 9.733749126188076e-05, "loss": 0.9383, "step": 431 }, { "epoch": 0.13288219009535526, "grad_norm": 0.8433467365312484, "learning_rate": 9.732142750328785e-05, "loss": 0.9758, "step": 432 }, { "epoch": 0.13318978775761303, "grad_norm": 1.004600376816891, "learning_rate": 9.730531676506354e-05, "loss": 1.1324, "step": 433 }, { "epoch": 0.1334973854198708, "grad_norm": 0.5122348991998965, "learning_rate": 9.728915906320218e-05, "loss": 0.9612, "step": 434 }, { "epoch": 0.13380498308212857, "grad_norm": 0.6008774024576562, "learning_rate": 9.727295441374479e-05, "loss": 0.7707, "step": 435 }, { "epoch": 0.13411258074438634, "grad_norm": 0.5290478633266458, "learning_rate": 9.725670283277896e-05, "loss": 0.924, "step": 436 }, { "epoch": 0.1344201784066441, "grad_norm": 0.48939135372702847, "learning_rate": 9.724040433643888e-05, "loss": 0.8462, "step": 437 }, { "epoch": 0.13472777606890188, "grad_norm": 0.5600012671255058, "learning_rate": 9.722405894090535e-05, "loss": 0.9321, "step": 438 }, { "epoch": 0.13503537373115965, "grad_norm": 0.6086452049892668, "learning_rate": 9.72076666624057e-05, "loss": 0.7759, "step": 439 }, { "epoch": 0.13534297139341742, "grad_norm": 0.5638735658447757, "learning_rate": 9.719122751721381e-05, "loss": 0.9462, "step": 440 }, { "epoch": 0.13565056905567519, "grad_norm": 0.5628570233116197, "learning_rate": 9.717474152165006e-05, "loss": 0.7635, "step": 441 }, { "epoch": 0.13595816671793295, "grad_norm": 0.5617987201330505, "learning_rate": 9.715820869208139e-05, "loss": 0.954, "step": 442 }, { "epoch": 0.13626576438019072, "grad_norm": 0.5249585867387402, "learning_rate": 9.714162904492122e-05, "loss": 0.9119, "step": 443 }, { "epoch": 0.13657336204244847, "grad_norm": 0.7419120219940741, "learning_rate": 9.712500259662944e-05, "loss": 0.8787, "step": 444 }, { "epoch": 0.13688095970470623, "grad_norm": 1.0515188331551681, "learning_rate": 9.710832936371242e-05, "loss": 1.1028, "step": 445 }, { "epoch": 0.137188557366964, "grad_norm": 1.0294489904905275, "learning_rate": 9.709160936272293e-05, "loss": 0.8704, "step": 446 }, { "epoch": 0.13749615502922177, "grad_norm": 0.6991961060198285, "learning_rate": 9.707484261026022e-05, "loss": 0.9033, "step": 447 }, { "epoch": 0.13780375269147954, "grad_norm": 0.8667412969494815, "learning_rate": 9.705802912296998e-05, "loss": 1.1004, "step": 448 }, { "epoch": 0.1381113503537373, "grad_norm": 1.0884377176441455, "learning_rate": 9.70411689175442e-05, "loss": 0.934, "step": 449 }, { "epoch": 0.13841894801599508, "grad_norm": 0.7593830820408658, "learning_rate": 9.702426201072132e-05, "loss": 1.2369, "step": 450 }, { "epoch": 0.13872654567825285, "grad_norm": 0.5347334532210213, "learning_rate": 9.700730841928616e-05, "loss": 1.0546, "step": 451 }, { "epoch": 0.13903414334051062, "grad_norm": 0.49401112415926285, "learning_rate": 9.699030816006983e-05, "loss": 0.8188, "step": 452 }, { "epoch": 0.1393417410027684, "grad_norm": 0.5894677698286278, "learning_rate": 9.697326124994978e-05, "loss": 0.8122, "step": 453 }, { "epoch": 0.13964933866502616, "grad_norm": 0.5160193390766538, "learning_rate": 9.695616770584983e-05, "loss": 0.808, "step": 454 }, { "epoch": 0.13995693632728393, "grad_norm": 0.7775830880971135, "learning_rate": 9.693902754474006e-05, "loss": 1.0012, "step": 455 }, { "epoch": 0.14026453398954167, "grad_norm": 0.4628239084549329, "learning_rate": 9.69218407836368e-05, "loss": 0.8434, "step": 456 }, { "epoch": 0.14057213165179944, "grad_norm": 0.7712236560397122, "learning_rate": 9.69046074396027e-05, "loss": 0.9957, "step": 457 }, { "epoch": 0.1408797293140572, "grad_norm": 0.49700082100266507, "learning_rate": 9.688732752974662e-05, "loss": 0.969, "step": 458 }, { "epoch": 0.14118732697631498, "grad_norm": 0.6319294872795008, "learning_rate": 9.687000107122366e-05, "loss": 0.8608, "step": 459 }, { "epoch": 0.14149492463857274, "grad_norm": 0.3927003068824733, "learning_rate": 9.685262808123513e-05, "loss": 0.828, "step": 460 }, { "epoch": 0.1418025223008305, "grad_norm": 0.5045756483650362, "learning_rate": 9.683520857702855e-05, "loss": 0.9416, "step": 461 }, { "epoch": 0.14211011996308828, "grad_norm": 0.6693355667581474, "learning_rate": 9.681774257589757e-05, "loss": 0.7609, "step": 462 }, { "epoch": 0.14241771762534605, "grad_norm": 0.5268010297867736, "learning_rate": 9.680023009518209e-05, "loss": 1.0936, "step": 463 }, { "epoch": 0.14272531528760382, "grad_norm": 0.49151550131941446, "learning_rate": 9.678267115226805e-05, "loss": 0.9746, "step": 464 }, { "epoch": 0.1430329129498616, "grad_norm": 0.5054203727573393, "learning_rate": 9.67650657645876e-05, "loss": 1.0357, "step": 465 }, { "epoch": 0.14334051061211936, "grad_norm": 0.4432968686620812, "learning_rate": 9.674741394961895e-05, "loss": 0.8798, "step": 466 }, { "epoch": 0.1436481082743771, "grad_norm": 0.8270631152793273, "learning_rate": 9.672971572488642e-05, "loss": 0.9439, "step": 467 }, { "epoch": 0.14395570593663487, "grad_norm": 0.5688985904493176, "learning_rate": 9.671197110796042e-05, "loss": 0.9134, "step": 468 }, { "epoch": 0.14426330359889264, "grad_norm": 0.5040631726884094, "learning_rate": 9.669418011645739e-05, "loss": 1.0246, "step": 469 }, { "epoch": 0.1445709012611504, "grad_norm": 0.610917709734192, "learning_rate": 9.66763427680398e-05, "loss": 0.9097, "step": 470 }, { "epoch": 0.14487849892340818, "grad_norm": 0.6423508385189078, "learning_rate": 9.665845908041619e-05, "loss": 1.0337, "step": 471 }, { "epoch": 0.14518609658566595, "grad_norm": 0.6133150725736878, "learning_rate": 9.664052907134104e-05, "loss": 0.9126, "step": 472 }, { "epoch": 0.14549369424792372, "grad_norm": 0.7403694551500478, "learning_rate": 9.662255275861492e-05, "loss": 0.7895, "step": 473 }, { "epoch": 0.14580129191018149, "grad_norm": 0.5038208760223967, "learning_rate": 9.660453016008422e-05, "loss": 0.9712, "step": 474 }, { "epoch": 0.14610888957243925, "grad_norm": 0.8184091072987113, "learning_rate": 9.658646129364143e-05, "loss": 0.8265, "step": 475 }, { "epoch": 0.14641648723469702, "grad_norm": 1.1049565062781628, "learning_rate": 9.656834617722487e-05, "loss": 1.1523, "step": 476 }, { "epoch": 0.1467240848969548, "grad_norm": 0.6894865686394416, "learning_rate": 9.655018482881882e-05, "loss": 1.0285, "step": 477 }, { "epoch": 0.14703168255921256, "grad_norm": 0.5184208810572554, "learning_rate": 9.653197726645347e-05, "loss": 0.9498, "step": 478 }, { "epoch": 0.1473392802214703, "grad_norm": 0.5882971681009814, "learning_rate": 9.651372350820485e-05, "loss": 0.924, "step": 479 }, { "epoch": 0.14764687788372807, "grad_norm": 0.6103557558026834, "learning_rate": 9.649542357219487e-05, "loss": 0.9117, "step": 480 }, { "epoch": 0.14795447554598584, "grad_norm": 0.5955215815198164, "learning_rate": 9.647707747659131e-05, "loss": 0.8888, "step": 481 }, { "epoch": 0.1482620732082436, "grad_norm": 0.5784025127487576, "learning_rate": 9.645868523960776e-05, "loss": 0.9712, "step": 482 }, { "epoch": 0.14856967087050138, "grad_norm": 0.7243776695949798, "learning_rate": 9.644024687950358e-05, "loss": 1.0606, "step": 483 }, { "epoch": 0.14887726853275915, "grad_norm": 0.9076410205381858, "learning_rate": 9.642176241458397e-05, "loss": 0.8686, "step": 484 }, { "epoch": 0.14918486619501692, "grad_norm": 0.5117108093698172, "learning_rate": 9.640323186319988e-05, "loss": 1.0238, "step": 485 }, { "epoch": 0.1494924638572747, "grad_norm": 0.6838580805609422, "learning_rate": 9.638465524374802e-05, "loss": 0.7866, "step": 486 }, { "epoch": 0.14980006151953246, "grad_norm": 0.5954337997411121, "learning_rate": 9.636603257467084e-05, "loss": 0.9998, "step": 487 }, { "epoch": 0.15010765918179023, "grad_norm": 0.4021771645312165, "learning_rate": 9.63473638744565e-05, "loss": 0.8089, "step": 488 }, { "epoch": 0.150415256844048, "grad_norm": 0.4913022255654996, "learning_rate": 9.632864916163885e-05, "loss": 0.8553, "step": 489 }, { "epoch": 0.15072285450630576, "grad_norm": 0.8806441833067338, "learning_rate": 9.630988845479743e-05, "loss": 0.9844, "step": 490 }, { "epoch": 0.1510304521685635, "grad_norm": 0.4582538851224948, "learning_rate": 9.629108177255744e-05, "loss": 0.8778, "step": 491 }, { "epoch": 0.15133804983082128, "grad_norm": 1.2136180112630108, "learning_rate": 9.627222913358974e-05, "loss": 1.2742, "step": 492 }, { "epoch": 0.15164564749307904, "grad_norm": 0.6011678000199039, "learning_rate": 9.625333055661077e-05, "loss": 1.0693, "step": 493 }, { "epoch": 0.1519532451553368, "grad_norm": 0.6416574206224576, "learning_rate": 9.623438606038262e-05, "loss": 0.7926, "step": 494 }, { "epoch": 0.15226084281759458, "grad_norm": 0.5580427339257634, "learning_rate": 9.621539566371296e-05, "loss": 1.0587, "step": 495 }, { "epoch": 0.15256844047985235, "grad_norm": 0.45841453889788175, "learning_rate": 9.619635938545501e-05, "loss": 0.9129, "step": 496 }, { "epoch": 0.15287603814211012, "grad_norm": 0.5324827164122097, "learning_rate": 9.617727724450755e-05, "loss": 0.9264, "step": 497 }, { "epoch": 0.1531836358043679, "grad_norm": 0.5147073389863203, "learning_rate": 9.615814925981493e-05, "loss": 0.9182, "step": 498 }, { "epoch": 0.15349123346662566, "grad_norm": 0.7964813776580522, "learning_rate": 9.613897545036692e-05, "loss": 0.8369, "step": 499 }, { "epoch": 0.15379883112888343, "grad_norm": 0.5217958605517174, "learning_rate": 9.611975583519888e-05, "loss": 0.754, "step": 500 }, { "epoch": 0.1541064287911412, "grad_norm": 0.4573769376554386, "learning_rate": 9.610049043339159e-05, "loss": 0.8148, "step": 501 }, { "epoch": 0.15441402645339897, "grad_norm": 0.5872419983469469, "learning_rate": 9.60811792640713e-05, "loss": 0.9663, "step": 502 }, { "epoch": 0.1547216241156567, "grad_norm": 0.5862178004470919, "learning_rate": 9.60618223464097e-05, "loss": 0.9651, "step": 503 }, { "epoch": 0.15502922177791448, "grad_norm": 0.5447352237222575, "learning_rate": 9.604241969962389e-05, "loss": 1.0355, "step": 504 }, { "epoch": 0.15533681944017225, "grad_norm": 0.8079671704567697, "learning_rate": 9.602297134297637e-05, "loss": 1.0259, "step": 505 }, { "epoch": 0.15564441710243002, "grad_norm": 0.42303241307390704, "learning_rate": 9.600347729577506e-05, "loss": 0.8775, "step": 506 }, { "epoch": 0.15595201476468779, "grad_norm": 0.5896738432500559, "learning_rate": 9.598393757737315e-05, "loss": 1.0277, "step": 507 }, { "epoch": 0.15625961242694555, "grad_norm": 0.4773975081096993, "learning_rate": 9.596435220716926e-05, "loss": 0.8515, "step": 508 }, { "epoch": 0.15656721008920332, "grad_norm": 0.5528997069150645, "learning_rate": 9.59447212046073e-05, "loss": 0.95, "step": 509 }, { "epoch": 0.1568748077514611, "grad_norm": 0.6688650729342976, "learning_rate": 9.592504458917645e-05, "loss": 1.0002, "step": 510 }, { "epoch": 0.15718240541371886, "grad_norm": 0.5696123964451176, "learning_rate": 9.590532238041124e-05, "loss": 0.9987, "step": 511 }, { "epoch": 0.15749000307597663, "grad_norm": 0.6992836707904995, "learning_rate": 9.588555459789141e-05, "loss": 0.96, "step": 512 }, { "epoch": 0.1577976007382344, "grad_norm": 0.44703815953759424, "learning_rate": 9.586574126124197e-05, "loss": 0.9172, "step": 513 }, { "epoch": 0.15810519840049217, "grad_norm": 0.4983959305227416, "learning_rate": 9.584588239013313e-05, "loss": 0.8672, "step": 514 }, { "epoch": 0.1584127960627499, "grad_norm": 0.4915936306291697, "learning_rate": 9.582597800428034e-05, "loss": 0.7957, "step": 515 }, { "epoch": 0.15872039372500768, "grad_norm": 0.6186526345695246, "learning_rate": 9.580602812344423e-05, "loss": 0.8809, "step": 516 }, { "epoch": 0.15902799138726545, "grad_norm": 0.5913245917801176, "learning_rate": 9.578603276743055e-05, "loss": 0.9195, "step": 517 }, { "epoch": 0.15933558904952322, "grad_norm": 0.4920161620700873, "learning_rate": 9.576599195609027e-05, "loss": 0.9461, "step": 518 }, { "epoch": 0.159643186711781, "grad_norm": 0.4500870944328313, "learning_rate": 9.574590570931943e-05, "loss": 0.9387, "step": 519 }, { "epoch": 0.15995078437403876, "grad_norm": 0.5489471268012888, "learning_rate": 9.572577404705919e-05, "loss": 0.7938, "step": 520 }, { "epoch": 0.16025838203629653, "grad_norm": 0.534561199423146, "learning_rate": 9.570559698929583e-05, "loss": 0.9761, "step": 521 }, { "epoch": 0.1605659796985543, "grad_norm": 0.5790288931445658, "learning_rate": 9.568537455606064e-05, "loss": 0.8784, "step": 522 }, { "epoch": 0.16087357736081206, "grad_norm": 0.5582868210937241, "learning_rate": 9.566510676743001e-05, "loss": 0.8855, "step": 523 }, { "epoch": 0.16118117502306983, "grad_norm": 0.5786348762917921, "learning_rate": 9.564479364352532e-05, "loss": 1.0997, "step": 524 }, { "epoch": 0.1614887726853276, "grad_norm": 0.5495284886229521, "learning_rate": 9.5624435204513e-05, "loss": 0.8913, "step": 525 }, { "epoch": 0.16179637034758537, "grad_norm": 0.494674161419271, "learning_rate": 9.560403147060444e-05, "loss": 0.9677, "step": 526 }, { "epoch": 0.1621039680098431, "grad_norm": 0.591026998136495, "learning_rate": 9.558358246205596e-05, "loss": 0.9591, "step": 527 }, { "epoch": 0.16241156567210088, "grad_norm": 0.8199054985269383, "learning_rate": 9.556308819916892e-05, "loss": 1.1829, "step": 528 }, { "epoch": 0.16271916333435865, "grad_norm": 0.5184715461848468, "learning_rate": 9.554254870228954e-05, "loss": 0.8574, "step": 529 }, { "epoch": 0.16302676099661642, "grad_norm": 0.6956400333263423, "learning_rate": 9.552196399180895e-05, "loss": 0.8711, "step": 530 }, { "epoch": 0.1633343586588742, "grad_norm": 0.5183706739807571, "learning_rate": 9.550133408816317e-05, "loss": 0.9329, "step": 531 }, { "epoch": 0.16364195632113196, "grad_norm": 0.5083150814320941, "learning_rate": 9.548065901183313e-05, "loss": 0.953, "step": 532 }, { "epoch": 0.16394955398338973, "grad_norm": 0.7018707566362257, "learning_rate": 9.545993878334455e-05, "loss": 0.795, "step": 533 }, { "epoch": 0.1642571516456475, "grad_norm": 0.47750882136438955, "learning_rate": 9.543917342326801e-05, "loss": 0.9569, "step": 534 }, { "epoch": 0.16456474930790527, "grad_norm": 0.6348463519174269, "learning_rate": 9.541836295221888e-05, "loss": 0.801, "step": 535 }, { "epoch": 0.16487234697016304, "grad_norm": 0.6333301879987985, "learning_rate": 9.539750739085734e-05, "loss": 0.865, "step": 536 }, { "epoch": 0.1651799446324208, "grad_norm": 0.41364012380893417, "learning_rate": 9.537660675988827e-05, "loss": 0.825, "step": 537 }, { "epoch": 0.16548754229467855, "grad_norm": 0.41389410101241936, "learning_rate": 9.535566108006136e-05, "loss": 0.9378, "step": 538 }, { "epoch": 0.16579513995693632, "grad_norm": 0.6032203128432413, "learning_rate": 9.533467037217102e-05, "loss": 0.7863, "step": 539 }, { "epoch": 0.16610273761919409, "grad_norm": 0.4660668809989375, "learning_rate": 9.531363465705633e-05, "loss": 0.8372, "step": 540 }, { "epoch": 0.16641033528145185, "grad_norm": 0.4526479807435066, "learning_rate": 9.529255395560106e-05, "loss": 0.8162, "step": 541 }, { "epoch": 0.16671793294370962, "grad_norm": 0.5400141753302231, "learning_rate": 9.527142828873365e-05, "loss": 1.0322, "step": 542 }, { "epoch": 0.1670255306059674, "grad_norm": 0.5764503235577437, "learning_rate": 9.525025767742722e-05, "loss": 0.9511, "step": 543 }, { "epoch": 0.16733312826822516, "grad_norm": 0.5394001159167261, "learning_rate": 9.52290421426994e-05, "loss": 0.9967, "step": 544 }, { "epoch": 0.16764072593048293, "grad_norm": 0.8298929912172576, "learning_rate": 9.520778170561252e-05, "loss": 0.9478, "step": 545 }, { "epoch": 0.1679483235927407, "grad_norm": 0.5333912566040707, "learning_rate": 9.518647638727348e-05, "loss": 0.9867, "step": 546 }, { "epoch": 0.16825592125499847, "grad_norm": 0.580016001543317, "learning_rate": 9.516512620883368e-05, "loss": 0.9027, "step": 547 }, { "epoch": 0.16856351891725624, "grad_norm": 0.5367899562574459, "learning_rate": 9.514373119148912e-05, "loss": 0.8716, "step": 548 }, { "epoch": 0.168871116579514, "grad_norm": 0.40743447159308255, "learning_rate": 9.512229135648023e-05, "loss": 0.8389, "step": 549 }, { "epoch": 0.16917871424177175, "grad_norm": 0.49256137215085743, "learning_rate": 9.510080672509205e-05, "loss": 0.8603, "step": 550 }, { "epoch": 0.16948631190402952, "grad_norm": 0.48818729623059415, "learning_rate": 9.507927731865398e-05, "loss": 0.943, "step": 551 }, { "epoch": 0.1697939095662873, "grad_norm": 0.5925487422288278, "learning_rate": 9.505770315853997e-05, "loss": 0.995, "step": 552 }, { "epoch": 0.17010150722854506, "grad_norm": 0.7473337851486419, "learning_rate": 9.503608426616832e-05, "loss": 1.0271, "step": 553 }, { "epoch": 0.17040910489080283, "grad_norm": 0.6612850629595883, "learning_rate": 9.501442066300177e-05, "loss": 0.9373, "step": 554 }, { "epoch": 0.1707167025530606, "grad_norm": 0.5064381875957509, "learning_rate": 9.499271237054748e-05, "loss": 0.7951, "step": 555 }, { "epoch": 0.17102430021531836, "grad_norm": 2.8057237679254667, "learning_rate": 9.497095941035691e-05, "loss": 0.8225, "step": 556 }, { "epoch": 0.17133189787757613, "grad_norm": 0.44178920045148257, "learning_rate": 9.494916180402592e-05, "loss": 0.8891, "step": 557 }, { "epoch": 0.1716394955398339, "grad_norm": 0.44778885044090366, "learning_rate": 9.492731957319467e-05, "loss": 0.7978, "step": 558 }, { "epoch": 0.17194709320209167, "grad_norm": 0.46581064322982135, "learning_rate": 9.490543273954765e-05, "loss": 0.9173, "step": 559 }, { "epoch": 0.17225469086434944, "grad_norm": 0.4787945515756404, "learning_rate": 9.488350132481358e-05, "loss": 0.8994, "step": 560 }, { "epoch": 0.1725622885266072, "grad_norm": 0.5897786610421002, "learning_rate": 9.486152535076548e-05, "loss": 0.9288, "step": 561 }, { "epoch": 0.17286988618886495, "grad_norm": 0.4855002697959986, "learning_rate": 9.483950483922059e-05, "loss": 0.9696, "step": 562 }, { "epoch": 0.17317748385112272, "grad_norm": 0.4771800755001369, "learning_rate": 9.481743981204037e-05, "loss": 0.8645, "step": 563 }, { "epoch": 0.1734850815133805, "grad_norm": 0.407720617715291, "learning_rate": 9.479533029113048e-05, "loss": 0.9941, "step": 564 }, { "epoch": 0.17379267917563826, "grad_norm": 0.6354021173339313, "learning_rate": 9.477317629844074e-05, "loss": 1.1091, "step": 565 }, { "epoch": 0.17410027683789603, "grad_norm": 0.5082986107722856, "learning_rate": 9.475097785596513e-05, "loss": 0.9128, "step": 566 }, { "epoch": 0.1744078745001538, "grad_norm": 0.5741639357304903, "learning_rate": 9.472873498574175e-05, "loss": 0.7116, "step": 567 }, { "epoch": 0.17471547216241157, "grad_norm": 0.6072156553336965, "learning_rate": 9.470644770985283e-05, "loss": 0.9267, "step": 568 }, { "epoch": 0.17502306982466934, "grad_norm": 0.4636513256954245, "learning_rate": 9.468411605042466e-05, "loss": 0.849, "step": 569 }, { "epoch": 0.1753306674869271, "grad_norm": 3.257939142138657, "learning_rate": 9.46617400296276e-05, "loss": 0.9142, "step": 570 }, { "epoch": 0.17563826514918487, "grad_norm": 0.8758344151187017, "learning_rate": 9.463931966967604e-05, "loss": 1.1672, "step": 571 }, { "epoch": 0.17594586281144264, "grad_norm": 0.4997790323677103, "learning_rate": 9.461685499282843e-05, "loss": 0.9822, "step": 572 }, { "epoch": 0.1762534604737004, "grad_norm": 0.6185113470597146, "learning_rate": 9.459434602138714e-05, "loss": 0.8948, "step": 573 }, { "epoch": 0.17656105813595815, "grad_norm": 0.4749986561023048, "learning_rate": 9.457179277769862e-05, "loss": 0.9099, "step": 574 }, { "epoch": 0.17686865579821592, "grad_norm": 0.7210421353225284, "learning_rate": 9.454919528415318e-05, "loss": 0.8359, "step": 575 }, { "epoch": 0.1771762534604737, "grad_norm": 0.5825452060433567, "learning_rate": 9.45265535631851e-05, "loss": 1.0291, "step": 576 }, { "epoch": 0.17748385112273146, "grad_norm": 0.8609639665657612, "learning_rate": 9.450386763727255e-05, "loss": 1.0164, "step": 577 }, { "epoch": 0.17779144878498923, "grad_norm": 0.4216627222677017, "learning_rate": 9.448113752893764e-05, "loss": 0.8734, "step": 578 }, { "epoch": 0.178099046447247, "grad_norm": 0.4721383807480442, "learning_rate": 9.445836326074625e-05, "loss": 0.8969, "step": 579 }, { "epoch": 0.17840664410950477, "grad_norm": 0.5134106360307391, "learning_rate": 9.443554485530818e-05, "loss": 1.0386, "step": 580 }, { "epoch": 0.17871424177176254, "grad_norm": 1.7540174017470893, "learning_rate": 9.441268233527704e-05, "loss": 0.8745, "step": 581 }, { "epoch": 0.1790218394340203, "grad_norm": 0.5736321298262833, "learning_rate": 9.438977572335017e-05, "loss": 0.8182, "step": 582 }, { "epoch": 0.17932943709627808, "grad_norm": 0.6402309159417457, "learning_rate": 9.436682504226878e-05, "loss": 1.0591, "step": 583 }, { "epoch": 0.17963703475853585, "grad_norm": 0.7109828442386259, "learning_rate": 9.434383031481774e-05, "loss": 0.8931, "step": 584 }, { "epoch": 0.17994463242079362, "grad_norm": 0.5334971109498884, "learning_rate": 9.432079156382572e-05, "loss": 0.9749, "step": 585 }, { "epoch": 0.18025223008305136, "grad_norm": 0.44690838552344886, "learning_rate": 9.429770881216506e-05, "loss": 0.8252, "step": 586 }, { "epoch": 0.18055982774530913, "grad_norm": 0.3733892067670396, "learning_rate": 9.427458208275176e-05, "loss": 0.7695, "step": 587 }, { "epoch": 0.1808674254075669, "grad_norm": 0.4015960180094185, "learning_rate": 9.425141139854554e-05, "loss": 0.854, "step": 588 }, { "epoch": 0.18117502306982466, "grad_norm": 0.6303755476309907, "learning_rate": 9.422819678254972e-05, "loss": 0.8357, "step": 589 }, { "epoch": 0.18148262073208243, "grad_norm": 0.5001130743820338, "learning_rate": 9.420493825781124e-05, "loss": 1.058, "step": 590 }, { "epoch": 0.1817902183943402, "grad_norm": 0.9323525077523238, "learning_rate": 9.418163584742061e-05, "loss": 0.7619, "step": 591 }, { "epoch": 0.18209781605659797, "grad_norm": 0.5046964928372939, "learning_rate": 9.415828957451195e-05, "loss": 0.8836, "step": 592 }, { "epoch": 0.18240541371885574, "grad_norm": 0.398465873811954, "learning_rate": 9.413489946226292e-05, "loss": 0.6764, "step": 593 }, { "epoch": 0.1827130113811135, "grad_norm": 0.567973663892291, "learning_rate": 9.411146553389467e-05, "loss": 0.9882, "step": 594 }, { "epoch": 0.18302060904337128, "grad_norm": 0.5052888007463298, "learning_rate": 9.408798781267185e-05, "loss": 0.8733, "step": 595 }, { "epoch": 0.18332820670562905, "grad_norm": 0.4298790750427489, "learning_rate": 9.406446632190264e-05, "loss": 0.9535, "step": 596 }, { "epoch": 0.18363580436788682, "grad_norm": 0.5633667223127097, "learning_rate": 9.404090108493863e-05, "loss": 0.9646, "step": 597 }, { "epoch": 0.18394340203014456, "grad_norm": 1.142430248508107, "learning_rate": 9.401729212517483e-05, "loss": 1.0042, "step": 598 }, { "epoch": 0.18425099969240233, "grad_norm": 0.5521927936858935, "learning_rate": 9.399363946604968e-05, "loss": 0.898, "step": 599 }, { "epoch": 0.1845585973546601, "grad_norm": 0.4149562850999181, "learning_rate": 9.396994313104503e-05, "loss": 0.7863, "step": 600 }, { "epoch": 0.18486619501691787, "grad_norm": 0.4946605738691128, "learning_rate": 9.394620314368603e-05, "loss": 0.9459, "step": 601 }, { "epoch": 0.18517379267917564, "grad_norm": 0.5492266620067096, "learning_rate": 9.392241952754119e-05, "loss": 1.0558, "step": 602 }, { "epoch": 0.1854813903414334, "grad_norm": 0.43441260950977917, "learning_rate": 9.389859230622238e-05, "loss": 0.836, "step": 603 }, { "epoch": 0.18578898800369117, "grad_norm": 0.5547626752993869, "learning_rate": 9.387472150338468e-05, "loss": 0.9461, "step": 604 }, { "epoch": 0.18609658566594894, "grad_norm": 0.4774189416614037, "learning_rate": 9.385080714272648e-05, "loss": 0.9299, "step": 605 }, { "epoch": 0.1864041833282067, "grad_norm": 0.5094805984248562, "learning_rate": 9.382684924798943e-05, "loss": 0.9621, "step": 606 }, { "epoch": 0.18671178099046448, "grad_norm": 0.7379505720607175, "learning_rate": 9.380284784295839e-05, "loss": 1.0857, "step": 607 }, { "epoch": 0.18701937865272225, "grad_norm": 0.8295982134289316, "learning_rate": 9.377880295146137e-05, "loss": 0.8088, "step": 608 }, { "epoch": 0.18732697631498, "grad_norm": 0.7865787878234144, "learning_rate": 9.37547145973696e-05, "loss": 0.8692, "step": 609 }, { "epoch": 0.18763457397723776, "grad_norm": 0.7800906386780294, "learning_rate": 9.373058280459747e-05, "loss": 1.0068, "step": 610 }, { "epoch": 0.18794217163949553, "grad_norm": 0.5677353194878035, "learning_rate": 9.370640759710245e-05, "loss": 0.8718, "step": 611 }, { "epoch": 0.1882497693017533, "grad_norm": 0.47073557905393276, "learning_rate": 9.368218899888515e-05, "loss": 0.8998, "step": 612 }, { "epoch": 0.18855736696401107, "grad_norm": 0.6122298112881751, "learning_rate": 9.36579270339892e-05, "loss": 0.7083, "step": 613 }, { "epoch": 0.18886496462626884, "grad_norm": 0.6953603417561534, "learning_rate": 9.36336217265014e-05, "loss": 1.1229, "step": 614 }, { "epoch": 0.1891725622885266, "grad_norm": 0.8051829575587143, "learning_rate": 9.360927310055143e-05, "loss": 0.835, "step": 615 }, { "epoch": 0.18948015995078438, "grad_norm": 0.4818014390644875, "learning_rate": 9.358488118031208e-05, "loss": 0.9437, "step": 616 }, { "epoch": 0.18978775761304215, "grad_norm": 0.5579356921833144, "learning_rate": 9.35604459899991e-05, "loss": 0.7947, "step": 617 }, { "epoch": 0.19009535527529992, "grad_norm": 0.4958020776158694, "learning_rate": 9.353596755387117e-05, "loss": 0.9319, "step": 618 }, { "epoch": 0.19040295293755768, "grad_norm": 0.46364412652174514, "learning_rate": 9.351144589622993e-05, "loss": 0.7335, "step": 619 }, { "epoch": 0.19071055059981545, "grad_norm": 0.512513657975706, "learning_rate": 9.348688104141992e-05, "loss": 0.8234, "step": 620 }, { "epoch": 0.1910181482620732, "grad_norm": 0.7276199722178242, "learning_rate": 9.346227301382857e-05, "loss": 1.0592, "step": 621 }, { "epoch": 0.19132574592433096, "grad_norm": 0.40989072701894735, "learning_rate": 9.343762183788615e-05, "loss": 0.942, "step": 622 }, { "epoch": 0.19163334358658873, "grad_norm": 0.7984097767668485, "learning_rate": 9.341292753806581e-05, "loss": 1.1235, "step": 623 }, { "epoch": 0.1919409412488465, "grad_norm": 0.5277293842469067, "learning_rate": 9.338819013888347e-05, "loss": 1.039, "step": 624 }, { "epoch": 0.19224853891110427, "grad_norm": 0.6937149690100091, "learning_rate": 9.336340966489785e-05, "loss": 1.0586, "step": 625 }, { "epoch": 0.19255613657336204, "grad_norm": 0.6010528894990324, "learning_rate": 9.333858614071045e-05, "loss": 0.8281, "step": 626 }, { "epoch": 0.1928637342356198, "grad_norm": 0.46766270007058447, "learning_rate": 9.33137195909655e-05, "loss": 0.95, "step": 627 }, { "epoch": 0.19317133189787758, "grad_norm": 0.5917830860793384, "learning_rate": 9.328881004034993e-05, "loss": 1.0135, "step": 628 }, { "epoch": 0.19347892956013535, "grad_norm": 0.7905695128486918, "learning_rate": 9.326385751359338e-05, "loss": 1.0188, "step": 629 }, { "epoch": 0.19378652722239312, "grad_norm": 0.49038090857205197, "learning_rate": 9.323886203546814e-05, "loss": 0.8715, "step": 630 }, { "epoch": 0.1940941248846509, "grad_norm": 0.6673392304036585, "learning_rate": 9.321382363078917e-05, "loss": 1.0354, "step": 631 }, { "epoch": 0.19440172254690866, "grad_norm": 0.504439282886794, "learning_rate": 9.318874232441399e-05, "loss": 0.829, "step": 632 }, { "epoch": 0.1947093202091664, "grad_norm": 0.8088872019553863, "learning_rate": 9.316361814124278e-05, "loss": 1.194, "step": 633 }, { "epoch": 0.19501691787142417, "grad_norm": 0.5252287556013454, "learning_rate": 9.313845110621823e-05, "loss": 1.0347, "step": 634 }, { "epoch": 0.19532451553368194, "grad_norm": 0.7520397066890867, "learning_rate": 9.311324124432564e-05, "loss": 0.8975, "step": 635 }, { "epoch": 0.1956321131959397, "grad_norm": 0.5702995212074724, "learning_rate": 9.308798858059274e-05, "loss": 0.8056, "step": 636 }, { "epoch": 0.19593971085819747, "grad_norm": 0.4367557643781176, "learning_rate": 9.30626931400898e-05, "loss": 0.9172, "step": 637 }, { "epoch": 0.19624730852045524, "grad_norm": 0.7225766200574042, "learning_rate": 9.303735494792958e-05, "loss": 1.0628, "step": 638 }, { "epoch": 0.196554906182713, "grad_norm": 0.5148156817792424, "learning_rate": 9.301197402926724e-05, "loss": 0.9654, "step": 639 }, { "epoch": 0.19686250384497078, "grad_norm": 0.3801199370437178, "learning_rate": 9.298655040930039e-05, "loss": 0.9466, "step": 640 }, { "epoch": 0.19717010150722855, "grad_norm": 0.8374042431315506, "learning_rate": 9.296108411326901e-05, "loss": 0.846, "step": 641 }, { "epoch": 0.19747769916948632, "grad_norm": 0.6878562252356205, "learning_rate": 9.293557516645544e-05, "loss": 0.9974, "step": 642 }, { "epoch": 0.1977852968317441, "grad_norm": 0.48992708346607733, "learning_rate": 9.29100235941844e-05, "loss": 0.8887, "step": 643 }, { "epoch": 0.19809289449400186, "grad_norm": 0.44342558500162393, "learning_rate": 9.28844294218229e-05, "loss": 0.7754, "step": 644 }, { "epoch": 0.1984004921562596, "grad_norm": 2.7061042729134117, "learning_rate": 9.285879267478026e-05, "loss": 0.9467, "step": 645 }, { "epoch": 0.19870808981851737, "grad_norm": 0.5109030927148676, "learning_rate": 9.283311337850804e-05, "loss": 0.9102, "step": 646 }, { "epoch": 0.19901568748077514, "grad_norm": 0.44543880202335245, "learning_rate": 9.280739155850008e-05, "loss": 0.9099, "step": 647 }, { "epoch": 0.1993232851430329, "grad_norm": 0.6624705062260507, "learning_rate": 9.278162724029238e-05, "loss": 0.9489, "step": 648 }, { "epoch": 0.19963088280529068, "grad_norm": 0.5714461626133392, "learning_rate": 9.27558204494632e-05, "loss": 0.9568, "step": 649 }, { "epoch": 0.19993848046754845, "grad_norm": 0.5903403794820923, "learning_rate": 9.272997121163293e-05, "loss": 1.0049, "step": 650 }, { "epoch": 0.20024607812980622, "grad_norm": 0.4801083832339588, "learning_rate": 9.270407955246408e-05, "loss": 0.9358, "step": 651 }, { "epoch": 0.20055367579206398, "grad_norm": 0.5078309711882896, "learning_rate": 9.26781454976613e-05, "loss": 0.8087, "step": 652 }, { "epoch": 0.20086127345432175, "grad_norm": 0.46734784690540543, "learning_rate": 9.265216907297134e-05, "loss": 0.9003, "step": 653 }, { "epoch": 0.20116887111657952, "grad_norm": 0.44346030159376293, "learning_rate": 9.2626150304183e-05, "loss": 0.9442, "step": 654 }, { "epoch": 0.2014764687788373, "grad_norm": 0.4576923464084174, "learning_rate": 9.260008921712712e-05, "loss": 0.9774, "step": 655 }, { "epoch": 0.20178406644109506, "grad_norm": 0.7689247356581705, "learning_rate": 9.257398583767653e-05, "loss": 1.004, "step": 656 }, { "epoch": 0.2020916641033528, "grad_norm": 0.5668455657681024, "learning_rate": 9.25478401917461e-05, "loss": 0.8587, "step": 657 }, { "epoch": 0.20239926176561057, "grad_norm": 0.4808737111205069, "learning_rate": 9.252165230529262e-05, "loss": 0.9321, "step": 658 }, { "epoch": 0.20270685942786834, "grad_norm": 0.41238956191003573, "learning_rate": 9.249542220431479e-05, "loss": 0.8331, "step": 659 }, { "epoch": 0.2030144570901261, "grad_norm": 0.47994760190148733, "learning_rate": 9.246914991485332e-05, "loss": 0.9275, "step": 660 }, { "epoch": 0.20332205475238388, "grad_norm": 0.7549238982799021, "learning_rate": 9.244283546299068e-05, "loss": 0.9204, "step": 661 }, { "epoch": 0.20362965241464165, "grad_norm": 0.7076872081210229, "learning_rate": 9.241647887485128e-05, "loss": 1.152, "step": 662 }, { "epoch": 0.20393725007689942, "grad_norm": 0.4203486499452716, "learning_rate": 9.239008017660133e-05, "loss": 0.8373, "step": 663 }, { "epoch": 0.2042448477391572, "grad_norm": 0.46690317367536005, "learning_rate": 9.236363939444887e-05, "loss": 0.8534, "step": 664 }, { "epoch": 0.20455244540141496, "grad_norm": 0.4921465612203679, "learning_rate": 9.23371565546437e-05, "loss": 0.8999, "step": 665 }, { "epoch": 0.20486004306367273, "grad_norm": 0.43368057137777105, "learning_rate": 9.231063168347736e-05, "loss": 0.7973, "step": 666 }, { "epoch": 0.2051676407259305, "grad_norm": 0.5524968459218266, "learning_rate": 9.228406480728317e-05, "loss": 0.953, "step": 667 }, { "epoch": 0.20547523838818824, "grad_norm": 0.46802747647306875, "learning_rate": 9.225745595243607e-05, "loss": 0.9703, "step": 668 }, { "epoch": 0.205782836050446, "grad_norm": 0.49506818836899746, "learning_rate": 9.223080514535276e-05, "loss": 0.9341, "step": 669 }, { "epoch": 0.20609043371270377, "grad_norm": 0.5880229272008389, "learning_rate": 9.220411241249153e-05, "loss": 1.048, "step": 670 }, { "epoch": 0.20639803137496154, "grad_norm": 0.5917838373395155, "learning_rate": 9.217737778035235e-05, "loss": 0.9561, "step": 671 }, { "epoch": 0.2067056290372193, "grad_norm": 0.4924013952288795, "learning_rate": 9.21506012754767e-05, "loss": 0.8824, "step": 672 }, { "epoch": 0.20701322669947708, "grad_norm": 0.7479942240378945, "learning_rate": 9.212378292444774e-05, "loss": 0.8387, "step": 673 }, { "epoch": 0.20732082436173485, "grad_norm": 0.5255192788455201, "learning_rate": 9.209692275389006e-05, "loss": 0.9664, "step": 674 }, { "epoch": 0.20762842202399262, "grad_norm": 0.42701991996856564, "learning_rate": 9.207002079046984e-05, "loss": 0.7888, "step": 675 }, { "epoch": 0.2079360196862504, "grad_norm": 0.7728054168876196, "learning_rate": 9.204307706089476e-05, "loss": 0.9433, "step": 676 }, { "epoch": 0.20824361734850816, "grad_norm": 0.6124818998495521, "learning_rate": 9.201609159191394e-05, "loss": 0.8994, "step": 677 }, { "epoch": 0.20855121501076593, "grad_norm": 0.6188017002031408, "learning_rate": 9.198906441031788e-05, "loss": 0.7334, "step": 678 }, { "epoch": 0.2088588126730237, "grad_norm": 0.5601553953248974, "learning_rate": 9.196199554293861e-05, "loss": 0.8951, "step": 679 }, { "epoch": 0.20916641033528144, "grad_norm": 1.381114330214073, "learning_rate": 9.193488501664945e-05, "loss": 0.9495, "step": 680 }, { "epoch": 0.2094740079975392, "grad_norm": 0.5684620039292936, "learning_rate": 9.190773285836511e-05, "loss": 0.9816, "step": 681 }, { "epoch": 0.20978160565979698, "grad_norm": 0.48966103636875247, "learning_rate": 9.188053909504165e-05, "loss": 0.8935, "step": 682 }, { "epoch": 0.21008920332205475, "grad_norm": 0.4874016095557329, "learning_rate": 9.185330375367642e-05, "loss": 0.8978, "step": 683 }, { "epoch": 0.21039680098431252, "grad_norm": 0.5606815976203694, "learning_rate": 9.182602686130802e-05, "loss": 0.9008, "step": 684 }, { "epoch": 0.21070439864657028, "grad_norm": 0.7921015718268661, "learning_rate": 9.179870844501632e-05, "loss": 0.9059, "step": 685 }, { "epoch": 0.21101199630882805, "grad_norm": 0.583669057055102, "learning_rate": 9.177134853192247e-05, "loss": 0.7743, "step": 686 }, { "epoch": 0.21131959397108582, "grad_norm": 0.5450870212062936, "learning_rate": 9.17439471491887e-05, "loss": 1.0039, "step": 687 }, { "epoch": 0.2116271916333436, "grad_norm": 0.5215468832050781, "learning_rate": 9.171650432401853e-05, "loss": 0.8905, "step": 688 }, { "epoch": 0.21193478929560136, "grad_norm": 0.5613680710923972, "learning_rate": 9.168902008365654e-05, "loss": 0.9362, "step": 689 }, { "epoch": 0.21224238695785913, "grad_norm": 0.5519383495102979, "learning_rate": 9.166149445538847e-05, "loss": 0.9013, "step": 690 }, { "epoch": 0.2125499846201169, "grad_norm": 0.4800275301021484, "learning_rate": 9.163392746654113e-05, "loss": 1.0333, "step": 691 }, { "epoch": 0.21285758228237464, "grad_norm": 0.5457263603659662, "learning_rate": 9.160631914448238e-05, "loss": 1.0217, "step": 692 }, { "epoch": 0.2131651799446324, "grad_norm": 0.49505628609045543, "learning_rate": 9.157866951662118e-05, "loss": 0.8736, "step": 693 }, { "epoch": 0.21347277760689018, "grad_norm": 0.8688640756658613, "learning_rate": 9.155097861040738e-05, "loss": 0.9486, "step": 694 }, { "epoch": 0.21378037526914795, "grad_norm": 0.4781978363744288, "learning_rate": 9.152324645333193e-05, "loss": 0.7367, "step": 695 }, { "epoch": 0.21408797293140572, "grad_norm": 0.8326820412571897, "learning_rate": 9.149547307292665e-05, "loss": 0.9307, "step": 696 }, { "epoch": 0.2143955705936635, "grad_norm": 0.4743251016518917, "learning_rate": 9.146765849676434e-05, "loss": 0.8918, "step": 697 }, { "epoch": 0.21470316825592126, "grad_norm": 1.1573498223737608, "learning_rate": 9.143980275245867e-05, "loss": 1.2229, "step": 698 }, { "epoch": 0.21501076591817903, "grad_norm": 0.6314824617420126, "learning_rate": 9.141190586766419e-05, "loss": 0.9357, "step": 699 }, { "epoch": 0.2153183635804368, "grad_norm": 0.5923456131935663, "learning_rate": 9.138396787007626e-05, "loss": 0.9546, "step": 700 }, { "epoch": 0.21562596124269456, "grad_norm": 0.44674245907220644, "learning_rate": 9.135598878743112e-05, "loss": 0.9056, "step": 701 }, { "epoch": 0.21593355890495233, "grad_norm": 0.4422674254357493, "learning_rate": 9.132796864750574e-05, "loss": 0.9084, "step": 702 }, { "epoch": 0.2162411565672101, "grad_norm": 0.48088883739218063, "learning_rate": 9.129990747811791e-05, "loss": 0.8885, "step": 703 }, { "epoch": 0.21654875422946784, "grad_norm": 0.721009875384976, "learning_rate": 9.127180530712609e-05, "loss": 0.8861, "step": 704 }, { "epoch": 0.2168563518917256, "grad_norm": 0.6032776114676812, "learning_rate": 9.124366216242945e-05, "loss": 1.0573, "step": 705 }, { "epoch": 0.21716394955398338, "grad_norm": 0.5735336760403741, "learning_rate": 9.12154780719679e-05, "loss": 1.1305, "step": 706 }, { "epoch": 0.21747154721624115, "grad_norm": 0.3965512817743132, "learning_rate": 9.118725306372196e-05, "loss": 0.9276, "step": 707 }, { "epoch": 0.21777914487849892, "grad_norm": 0.4952137176253096, "learning_rate": 9.115898716571276e-05, "loss": 0.9075, "step": 708 }, { "epoch": 0.2180867425407567, "grad_norm": 0.9458241402095061, "learning_rate": 9.113068040600204e-05, "loss": 1.0992, "step": 709 }, { "epoch": 0.21839434020301446, "grad_norm": 0.4644687531357606, "learning_rate": 9.110233281269209e-05, "loss": 0.6867, "step": 710 }, { "epoch": 0.21870193786527223, "grad_norm": 0.5718228683283549, "learning_rate": 9.107394441392577e-05, "loss": 1.0456, "step": 711 }, { "epoch": 0.21900953552753, "grad_norm": 0.5415487852920586, "learning_rate": 9.10455152378864e-05, "loss": 0.8223, "step": 712 }, { "epoch": 0.21931713318978777, "grad_norm": 0.7308830885227864, "learning_rate": 9.101704531279788e-05, "loss": 0.9307, "step": 713 }, { "epoch": 0.21962473085204554, "grad_norm": 0.5390077710712387, "learning_rate": 9.098853466692445e-05, "loss": 0.9569, "step": 714 }, { "epoch": 0.2199323285143033, "grad_norm": 0.5551385266759938, "learning_rate": 9.095998332857084e-05, "loss": 0.9716, "step": 715 }, { "epoch": 0.22023992617656105, "grad_norm": 0.4106578903945982, "learning_rate": 9.093139132608218e-05, "loss": 0.7486, "step": 716 }, { "epoch": 0.22054752383881882, "grad_norm": 0.460790412883257, "learning_rate": 9.090275868784393e-05, "loss": 0.998, "step": 717 }, { "epoch": 0.22085512150107658, "grad_norm": 0.5696929668429274, "learning_rate": 9.087408544228194e-05, "loss": 1.0049, "step": 718 }, { "epoch": 0.22116271916333435, "grad_norm": 0.6497631428092479, "learning_rate": 9.084537161786234e-05, "loss": 0.8325, "step": 719 }, { "epoch": 0.22147031682559212, "grad_norm": 0.4420704240067625, "learning_rate": 9.081661724309158e-05, "loss": 0.8392, "step": 720 }, { "epoch": 0.2217779144878499, "grad_norm": 0.4275989631309164, "learning_rate": 9.07878223465163e-05, "loss": 0.7862, "step": 721 }, { "epoch": 0.22208551215010766, "grad_norm": 1.405003825315643, "learning_rate": 9.075898695672347e-05, "loss": 1.2105, "step": 722 }, { "epoch": 0.22239310981236543, "grad_norm": 0.6085879663704785, "learning_rate": 9.073011110234017e-05, "loss": 0.9802, "step": 723 }, { "epoch": 0.2227007074746232, "grad_norm": 0.613268270266097, "learning_rate": 9.070119481203372e-05, "loss": 1.2149, "step": 724 }, { "epoch": 0.22300830513688097, "grad_norm": 0.5385138659168507, "learning_rate": 9.067223811451151e-05, "loss": 1.0209, "step": 725 }, { "epoch": 0.22331590279913874, "grad_norm": 0.5424848667982509, "learning_rate": 9.064324103852111e-05, "loss": 0.9345, "step": 726 }, { "epoch": 0.2236235004613965, "grad_norm": 0.5478570728174912, "learning_rate": 9.061420361285016e-05, "loss": 0.7688, "step": 727 }, { "epoch": 0.22393109812365425, "grad_norm": 0.6751785518933391, "learning_rate": 9.058512586632634e-05, "loss": 1.0684, "step": 728 }, { "epoch": 0.22423869578591202, "grad_norm": 0.5530551318845965, "learning_rate": 9.055600782781737e-05, "loss": 1.1924, "step": 729 }, { "epoch": 0.2245462934481698, "grad_norm": 0.617922158729447, "learning_rate": 9.052684952623098e-05, "loss": 1.0642, "step": 730 }, { "epoch": 0.22485389111042756, "grad_norm": 0.43084810330292744, "learning_rate": 9.049765099051488e-05, "loss": 0.8785, "step": 731 }, { "epoch": 0.22516148877268533, "grad_norm": 0.5565906141474967, "learning_rate": 9.04684122496567e-05, "loss": 0.7249, "step": 732 }, { "epoch": 0.2254690864349431, "grad_norm": 1.107495814232026, "learning_rate": 9.043913333268397e-05, "loss": 1.1344, "step": 733 }, { "epoch": 0.22577668409720086, "grad_norm": 0.5317167737686448, "learning_rate": 9.040981426866415e-05, "loss": 0.7444, "step": 734 }, { "epoch": 0.22608428175945863, "grad_norm": 0.5354553111479923, "learning_rate": 9.038045508670453e-05, "loss": 0.7951, "step": 735 }, { "epoch": 0.2263918794217164, "grad_norm": 0.4152632907668809, "learning_rate": 9.035105581595223e-05, "loss": 0.8226, "step": 736 }, { "epoch": 0.22669947708397417, "grad_norm": 0.4798579182415868, "learning_rate": 9.032161648559421e-05, "loss": 0.8365, "step": 737 }, { "epoch": 0.22700707474623194, "grad_norm": 0.5715568127763203, "learning_rate": 9.029213712485712e-05, "loss": 1.0787, "step": 738 }, { "epoch": 0.22731467240848968, "grad_norm": 0.5300923668337472, "learning_rate": 9.02626177630074e-05, "loss": 0.9311, "step": 739 }, { "epoch": 0.22762227007074745, "grad_norm": 0.5503816199478871, "learning_rate": 9.023305842935122e-05, "loss": 0.6648, "step": 740 }, { "epoch": 0.22792986773300522, "grad_norm": 0.9504414053527629, "learning_rate": 9.02034591532344e-05, "loss": 1.0638, "step": 741 }, { "epoch": 0.228237465395263, "grad_norm": 0.5113685030296833, "learning_rate": 9.017381996404242e-05, "loss": 0.8468, "step": 742 }, { "epoch": 0.22854506305752076, "grad_norm": 0.6038035415940373, "learning_rate": 9.01441408912004e-05, "loss": 1.0688, "step": 743 }, { "epoch": 0.22885266071977853, "grad_norm": 0.5491911478726014, "learning_rate": 9.011442196417303e-05, "loss": 0.8285, "step": 744 }, { "epoch": 0.2291602583820363, "grad_norm": 0.8627465779748048, "learning_rate": 9.00846632124646e-05, "loss": 0.9664, "step": 745 }, { "epoch": 0.22946785604429407, "grad_norm": 0.6813456704022024, "learning_rate": 9.005486466561889e-05, "loss": 1.1976, "step": 746 }, { "epoch": 0.22977545370655184, "grad_norm": 0.5675785762728124, "learning_rate": 9.002502635321926e-05, "loss": 0.8809, "step": 747 }, { "epoch": 0.2300830513688096, "grad_norm": 0.6700963293722536, "learning_rate": 8.999514830488847e-05, "loss": 0.8778, "step": 748 }, { "epoch": 0.23039064903106737, "grad_norm": 0.6455836446942617, "learning_rate": 8.996523055028876e-05, "loss": 0.8509, "step": 749 }, { "epoch": 0.23069824669332514, "grad_norm": 0.43485174265945725, "learning_rate": 8.993527311912183e-05, "loss": 0.8699, "step": 750 }, { "epoch": 0.23100584435558288, "grad_norm": 0.4629782714623143, "learning_rate": 8.990527604112868e-05, "loss": 1.0304, "step": 751 }, { "epoch": 0.23131344201784065, "grad_norm": 0.6432230999348771, "learning_rate": 8.987523934608977e-05, "loss": 0.9861, "step": 752 }, { "epoch": 0.23162103968009842, "grad_norm": 0.52235127323496, "learning_rate": 8.984516306382482e-05, "loss": 0.8822, "step": 753 }, { "epoch": 0.2319286373423562, "grad_norm": 0.6104057153214245, "learning_rate": 8.981504722419288e-05, "loss": 1.0385, "step": 754 }, { "epoch": 0.23223623500461396, "grad_norm": 0.692089892213474, "learning_rate": 8.978489185709227e-05, "loss": 1.0161, "step": 755 }, { "epoch": 0.23254383266687173, "grad_norm": 0.4636628681762921, "learning_rate": 8.975469699246055e-05, "loss": 0.9039, "step": 756 }, { "epoch": 0.2328514303291295, "grad_norm": 0.46578831850655983, "learning_rate": 8.972446266027448e-05, "loss": 0.9302, "step": 757 }, { "epoch": 0.23315902799138727, "grad_norm": 0.7471157301002397, "learning_rate": 8.969418889055002e-05, "loss": 0.8283, "step": 758 }, { "epoch": 0.23346662565364504, "grad_norm": 0.4640623747324746, "learning_rate": 8.96638757133423e-05, "loss": 0.9878, "step": 759 }, { "epoch": 0.2337742233159028, "grad_norm": 0.7099930429085172, "learning_rate": 8.96335231587455e-05, "loss": 0.9644, "step": 760 }, { "epoch": 0.23408182097816058, "grad_norm": 0.5342674348977824, "learning_rate": 8.9603131256893e-05, "loss": 1.0491, "step": 761 }, { "epoch": 0.23438941864041835, "grad_norm": 0.6304749300870255, "learning_rate": 8.957270003795713e-05, "loss": 0.7915, "step": 762 }, { "epoch": 0.2346970163026761, "grad_norm": 0.5189934183841733, "learning_rate": 8.954222953214934e-05, "loss": 1.0373, "step": 763 }, { "epoch": 0.23500461396493386, "grad_norm": 0.579366099459142, "learning_rate": 8.951171976972004e-05, "loss": 0.9005, "step": 764 }, { "epoch": 0.23531221162719163, "grad_norm": 0.4181715646669672, "learning_rate": 8.948117078095865e-05, "loss": 0.897, "step": 765 }, { "epoch": 0.2356198092894494, "grad_norm": 0.4404837316096133, "learning_rate": 8.945058259619348e-05, "loss": 0.9109, "step": 766 }, { "epoch": 0.23592740695170716, "grad_norm": 0.43051146312539956, "learning_rate": 8.941995524579177e-05, "loss": 0.9264, "step": 767 }, { "epoch": 0.23623500461396493, "grad_norm": 0.499012118084279, "learning_rate": 8.93892887601597e-05, "loss": 0.8469, "step": 768 }, { "epoch": 0.2365426022762227, "grad_norm": 0.9857610861878131, "learning_rate": 8.935858316974221e-05, "loss": 1.0196, "step": 769 }, { "epoch": 0.23685019993848047, "grad_norm": 0.5085690844019934, "learning_rate": 8.932783850502313e-05, "loss": 0.8454, "step": 770 }, { "epoch": 0.23715779760073824, "grad_norm": 0.5046490903075415, "learning_rate": 8.929705479652506e-05, "loss": 1.0085, "step": 771 }, { "epoch": 0.237465395262996, "grad_norm": 0.44568002114636923, "learning_rate": 8.926623207480936e-05, "loss": 0.8607, "step": 772 }, { "epoch": 0.23777299292525378, "grad_norm": 0.8636408870437451, "learning_rate": 8.923537037047612e-05, "loss": 0.9621, "step": 773 }, { "epoch": 0.23808059058751155, "grad_norm": 0.5672999891351304, "learning_rate": 8.920446971416412e-05, "loss": 0.9816, "step": 774 }, { "epoch": 0.2383881882497693, "grad_norm": 0.8824967750681804, "learning_rate": 8.917353013655086e-05, "loss": 0.8567, "step": 775 }, { "epoch": 0.23869578591202706, "grad_norm": 0.47958615220124284, "learning_rate": 8.914255166835242e-05, "loss": 0.8934, "step": 776 }, { "epoch": 0.23900338357428483, "grad_norm": 0.6033624692228035, "learning_rate": 8.911153434032353e-05, "loss": 0.9083, "step": 777 }, { "epoch": 0.2393109812365426, "grad_norm": 0.6621975700032717, "learning_rate": 8.908047818325747e-05, "loss": 0.813, "step": 778 }, { "epoch": 0.23961857889880037, "grad_norm": 0.6223434481330985, "learning_rate": 8.904938322798608e-05, "loss": 1.0089, "step": 779 }, { "epoch": 0.23992617656105814, "grad_norm": 0.601409507594365, "learning_rate": 8.901824950537974e-05, "loss": 0.8036, "step": 780 }, { "epoch": 0.2402337742233159, "grad_norm": 0.6035042357525328, "learning_rate": 8.89870770463473e-05, "loss": 0.9686, "step": 781 }, { "epoch": 0.24054137188557367, "grad_norm": 0.5011485345542368, "learning_rate": 8.895586588183603e-05, "loss": 0.8711, "step": 782 }, { "epoch": 0.24084896954783144, "grad_norm": 1.0548024679495902, "learning_rate": 8.892461604283169e-05, "loss": 0.727, "step": 783 }, { "epoch": 0.2411565672100892, "grad_norm": 0.6132400580272943, "learning_rate": 8.88933275603584e-05, "loss": 1.0078, "step": 784 }, { "epoch": 0.24146416487234698, "grad_norm": 0.5113243300280685, "learning_rate": 8.886200046547865e-05, "loss": 0.9312, "step": 785 }, { "epoch": 0.24177176253460475, "grad_norm": 0.5032745418889611, "learning_rate": 8.883063478929326e-05, "loss": 0.8135, "step": 786 }, { "epoch": 0.2420793601968625, "grad_norm": 0.9286173380976788, "learning_rate": 8.879923056294137e-05, "loss": 1.0943, "step": 787 }, { "epoch": 0.24238695785912026, "grad_norm": 0.5265140355751455, "learning_rate": 8.876778781760035e-05, "loss": 0.988, "step": 788 }, { "epoch": 0.24269455552137803, "grad_norm": 0.6893639264943746, "learning_rate": 8.873630658448587e-05, "loss": 0.8523, "step": 789 }, { "epoch": 0.2430021531836358, "grad_norm": 0.41557461648006905, "learning_rate": 8.870478689485175e-05, "loss": 0.8111, "step": 790 }, { "epoch": 0.24330975084589357, "grad_norm": 0.5246383543041417, "learning_rate": 8.867322877999004e-05, "loss": 0.8136, "step": 791 }, { "epoch": 0.24361734850815134, "grad_norm": 0.7585161369352121, "learning_rate": 8.86416322712309e-05, "loss": 0.994, "step": 792 }, { "epoch": 0.2439249461704091, "grad_norm": 0.5004537492044725, "learning_rate": 8.860999739994262e-05, "loss": 0.8577, "step": 793 }, { "epoch": 0.24423254383266688, "grad_norm": 0.6850406286418592, "learning_rate": 8.85783241975316e-05, "loss": 0.8609, "step": 794 }, { "epoch": 0.24454014149492465, "grad_norm": 0.7864138364308642, "learning_rate": 8.854661269544226e-05, "loss": 0.9848, "step": 795 }, { "epoch": 0.24484773915718241, "grad_norm": 0.5750409603894429, "learning_rate": 8.851486292515704e-05, "loss": 0.9736, "step": 796 }, { "epoch": 0.24515533681944018, "grad_norm": 0.5507658536535197, "learning_rate": 8.84830749181964e-05, "loss": 0.8285, "step": 797 }, { "epoch": 0.24546293448169793, "grad_norm": 0.49998691967243697, "learning_rate": 8.845124870611874e-05, "loss": 0.8059, "step": 798 }, { "epoch": 0.2457705321439557, "grad_norm": 0.9636473549880924, "learning_rate": 8.84193843205204e-05, "loss": 0.8037, "step": 799 }, { "epoch": 0.24607812980621346, "grad_norm": 0.9348761906520175, "learning_rate": 8.838748179303562e-05, "loss": 1.0943, "step": 800 }, { "epoch": 0.24638572746847123, "grad_norm": 0.4486513733824951, "learning_rate": 8.835554115533649e-05, "loss": 0.5535, "step": 801 }, { "epoch": 0.246693325130729, "grad_norm": 0.621296039402373, "learning_rate": 8.832356243913293e-05, "loss": 0.8876, "step": 802 }, { "epoch": 0.24700092279298677, "grad_norm": 0.5932111680606399, "learning_rate": 8.829154567617272e-05, "loss": 1.0634, "step": 803 }, { "epoch": 0.24730852045524454, "grad_norm": 0.8347412598235264, "learning_rate": 8.825949089824132e-05, "loss": 1.1168, "step": 804 }, { "epoch": 0.2476161181175023, "grad_norm": 0.45759714861885675, "learning_rate": 8.822739813716203e-05, "loss": 0.9033, "step": 805 }, { "epoch": 0.24792371577976008, "grad_norm": 0.600800996479016, "learning_rate": 8.819526742479577e-05, "loss": 0.975, "step": 806 }, { "epoch": 0.24823131344201785, "grad_norm": 0.5967417716553742, "learning_rate": 8.816309879304121e-05, "loss": 1.0159, "step": 807 }, { "epoch": 0.24853891110427562, "grad_norm": 0.5394867668856901, "learning_rate": 8.813089227383461e-05, "loss": 0.8768, "step": 808 }, { "epoch": 0.2488465087665334, "grad_norm": 0.6172041124496896, "learning_rate": 8.809864789914987e-05, "loss": 0.9068, "step": 809 }, { "epoch": 0.24915410642879113, "grad_norm": 0.5054160343336437, "learning_rate": 8.806636570099846e-05, "loss": 0.7202, "step": 810 }, { "epoch": 0.2494617040910489, "grad_norm": 0.5453818791836226, "learning_rate": 8.803404571142944e-05, "loss": 0.8343, "step": 811 }, { "epoch": 0.24976930175330667, "grad_norm": 0.49282537337059606, "learning_rate": 8.80016879625293e-05, "loss": 0.7141, "step": 812 }, { "epoch": 0.25007689941556444, "grad_norm": 0.43228374162296596, "learning_rate": 8.796929248642214e-05, "loss": 0.8763, "step": 813 }, { "epoch": 0.25038449707782223, "grad_norm": 0.4216159204848058, "learning_rate": 8.79368593152694e-05, "loss": 0.8885, "step": 814 }, { "epoch": 0.25069209474008, "grad_norm": 0.3901200784234157, "learning_rate": 8.790438848127001e-05, "loss": 0.7946, "step": 815 }, { "epoch": 0.2509996924023377, "grad_norm": 0.562193162037815, "learning_rate": 8.787188001666026e-05, "loss": 0.8901, "step": 816 }, { "epoch": 0.2513072900645955, "grad_norm": 0.6198441144929514, "learning_rate": 8.783933395371383e-05, "loss": 0.897, "step": 817 }, { "epoch": 0.25161488772685325, "grad_norm": 0.4457420317962644, "learning_rate": 8.780675032474169e-05, "loss": 0.7962, "step": 818 }, { "epoch": 0.25192248538911105, "grad_norm": 0.5198806773483347, "learning_rate": 8.777412916209213e-05, "loss": 1.0957, "step": 819 }, { "epoch": 0.2522300830513688, "grad_norm": 0.5028238093489619, "learning_rate": 8.774147049815069e-05, "loss": 0.9054, "step": 820 }, { "epoch": 0.2525376807136266, "grad_norm": 0.4683422204225956, "learning_rate": 8.770877436534016e-05, "loss": 0.8317, "step": 821 }, { "epoch": 0.25284527837588433, "grad_norm": 1.30091495754804, "learning_rate": 8.767604079612049e-05, "loss": 0.8972, "step": 822 }, { "epoch": 0.2531528760381421, "grad_norm": 0.44656507464590117, "learning_rate": 8.764326982298882e-05, "loss": 0.9105, "step": 823 }, { "epoch": 0.25346047370039987, "grad_norm": 0.8150529251866454, "learning_rate": 8.761046147847945e-05, "loss": 1.1854, "step": 824 }, { "epoch": 0.25376807136265767, "grad_norm": 0.6562975078532798, "learning_rate": 8.757761579516373e-05, "loss": 1.0038, "step": 825 }, { "epoch": 0.2540756690249154, "grad_norm": 0.483615291336053, "learning_rate": 8.754473280565012e-05, "loss": 0.9103, "step": 826 }, { "epoch": 0.2543832666871732, "grad_norm": 0.453712606189041, "learning_rate": 8.751181254258408e-05, "loss": 0.9565, "step": 827 }, { "epoch": 0.25469086434943095, "grad_norm": 0.5888651392569334, "learning_rate": 8.74788550386481e-05, "loss": 0.9036, "step": 828 }, { "epoch": 0.2549984620116887, "grad_norm": 0.5035108668838517, "learning_rate": 8.744586032656166e-05, "loss": 0.8797, "step": 829 }, { "epoch": 0.2553060596739465, "grad_norm": 0.8476204469533666, "learning_rate": 8.741282843908111e-05, "loss": 1.106, "step": 830 }, { "epoch": 0.2556136573362042, "grad_norm": 0.46273187123979304, "learning_rate": 8.73797594089998e-05, "loss": 0.7793, "step": 831 }, { "epoch": 0.255921254998462, "grad_norm": 0.5174475052574737, "learning_rate": 8.734665326914791e-05, "loss": 0.8595, "step": 832 }, { "epoch": 0.25622885266071976, "grad_norm": 0.947785497289752, "learning_rate": 8.731351005239241e-05, "loss": 0.8641, "step": 833 }, { "epoch": 0.25653645032297756, "grad_norm": 0.5416870632213003, "learning_rate": 8.728032979163716e-05, "loss": 0.9925, "step": 834 }, { "epoch": 0.2568440479852353, "grad_norm": 0.5925736852822072, "learning_rate": 8.724711251982275e-05, "loss": 0.8811, "step": 835 }, { "epoch": 0.2571516456474931, "grad_norm": 0.46369213601906295, "learning_rate": 8.721385826992654e-05, "loss": 0.99, "step": 836 }, { "epoch": 0.25745924330975084, "grad_norm": 0.4319180588287235, "learning_rate": 8.718056707496259e-05, "loss": 0.6959, "step": 837 }, { "epoch": 0.25776684097200864, "grad_norm": 0.5494267972901301, "learning_rate": 8.714723896798163e-05, "loss": 0.8761, "step": 838 }, { "epoch": 0.2580744386342664, "grad_norm": 0.6963994675555824, "learning_rate": 8.711387398207105e-05, "loss": 0.9636, "step": 839 }, { "epoch": 0.2583820362965241, "grad_norm": 0.44350586906603523, "learning_rate": 8.708047215035484e-05, "loss": 0.8297, "step": 840 }, { "epoch": 0.2586896339587819, "grad_norm": 0.5584820799104525, "learning_rate": 8.704703350599357e-05, "loss": 0.966, "step": 841 }, { "epoch": 0.25899723162103966, "grad_norm": 0.783745526719448, "learning_rate": 8.701355808218439e-05, "loss": 0.9766, "step": 842 }, { "epoch": 0.25930482928329746, "grad_norm": 0.5095054239336972, "learning_rate": 8.698004591216088e-05, "loss": 0.7781, "step": 843 }, { "epoch": 0.2596124269455552, "grad_norm": 0.5333107530714362, "learning_rate": 8.694649702919322e-05, "loss": 0.8491, "step": 844 }, { "epoch": 0.259920024607813, "grad_norm": 0.543020815312436, "learning_rate": 8.691291146658793e-05, "loss": 0.7919, "step": 845 }, { "epoch": 0.26022762227007074, "grad_norm": 0.5422330839394375, "learning_rate": 8.6879289257688e-05, "loss": 0.8971, "step": 846 }, { "epoch": 0.26053521993232853, "grad_norm": 0.9384693930475011, "learning_rate": 8.684563043587279e-05, "loss": 1.1259, "step": 847 }, { "epoch": 0.2608428175945863, "grad_norm": 0.41630396935030106, "learning_rate": 8.681193503455802e-05, "loss": 0.7994, "step": 848 }, { "epoch": 0.26115041525684407, "grad_norm": 0.48929349697239993, "learning_rate": 8.67782030871957e-05, "loss": 0.8714, "step": 849 }, { "epoch": 0.2614580129191018, "grad_norm": 0.5563009757315975, "learning_rate": 8.674443462727415e-05, "loss": 0.9071, "step": 850 }, { "epoch": 0.2617656105813596, "grad_norm": 0.6667031612128609, "learning_rate": 8.671062968831791e-05, "loss": 0.9527, "step": 851 }, { "epoch": 0.26207320824361735, "grad_norm": 0.7988802456555234, "learning_rate": 8.667678830388776e-05, "loss": 0.9887, "step": 852 }, { "epoch": 0.2623808059058751, "grad_norm": 0.6766698717376328, "learning_rate": 8.664291050758064e-05, "loss": 0.7749, "step": 853 }, { "epoch": 0.2626884035681329, "grad_norm": 0.9023544609571932, "learning_rate": 8.660899633302968e-05, "loss": 1.0897, "step": 854 }, { "epoch": 0.26299600123039063, "grad_norm": 0.8777788584139872, "learning_rate": 8.657504581390408e-05, "loss": 0.9763, "step": 855 }, { "epoch": 0.2633035988926484, "grad_norm": 0.8851852543960769, "learning_rate": 8.654105898390912e-05, "loss": 1.2105, "step": 856 }, { "epoch": 0.26361119655490617, "grad_norm": 0.5504023945103796, "learning_rate": 8.650703587678616e-05, "loss": 0.9162, "step": 857 }, { "epoch": 0.26391879421716397, "grad_norm": 0.6268908570803765, "learning_rate": 8.64729765263126e-05, "loss": 0.961, "step": 858 }, { "epoch": 0.2642263918794217, "grad_norm": 0.7401998671964597, "learning_rate": 8.643888096630174e-05, "loss": 0.9603, "step": 859 }, { "epoch": 0.2645339895416795, "grad_norm": 0.6720925819703732, "learning_rate": 8.640474923060287e-05, "loss": 0.9594, "step": 860 }, { "epoch": 0.26484158720393725, "grad_norm": 0.5088739279506154, "learning_rate": 8.637058135310123e-05, "loss": 0.7796, "step": 861 }, { "epoch": 0.26514918486619504, "grad_norm": 0.41483859706789045, "learning_rate": 8.633637736771789e-05, "loss": 0.9116, "step": 862 }, { "epoch": 0.2654567825284528, "grad_norm": 0.565177697874746, "learning_rate": 8.630213730840979e-05, "loss": 0.8875, "step": 863 }, { "epoch": 0.2657643801907105, "grad_norm": 0.521243303517061, "learning_rate": 8.626786120916967e-05, "loss": 0.8891, "step": 864 }, { "epoch": 0.2660719778529683, "grad_norm": 0.5956245253865635, "learning_rate": 8.623354910402606e-05, "loss": 0.8968, "step": 865 }, { "epoch": 0.26637957551522606, "grad_norm": 0.5509595355115863, "learning_rate": 8.619920102704323e-05, "loss": 0.7898, "step": 866 }, { "epoch": 0.26668717317748386, "grad_norm": 0.7900141075748662, "learning_rate": 8.616481701232117e-05, "loss": 0.8788, "step": 867 }, { "epoch": 0.2669947708397416, "grad_norm": 0.5428959931847759, "learning_rate": 8.613039709399555e-05, "loss": 1.02, "step": 868 }, { "epoch": 0.2673023685019994, "grad_norm": 0.6280260765740817, "learning_rate": 8.609594130623768e-05, "loss": 1.0566, "step": 869 }, { "epoch": 0.26760996616425714, "grad_norm": 0.7308099702510316, "learning_rate": 8.606144968325445e-05, "loss": 0.927, "step": 870 }, { "epoch": 0.26791756382651494, "grad_norm": 0.6087005963148553, "learning_rate": 8.60269222592884e-05, "loss": 0.8158, "step": 871 }, { "epoch": 0.2682251614887727, "grad_norm": 0.5570736320387991, "learning_rate": 8.599235906861752e-05, "loss": 1.02, "step": 872 }, { "epoch": 0.2685327591510305, "grad_norm": 0.41913288587633296, "learning_rate": 8.595776014555539e-05, "loss": 0.6816, "step": 873 }, { "epoch": 0.2688403568132882, "grad_norm": 0.7999947787792127, "learning_rate": 8.592312552445102e-05, "loss": 0.9652, "step": 874 }, { "epoch": 0.26914795447554596, "grad_norm": 0.529946930912981, "learning_rate": 8.588845523968886e-05, "loss": 0.7776, "step": 875 }, { "epoch": 0.26945555213780376, "grad_norm": 0.5845447394997982, "learning_rate": 8.58537493256888e-05, "loss": 0.893, "step": 876 }, { "epoch": 0.2697631498000615, "grad_norm": 0.44385029224534783, "learning_rate": 8.581900781690604e-05, "loss": 0.7673, "step": 877 }, { "epoch": 0.2700707474623193, "grad_norm": 0.5540124114594304, "learning_rate": 8.578423074783119e-05, "loss": 0.9393, "step": 878 }, { "epoch": 0.27037834512457704, "grad_norm": 0.5587936855474099, "learning_rate": 8.574941815299011e-05, "loss": 0.9324, "step": 879 }, { "epoch": 0.27068594278683483, "grad_norm": 0.5144439590953592, "learning_rate": 8.571457006694397e-05, "loss": 0.9427, "step": 880 }, { "epoch": 0.2709935404490926, "grad_norm": 0.5405268988040159, "learning_rate": 8.567968652428912e-05, "loss": 0.9508, "step": 881 }, { "epoch": 0.27130113811135037, "grad_norm": 0.5603399948546399, "learning_rate": 8.564476755965719e-05, "loss": 0.9432, "step": 882 }, { "epoch": 0.2716087357736081, "grad_norm": 0.8687782805915591, "learning_rate": 8.560981320771488e-05, "loss": 0.9077, "step": 883 }, { "epoch": 0.2719163334358659, "grad_norm": 0.5260294830922473, "learning_rate": 8.557482350316409e-05, "loss": 0.8591, "step": 884 }, { "epoch": 0.27222393109812365, "grad_norm": 0.6521509118134717, "learning_rate": 8.553979848074182e-05, "loss": 0.8715, "step": 885 }, { "epoch": 0.27253152876038145, "grad_norm": 0.48568656056810994, "learning_rate": 8.550473817522009e-05, "loss": 0.8913, "step": 886 }, { "epoch": 0.2728391264226392, "grad_norm": 0.48547176916905027, "learning_rate": 8.546964262140595e-05, "loss": 0.855, "step": 887 }, { "epoch": 0.27314672408489693, "grad_norm": 0.47765561572370036, "learning_rate": 8.543451185414152e-05, "loss": 0.8369, "step": 888 }, { "epoch": 0.2734543217471547, "grad_norm": 0.8501265117869946, "learning_rate": 8.539934590830378e-05, "loss": 1.1662, "step": 889 }, { "epoch": 0.27376191940941247, "grad_norm": 0.8796255692600016, "learning_rate": 8.536414481880469e-05, "loss": 0.9955, "step": 890 }, { "epoch": 0.27406951707167027, "grad_norm": 0.6726659375671719, "learning_rate": 8.532890862059106e-05, "loss": 1.162, "step": 891 }, { "epoch": 0.274377114733928, "grad_norm": 0.7359727427140467, "learning_rate": 8.529363734864463e-05, "loss": 0.9647, "step": 892 }, { "epoch": 0.2746847123961858, "grad_norm": 0.6328613252067506, "learning_rate": 8.525833103798187e-05, "loss": 0.9765, "step": 893 }, { "epoch": 0.27499231005844355, "grad_norm": 0.8111747564311783, "learning_rate": 8.522298972365411e-05, "loss": 1.0099, "step": 894 }, { "epoch": 0.27529990772070134, "grad_norm": 2.4715941216595665, "learning_rate": 8.51876134407474e-05, "loss": 0.8546, "step": 895 }, { "epoch": 0.2756075053829591, "grad_norm": 0.5542180388109889, "learning_rate": 8.515220222438246e-05, "loss": 0.9477, "step": 896 }, { "epoch": 0.2759151030452169, "grad_norm": 0.7182968713958987, "learning_rate": 8.511675610971477e-05, "loss": 0.856, "step": 897 }, { "epoch": 0.2762227007074746, "grad_norm": 0.49364670744972716, "learning_rate": 8.508127513193444e-05, "loss": 0.8659, "step": 898 }, { "epoch": 0.27653029836973236, "grad_norm": 0.7318886261350516, "learning_rate": 8.504575932626614e-05, "loss": 1.1203, "step": 899 }, { "epoch": 0.27683789603199016, "grad_norm": 0.6215867273260494, "learning_rate": 8.501020872796917e-05, "loss": 1.0072, "step": 900 }, { "epoch": 0.2771454936942479, "grad_norm": 0.6772602472314504, "learning_rate": 8.497462337233733e-05, "loss": 1.0257, "step": 901 }, { "epoch": 0.2774530913565057, "grad_norm": 0.692087680058657, "learning_rate": 8.493900329469897e-05, "loss": 0.913, "step": 902 }, { "epoch": 0.27776068901876344, "grad_norm": 0.7913528532036941, "learning_rate": 8.490334853041689e-05, "loss": 1.0798, "step": 903 }, { "epoch": 0.27806828668102124, "grad_norm": 0.4955310023937105, "learning_rate": 8.486765911488829e-05, "loss": 0.8441, "step": 904 }, { "epoch": 0.278375884343279, "grad_norm": 0.4708158635053622, "learning_rate": 8.483193508354484e-05, "loss": 0.8618, "step": 905 }, { "epoch": 0.2786834820055368, "grad_norm": 1.716356545099553, "learning_rate": 8.479617647185255e-05, "loss": 0.913, "step": 906 }, { "epoch": 0.2789910796677945, "grad_norm": 0.7515136775114539, "learning_rate": 8.476038331531171e-05, "loss": 1.0634, "step": 907 }, { "epoch": 0.2792986773300523, "grad_norm": 0.5310184961500611, "learning_rate": 8.472455564945699e-05, "loss": 1.0305, "step": 908 }, { "epoch": 0.27960627499231006, "grad_norm": 0.4807152014664309, "learning_rate": 8.468869350985725e-05, "loss": 0.7501, "step": 909 }, { "epoch": 0.27991387265456785, "grad_norm": 0.5757823344888241, "learning_rate": 8.465279693211561e-05, "loss": 0.9785, "step": 910 }, { "epoch": 0.2802214703168256, "grad_norm": 0.644012087018941, "learning_rate": 8.461686595186938e-05, "loss": 0.9638, "step": 911 }, { "epoch": 0.28052906797908334, "grad_norm": 1.0658925846143161, "learning_rate": 8.458090060479e-05, "loss": 1.166, "step": 912 }, { "epoch": 0.28083666564134113, "grad_norm": 0.49980621124774616, "learning_rate": 8.454490092658306e-05, "loss": 0.9295, "step": 913 }, { "epoch": 0.2811442633035989, "grad_norm": 0.6765818730598461, "learning_rate": 8.450886695298821e-05, "loss": 0.8526, "step": 914 }, { "epoch": 0.28145186096585667, "grad_norm": 0.43515794383210343, "learning_rate": 8.447279871977916e-05, "loss": 0.8718, "step": 915 }, { "epoch": 0.2817594586281144, "grad_norm": 0.4867144508003138, "learning_rate": 8.443669626276362e-05, "loss": 0.9605, "step": 916 }, { "epoch": 0.2820670562903722, "grad_norm": 0.5864111530614786, "learning_rate": 8.44005596177833e-05, "loss": 1.0552, "step": 917 }, { "epoch": 0.28237465395262995, "grad_norm": 0.5915084624786097, "learning_rate": 8.43643888207138e-05, "loss": 0.9533, "step": 918 }, { "epoch": 0.28268225161488775, "grad_norm": 0.42841163880754113, "learning_rate": 8.43281839074647e-05, "loss": 0.6991, "step": 919 }, { "epoch": 0.2829898492771455, "grad_norm": 0.46010048225528416, "learning_rate": 8.42919449139794e-05, "loss": 0.8418, "step": 920 }, { "epoch": 0.2832974469394033, "grad_norm": 0.5069860845205415, "learning_rate": 8.425567187623512e-05, "loss": 1.0326, "step": 921 }, { "epoch": 0.283605044601661, "grad_norm": 0.7775375801815639, "learning_rate": 8.421936483024294e-05, "loss": 1.0158, "step": 922 }, { "epoch": 0.28391264226391877, "grad_norm": 0.47949018265740223, "learning_rate": 8.418302381204763e-05, "loss": 0.8284, "step": 923 }, { "epoch": 0.28422023992617657, "grad_norm": 0.5220865048317538, "learning_rate": 8.414664885772774e-05, "loss": 0.9375, "step": 924 }, { "epoch": 0.2845278375884343, "grad_norm": 0.7371843177330433, "learning_rate": 8.411024000339549e-05, "loss": 0.8678, "step": 925 }, { "epoch": 0.2848354352506921, "grad_norm": 0.8492683380904725, "learning_rate": 8.407379728519676e-05, "loss": 0.9845, "step": 926 }, { "epoch": 0.28514303291294985, "grad_norm": 0.525052572250942, "learning_rate": 8.403732073931106e-05, "loss": 0.9901, "step": 927 }, { "epoch": 0.28545063057520764, "grad_norm": 1.0897345756361114, "learning_rate": 8.400081040195143e-05, "loss": 1.0249, "step": 928 }, { "epoch": 0.2857582282374654, "grad_norm": 0.5109877788736581, "learning_rate": 8.396426630936453e-05, "loss": 0.7439, "step": 929 }, { "epoch": 0.2860658258997232, "grad_norm": 0.5707720468976233, "learning_rate": 8.392768849783052e-05, "loss": 0.9195, "step": 930 }, { "epoch": 0.2863734235619809, "grad_norm": 0.4790353145164375, "learning_rate": 8.389107700366298e-05, "loss": 1.0609, "step": 931 }, { "epoch": 0.2866810212242387, "grad_norm": 0.5783888782061818, "learning_rate": 8.385443186320897e-05, "loss": 0.8178, "step": 932 }, { "epoch": 0.28698861888649646, "grad_norm": 0.5015229668478303, "learning_rate": 8.381775311284897e-05, "loss": 0.8055, "step": 933 }, { "epoch": 0.2872962165487542, "grad_norm": 3.07516642913105, "learning_rate": 8.378104078899676e-05, "loss": 0.9249, "step": 934 }, { "epoch": 0.287603814211012, "grad_norm": 0.5219029199018033, "learning_rate": 8.374429492809954e-05, "loss": 0.8521, "step": 935 }, { "epoch": 0.28791141187326974, "grad_norm": 0.4882873387029985, "learning_rate": 8.370751556663774e-05, "loss": 0.7612, "step": 936 }, { "epoch": 0.28821900953552754, "grad_norm": 0.5073845128021655, "learning_rate": 8.367070274112507e-05, "loss": 0.9134, "step": 937 }, { "epoch": 0.2885266071977853, "grad_norm": 6.907833192404101, "learning_rate": 8.363385648810847e-05, "loss": 0.8163, "step": 938 }, { "epoch": 0.2888342048600431, "grad_norm": 0.5136643861124326, "learning_rate": 8.359697684416805e-05, "loss": 0.7999, "step": 939 }, { "epoch": 0.2891418025223008, "grad_norm": 0.6667614717650528, "learning_rate": 8.356006384591708e-05, "loss": 0.8221, "step": 940 }, { "epoch": 0.2894494001845586, "grad_norm": 0.525826115167639, "learning_rate": 8.352311753000195e-05, "loss": 0.826, "step": 941 }, { "epoch": 0.28975699784681636, "grad_norm": 0.5934090091622922, "learning_rate": 8.34861379331021e-05, "loss": 0.9686, "step": 942 }, { "epoch": 0.29006459550907415, "grad_norm": 0.7927042682220404, "learning_rate": 8.344912509193004e-05, "loss": 1.0191, "step": 943 }, { "epoch": 0.2903721931713319, "grad_norm": 0.6154968166669776, "learning_rate": 8.341207904323125e-05, "loss": 0.878, "step": 944 }, { "epoch": 0.2906797908335897, "grad_norm": 0.6200455970919668, "learning_rate": 8.337499982378425e-05, "loss": 1.0366, "step": 945 }, { "epoch": 0.29098738849584743, "grad_norm": 0.6523914989646749, "learning_rate": 8.33378874704004e-05, "loss": 0.9663, "step": 946 }, { "epoch": 0.2912949861581052, "grad_norm": 0.5279961445875212, "learning_rate": 8.3300742019924e-05, "loss": 0.9057, "step": 947 }, { "epoch": 0.29160258382036297, "grad_norm": 0.7329628777523726, "learning_rate": 8.326356350923222e-05, "loss": 0.8918, "step": 948 }, { "epoch": 0.2919101814826207, "grad_norm": 0.526706490217, "learning_rate": 8.322635197523502e-05, "loss": 0.7364, "step": 949 }, { "epoch": 0.2922177791448785, "grad_norm": 0.6638368965919629, "learning_rate": 8.318910745487516e-05, "loss": 1.0329, "step": 950 }, { "epoch": 0.29252537680713625, "grad_norm": 0.47018679486025505, "learning_rate": 8.315182998512817e-05, "loss": 0.8746, "step": 951 }, { "epoch": 0.29283297446939405, "grad_norm": 0.4315053475546793, "learning_rate": 8.311451960300224e-05, "loss": 0.8178, "step": 952 }, { "epoch": 0.2931405721316518, "grad_norm": 0.601839859884721, "learning_rate": 8.30771763455383e-05, "loss": 0.834, "step": 953 }, { "epoch": 0.2934481697939096, "grad_norm": 0.6549790354264049, "learning_rate": 8.303980024980985e-05, "loss": 0.9447, "step": 954 }, { "epoch": 0.2937557674561673, "grad_norm": 0.4934008433140182, "learning_rate": 8.300239135292304e-05, "loss": 0.8718, "step": 955 }, { "epoch": 0.2940633651184251, "grad_norm": 0.5003814050101681, "learning_rate": 8.296494969201656e-05, "loss": 0.9587, "step": 956 }, { "epoch": 0.29437096278068287, "grad_norm": 0.760473306598229, "learning_rate": 8.292747530426163e-05, "loss": 0.9176, "step": 957 }, { "epoch": 0.2946785604429406, "grad_norm": 0.5713908870769302, "learning_rate": 8.288996822686199e-05, "loss": 0.9373, "step": 958 }, { "epoch": 0.2949861581051984, "grad_norm": 0.7326927699847804, "learning_rate": 8.285242849705377e-05, "loss": 1.0774, "step": 959 }, { "epoch": 0.29529375576745615, "grad_norm": 0.5882837351652368, "learning_rate": 8.281485615210559e-05, "loss": 0.8012, "step": 960 }, { "epoch": 0.29560135342971394, "grad_norm": 0.6239946907570761, "learning_rate": 8.277725122931839e-05, "loss": 0.883, "step": 961 }, { "epoch": 0.2959089510919717, "grad_norm": 0.6257474177609671, "learning_rate": 8.273961376602552e-05, "loss": 1.0188, "step": 962 }, { "epoch": 0.2962165487542295, "grad_norm": 0.6599969079644783, "learning_rate": 8.270194379959255e-05, "loss": 1.0198, "step": 963 }, { "epoch": 0.2965241464164872, "grad_norm": 0.7500220535312526, "learning_rate": 8.266424136741737e-05, "loss": 0.9721, "step": 964 }, { "epoch": 0.296831744078745, "grad_norm": 0.6251269082766113, "learning_rate": 8.262650650693008e-05, "loss": 0.9391, "step": 965 }, { "epoch": 0.29713934174100276, "grad_norm": 0.5103435083600936, "learning_rate": 8.258873925559304e-05, "loss": 0.9434, "step": 966 }, { "epoch": 0.29744693940326056, "grad_norm": 0.5612260552820756, "learning_rate": 8.255093965090066e-05, "loss": 0.9898, "step": 967 }, { "epoch": 0.2977545370655183, "grad_norm": 1.3454181660396412, "learning_rate": 8.251310773037955e-05, "loss": 1.1633, "step": 968 }, { "epoch": 0.2980621347277761, "grad_norm": 0.6195308456340769, "learning_rate": 8.247524353158837e-05, "loss": 0.817, "step": 969 }, { "epoch": 0.29836973239003384, "grad_norm": 0.4934626415812117, "learning_rate": 8.243734709211781e-05, "loss": 1.168, "step": 970 }, { "epoch": 0.2986773300522916, "grad_norm": 0.43287373021915776, "learning_rate": 8.239941844959063e-05, "loss": 0.8637, "step": 971 }, { "epoch": 0.2989849277145494, "grad_norm": 0.5564956282102851, "learning_rate": 8.236145764166147e-05, "loss": 0.8831, "step": 972 }, { "epoch": 0.2992925253768071, "grad_norm": 0.39819222967770235, "learning_rate": 8.2323464706017e-05, "loss": 0.8443, "step": 973 }, { "epoch": 0.2996001230390649, "grad_norm": 0.5285701610966331, "learning_rate": 8.228543968037571e-05, "loss": 0.9064, "step": 974 }, { "epoch": 0.29990772070132266, "grad_norm": 0.5705532183224227, "learning_rate": 8.224738260248798e-05, "loss": 1.0763, "step": 975 }, { "epoch": 0.30021531836358045, "grad_norm": 0.48925893595023145, "learning_rate": 8.220929351013602e-05, "loss": 0.7219, "step": 976 }, { "epoch": 0.3005229160258382, "grad_norm": 0.5550884578942742, "learning_rate": 8.21711724411338e-05, "loss": 0.9843, "step": 977 }, { "epoch": 0.300830513688096, "grad_norm": 0.6682471451093064, "learning_rate": 8.213301943332703e-05, "loss": 1.1265, "step": 978 }, { "epoch": 0.30113811135035373, "grad_norm": 0.5935558803394049, "learning_rate": 8.209483452459316e-05, "loss": 1.0194, "step": 979 }, { "epoch": 0.30144570901261153, "grad_norm": 0.3947026003397006, "learning_rate": 8.205661775284133e-05, "loss": 0.8704, "step": 980 }, { "epoch": 0.30175330667486927, "grad_norm": 0.596496992840469, "learning_rate": 8.201836915601221e-05, "loss": 0.8382, "step": 981 }, { "epoch": 0.302060904337127, "grad_norm": 0.7094360554868769, "learning_rate": 8.198008877207817e-05, "loss": 0.9694, "step": 982 }, { "epoch": 0.3023685019993848, "grad_norm": 0.5521069875653439, "learning_rate": 8.194177663904308e-05, "loss": 0.9226, "step": 983 }, { "epoch": 0.30267609966164255, "grad_norm": 0.5177844589619114, "learning_rate": 8.190343279494239e-05, "loss": 0.8422, "step": 984 }, { "epoch": 0.30298369732390035, "grad_norm": 0.5529253109796195, "learning_rate": 8.186505727784295e-05, "loss": 0.9208, "step": 985 }, { "epoch": 0.3032912949861581, "grad_norm": 0.5630686627271393, "learning_rate": 8.182665012584312e-05, "loss": 0.9955, "step": 986 }, { "epoch": 0.3035988926484159, "grad_norm": 0.6238916977065345, "learning_rate": 8.178821137707263e-05, "loss": 0.81, "step": 987 }, { "epoch": 0.3039064903106736, "grad_norm": 0.6657934584120304, "learning_rate": 8.17497410696926e-05, "loss": 0.9522, "step": 988 }, { "epoch": 0.3042140879729314, "grad_norm": 4.841341640493257, "learning_rate": 8.171123924189545e-05, "loss": 1.0211, "step": 989 }, { "epoch": 0.30452168563518917, "grad_norm": 0.6176900766639162, "learning_rate": 8.167270593190495e-05, "loss": 0.9636, "step": 990 }, { "epoch": 0.30482928329744696, "grad_norm": 0.9798279820592628, "learning_rate": 8.163414117797606e-05, "loss": 0.9244, "step": 991 }, { "epoch": 0.3051368809597047, "grad_norm": 0.618470725437091, "learning_rate": 8.159554501839502e-05, "loss": 0.6379, "step": 992 }, { "epoch": 0.3054444786219625, "grad_norm": 0.6910268299653808, "learning_rate": 8.155691749147917e-05, "loss": 0.861, "step": 993 }, { "epoch": 0.30575207628422024, "grad_norm": 0.7947972400241364, "learning_rate": 8.151825863557705e-05, "loss": 1.0566, "step": 994 }, { "epoch": 0.306059673946478, "grad_norm": 0.4887451878382148, "learning_rate": 8.147956848906832e-05, "loss": 0.7268, "step": 995 }, { "epoch": 0.3063672716087358, "grad_norm": 0.5370447557053373, "learning_rate": 8.144084709036361e-05, "loss": 0.883, "step": 996 }, { "epoch": 0.3066748692709935, "grad_norm": 0.6051819257573475, "learning_rate": 8.140209447790472e-05, "loss": 1.0159, "step": 997 }, { "epoch": 0.3069824669332513, "grad_norm": 0.5313583547802248, "learning_rate": 8.13633106901643e-05, "loss": 0.7967, "step": 998 }, { "epoch": 0.30729006459550906, "grad_norm": 0.4548009036660708, "learning_rate": 8.132449576564602e-05, "loss": 0.9653, "step": 999 }, { "epoch": 0.30759766225776686, "grad_norm": 0.8133975154247689, "learning_rate": 8.128564974288446e-05, "loss": 0.8704, "step": 1000 }, { "epoch": 0.3079052599200246, "grad_norm": 0.48394045251020235, "learning_rate": 8.124677266044505e-05, "loss": 0.9439, "step": 1001 }, { "epoch": 0.3082128575822824, "grad_norm": 0.6350376808552709, "learning_rate": 8.120786455692408e-05, "loss": 0.9209, "step": 1002 }, { "epoch": 0.30852045524454014, "grad_norm": 0.4429388027800383, "learning_rate": 8.116892547094862e-05, "loss": 1.0267, "step": 1003 }, { "epoch": 0.30882805290679793, "grad_norm": 0.6856974649161915, "learning_rate": 8.112995544117652e-05, "loss": 0.9401, "step": 1004 }, { "epoch": 0.3091356505690557, "grad_norm": 0.5060921073567124, "learning_rate": 8.109095450629629e-05, "loss": 0.9145, "step": 1005 }, { "epoch": 0.3094432482313134, "grad_norm": 0.5496587029816338, "learning_rate": 8.105192270502722e-05, "loss": 0.9543, "step": 1006 }, { "epoch": 0.3097508458935712, "grad_norm": 0.5779444022756925, "learning_rate": 8.101286007611917e-05, "loss": 1.1065, "step": 1007 }, { "epoch": 0.31005844355582896, "grad_norm": 0.7122641152092388, "learning_rate": 8.097376665835259e-05, "loss": 0.9148, "step": 1008 }, { "epoch": 0.31036604121808675, "grad_norm": 0.4126317334367124, "learning_rate": 8.093464249053859e-05, "loss": 0.9122, "step": 1009 }, { "epoch": 0.3106736388803445, "grad_norm": 0.5867390919573324, "learning_rate": 8.089548761151873e-05, "loss": 0.8231, "step": 1010 }, { "epoch": 0.3109812365426023, "grad_norm": 0.5402461948327179, "learning_rate": 8.085630206016505e-05, "loss": 0.843, "step": 1011 }, { "epoch": 0.31128883420486003, "grad_norm": 0.4408345644202687, "learning_rate": 8.08170858753801e-05, "loss": 0.8906, "step": 1012 }, { "epoch": 0.31159643186711783, "grad_norm": 0.4845456247885646, "learning_rate": 8.07778390960968e-05, "loss": 0.9305, "step": 1013 }, { "epoch": 0.31190402952937557, "grad_norm": 0.7957250910008531, "learning_rate": 8.073856176127846e-05, "loss": 0.9711, "step": 1014 }, { "epoch": 0.31221162719163337, "grad_norm": 0.4592004329376912, "learning_rate": 8.069925390991872e-05, "loss": 0.8952, "step": 1015 }, { "epoch": 0.3125192248538911, "grad_norm": 0.5177353183576087, "learning_rate": 8.065991558104151e-05, "loss": 0.9828, "step": 1016 }, { "epoch": 0.31282682251614885, "grad_norm": 0.4748120968825824, "learning_rate": 8.062054681370102e-05, "loss": 0.8188, "step": 1017 }, { "epoch": 0.31313442017840665, "grad_norm": 0.7403948612447648, "learning_rate": 8.058114764698168e-05, "loss": 1.0024, "step": 1018 }, { "epoch": 0.3134420178406644, "grad_norm": 0.6620411952956493, "learning_rate": 8.054171811999809e-05, "loss": 0.9001, "step": 1019 }, { "epoch": 0.3137496155029222, "grad_norm": 0.49363371585150595, "learning_rate": 8.050225827189493e-05, "loss": 1.0232, "step": 1020 }, { "epoch": 0.3140572131651799, "grad_norm": 0.7435475590779845, "learning_rate": 8.046276814184709e-05, "loss": 0.938, "step": 1021 }, { "epoch": 0.3143648108274377, "grad_norm": 0.5075021952252755, "learning_rate": 8.042324776905945e-05, "loss": 0.7834, "step": 1022 }, { "epoch": 0.31467240848969547, "grad_norm": 0.6008309812543194, "learning_rate": 8.038369719276692e-05, "loss": 0.9176, "step": 1023 }, { "epoch": 0.31498000615195326, "grad_norm": 0.7428945979104211, "learning_rate": 8.034411645223443e-05, "loss": 0.8454, "step": 1024 }, { "epoch": 0.315287603814211, "grad_norm": 0.8446063950777624, "learning_rate": 8.030450558675682e-05, "loss": 0.9042, "step": 1025 }, { "epoch": 0.3155952014764688, "grad_norm": 0.7269745892623988, "learning_rate": 8.026486463565885e-05, "loss": 1.238, "step": 1026 }, { "epoch": 0.31590279913872654, "grad_norm": 0.6064049260445311, "learning_rate": 8.022519363829515e-05, "loss": 0.8557, "step": 1027 }, { "epoch": 0.31621039680098434, "grad_norm": 0.5304723100480051, "learning_rate": 8.018549263405019e-05, "loss": 0.9059, "step": 1028 }, { "epoch": 0.3165179944632421, "grad_norm": 0.6011831934042883, "learning_rate": 8.014576166233822e-05, "loss": 0.7518, "step": 1029 }, { "epoch": 0.3168255921254998, "grad_norm": 1.2449617336942382, "learning_rate": 8.010600076260325e-05, "loss": 0.9205, "step": 1030 }, { "epoch": 0.3171331897877576, "grad_norm": 0.5435998156086724, "learning_rate": 8.006620997431893e-05, "loss": 1.0018, "step": 1031 }, { "epoch": 0.31744078745001536, "grad_norm": 0.5775798815167078, "learning_rate": 8.002638933698871e-05, "loss": 0.9543, "step": 1032 }, { "epoch": 0.31774838511227316, "grad_norm": 0.5389351611965967, "learning_rate": 7.998653889014562e-05, "loss": 0.945, "step": 1033 }, { "epoch": 0.3180559827745309, "grad_norm": 0.5366480614114018, "learning_rate": 7.99466586733522e-05, "loss": 0.9191, "step": 1034 }, { "epoch": 0.3183635804367887, "grad_norm": 0.6912798710763582, "learning_rate": 7.99067487262007e-05, "loss": 1.0452, "step": 1035 }, { "epoch": 0.31867117809904644, "grad_norm": 0.5642106291318325, "learning_rate": 7.986680908831274e-05, "loss": 0.8549, "step": 1036 }, { "epoch": 0.31897877576130423, "grad_norm": 0.6411078098553283, "learning_rate": 7.982683979933949e-05, "loss": 1.0178, "step": 1037 }, { "epoch": 0.319286373423562, "grad_norm": 0.6540686084794005, "learning_rate": 7.978684089896159e-05, "loss": 0.9769, "step": 1038 }, { "epoch": 0.3195939710858198, "grad_norm": 0.5145036103693919, "learning_rate": 7.974681242688898e-05, "loss": 1.044, "step": 1039 }, { "epoch": 0.3199015687480775, "grad_norm": 0.5562260859943321, "learning_rate": 7.970675442286104e-05, "loss": 1.0081, "step": 1040 }, { "epoch": 0.32020916641033526, "grad_norm": 0.5945067342561915, "learning_rate": 7.966666692664644e-05, "loss": 0.9358, "step": 1041 }, { "epoch": 0.32051676407259305, "grad_norm": 0.4989846918731151, "learning_rate": 7.962654997804313e-05, "loss": 0.9108, "step": 1042 }, { "epoch": 0.3208243617348508, "grad_norm": 0.8227911813921668, "learning_rate": 7.958640361687828e-05, "loss": 0.9295, "step": 1043 }, { "epoch": 0.3211319593971086, "grad_norm": 0.5026490590142177, "learning_rate": 7.954622788300831e-05, "loss": 0.9373, "step": 1044 }, { "epoch": 0.32143955705936633, "grad_norm": 0.5333460668731521, "learning_rate": 7.950602281631873e-05, "loss": 0.79, "step": 1045 }, { "epoch": 0.32174715472162413, "grad_norm": 0.5069410995319754, "learning_rate": 7.946578845672425e-05, "loss": 0.9587, "step": 1046 }, { "epoch": 0.32205475238388187, "grad_norm": 0.4768202440875092, "learning_rate": 7.942552484416862e-05, "loss": 0.867, "step": 1047 }, { "epoch": 0.32236235004613967, "grad_norm": 0.5674064352698791, "learning_rate": 7.938523201862461e-05, "loss": 0.9804, "step": 1048 }, { "epoch": 0.3226699477083974, "grad_norm": 0.6319295521133781, "learning_rate": 7.934491002009403e-05, "loss": 0.8968, "step": 1049 }, { "epoch": 0.3229775453706552, "grad_norm": 0.57138557167845, "learning_rate": 7.930455888860763e-05, "loss": 0.8938, "step": 1050 }, { "epoch": 0.32328514303291295, "grad_norm": 0.538167506665699, "learning_rate": 7.92641786642251e-05, "loss": 0.7817, "step": 1051 }, { "epoch": 0.32359274069517074, "grad_norm": 0.5753125913040875, "learning_rate": 7.922376938703501e-05, "loss": 0.9814, "step": 1052 }, { "epoch": 0.3239003383574285, "grad_norm": 0.5668165951821539, "learning_rate": 7.918333109715474e-05, "loss": 0.906, "step": 1053 }, { "epoch": 0.3242079360196862, "grad_norm": 0.6064468343888418, "learning_rate": 7.914286383473054e-05, "loss": 0.7732, "step": 1054 }, { "epoch": 0.324515533681944, "grad_norm": 0.6481975763405017, "learning_rate": 7.910236763993735e-05, "loss": 1.0194, "step": 1055 }, { "epoch": 0.32482313134420177, "grad_norm": 0.47762760112747604, "learning_rate": 7.906184255297888e-05, "loss": 0.8836, "step": 1056 }, { "epoch": 0.32513072900645956, "grad_norm": 0.4948748699084173, "learning_rate": 7.902128861408749e-05, "loss": 0.8994, "step": 1057 }, { "epoch": 0.3254383266687173, "grad_norm": 1.1549891456909338, "learning_rate": 7.898070586352423e-05, "loss": 0.9874, "step": 1058 }, { "epoch": 0.3257459243309751, "grad_norm": 0.531764117935052, "learning_rate": 7.894009434157872e-05, "loss": 1.0069, "step": 1059 }, { "epoch": 0.32605352199323284, "grad_norm": 0.5275846621784426, "learning_rate": 7.889945408856913e-05, "loss": 0.8378, "step": 1060 }, { "epoch": 0.32636111965549064, "grad_norm": 0.8577388074553396, "learning_rate": 7.885878514484222e-05, "loss": 1.0313, "step": 1061 }, { "epoch": 0.3266687173177484, "grad_norm": 0.586827990829425, "learning_rate": 7.881808755077314e-05, "loss": 0.9298, "step": 1062 }, { "epoch": 0.3269763149800062, "grad_norm": 1.145795756746969, "learning_rate": 7.877736134676556e-05, "loss": 1.0733, "step": 1063 }, { "epoch": 0.3272839126422639, "grad_norm": 1.1975265911597759, "learning_rate": 7.87366065732515e-05, "loss": 0.97, "step": 1064 }, { "epoch": 0.32759151030452166, "grad_norm": 0.5491994956914119, "learning_rate": 7.869582327069141e-05, "loss": 0.8273, "step": 1065 }, { "epoch": 0.32789910796677946, "grad_norm": 0.797766430727589, "learning_rate": 7.865501147957396e-05, "loss": 1.2629, "step": 1066 }, { "epoch": 0.3282067056290372, "grad_norm": 0.5134419298364848, "learning_rate": 7.861417124041622e-05, "loss": 0.9649, "step": 1067 }, { "epoch": 0.328514303291295, "grad_norm": 0.7778740857503093, "learning_rate": 7.857330259376341e-05, "loss": 0.8757, "step": 1068 }, { "epoch": 0.32882190095355274, "grad_norm": 0.5381678033608657, "learning_rate": 7.8532405580189e-05, "loss": 0.9262, "step": 1069 }, { "epoch": 0.32912949861581053, "grad_norm": 0.6145884326903728, "learning_rate": 7.84914802402946e-05, "loss": 0.8321, "step": 1070 }, { "epoch": 0.3294370962780683, "grad_norm": 0.4386699114323515, "learning_rate": 7.845052661470998e-05, "loss": 0.9633, "step": 1071 }, { "epoch": 0.3297446939403261, "grad_norm": 0.9490176913162528, "learning_rate": 7.840954474409292e-05, "loss": 1.0552, "step": 1072 }, { "epoch": 0.3300522916025838, "grad_norm": 0.616530650211928, "learning_rate": 7.836853466912933e-05, "loss": 0.9153, "step": 1073 }, { "epoch": 0.3303598892648416, "grad_norm": 0.46999908016566766, "learning_rate": 7.832749643053306e-05, "loss": 0.7902, "step": 1074 }, { "epoch": 0.33066748692709935, "grad_norm": 0.6552675907261873, "learning_rate": 7.82864300690459e-05, "loss": 0.973, "step": 1075 }, { "epoch": 0.3309750845893571, "grad_norm": 0.49871840533010076, "learning_rate": 7.824533562543766e-05, "loss": 0.8941, "step": 1076 }, { "epoch": 0.3312826822516149, "grad_norm": 0.6359239328138044, "learning_rate": 7.82042131405059e-05, "loss": 0.8743, "step": 1077 }, { "epoch": 0.33159027991387263, "grad_norm": 0.5457306921286306, "learning_rate": 7.816306265507612e-05, "loss": 0.8918, "step": 1078 }, { "epoch": 0.33189787757613043, "grad_norm": 0.5293907916530365, "learning_rate": 7.812188421000157e-05, "loss": 0.7331, "step": 1079 }, { "epoch": 0.33220547523838817, "grad_norm": 0.5188263092961263, "learning_rate": 7.808067784616328e-05, "loss": 1.0186, "step": 1080 }, { "epoch": 0.33251307290064597, "grad_norm": 0.5313262358057395, "learning_rate": 7.803944360446997e-05, "loss": 0.7752, "step": 1081 }, { "epoch": 0.3328206705629037, "grad_norm": 0.6568148048321762, "learning_rate": 7.799818152585805e-05, "loss": 0.8429, "step": 1082 }, { "epoch": 0.3331282682251615, "grad_norm": 0.575965196099137, "learning_rate": 7.79568916512916e-05, "loss": 0.9947, "step": 1083 }, { "epoch": 0.33343586588741925, "grad_norm": 0.7171469234799642, "learning_rate": 7.791557402176221e-05, "loss": 1.0291, "step": 1084 }, { "epoch": 0.33374346354967704, "grad_norm": 0.6103004339732131, "learning_rate": 7.787422867828913e-05, "loss": 0.8811, "step": 1085 }, { "epoch": 0.3340510612119348, "grad_norm": 0.6193002214101884, "learning_rate": 7.783285566191907e-05, "loss": 0.896, "step": 1086 }, { "epoch": 0.3343586588741926, "grad_norm": 0.5238733370963673, "learning_rate": 7.779145501372616e-05, "loss": 0.933, "step": 1087 }, { "epoch": 0.3346662565364503, "grad_norm": 0.4911554466339205, "learning_rate": 7.775002677481207e-05, "loss": 0.7086, "step": 1088 }, { "epoch": 0.33497385419870807, "grad_norm": 0.45227417187748176, "learning_rate": 7.77085709863058e-05, "loss": 0.8992, "step": 1089 }, { "epoch": 0.33528145186096586, "grad_norm": 0.44629111129711696, "learning_rate": 7.766708768936368e-05, "loss": 0.8897, "step": 1090 }, { "epoch": 0.3355890495232236, "grad_norm": 0.8580808411028079, "learning_rate": 7.762557692516941e-05, "loss": 1.1723, "step": 1091 }, { "epoch": 0.3358966471854814, "grad_norm": 0.5651360418343659, "learning_rate": 7.758403873493393e-05, "loss": 0.9056, "step": 1092 }, { "epoch": 0.33620424484773914, "grad_norm": 1.2769770906019386, "learning_rate": 7.754247315989536e-05, "loss": 0.8976, "step": 1093 }, { "epoch": 0.33651184250999694, "grad_norm": 0.6049410283082794, "learning_rate": 7.750088024131912e-05, "loss": 0.9292, "step": 1094 }, { "epoch": 0.3368194401722547, "grad_norm": 0.500154892274036, "learning_rate": 7.745926002049766e-05, "loss": 0.927, "step": 1095 }, { "epoch": 0.3371270378345125, "grad_norm": 0.5410545297239269, "learning_rate": 7.741761253875059e-05, "loss": 0.8981, "step": 1096 }, { "epoch": 0.3374346354967702, "grad_norm": 0.5601315115159144, "learning_rate": 7.73759378374246e-05, "loss": 0.7485, "step": 1097 }, { "epoch": 0.337742233159028, "grad_norm": 0.5781416015483817, "learning_rate": 7.733423595789336e-05, "loss": 0.9039, "step": 1098 }, { "epoch": 0.33804983082128576, "grad_norm": 0.5275674280264572, "learning_rate": 7.729250694155754e-05, "loss": 0.9818, "step": 1099 }, { "epoch": 0.3383574284835435, "grad_norm": 0.5117443992469914, "learning_rate": 7.725075082984478e-05, "loss": 0.9271, "step": 1100 }, { "epoch": 0.3386650261458013, "grad_norm": 0.5329474852246806, "learning_rate": 7.720896766420958e-05, "loss": 0.9717, "step": 1101 }, { "epoch": 0.33897262380805904, "grad_norm": 0.601535498684396, "learning_rate": 7.716715748613328e-05, "loss": 0.8859, "step": 1102 }, { "epoch": 0.33928022147031683, "grad_norm": 0.5578944259569014, "learning_rate": 7.712532033712411e-05, "loss": 0.7911, "step": 1103 }, { "epoch": 0.3395878191325746, "grad_norm": 0.4133375234473888, "learning_rate": 7.708345625871703e-05, "loss": 0.8025, "step": 1104 }, { "epoch": 0.3398954167948324, "grad_norm": 0.7878599000969511, "learning_rate": 7.704156529247373e-05, "loss": 1.0293, "step": 1105 }, { "epoch": 0.3402030144570901, "grad_norm": 0.4364397524664674, "learning_rate": 7.69996474799826e-05, "loss": 0.8123, "step": 1106 }, { "epoch": 0.3405106121193479, "grad_norm": 0.5273286860818426, "learning_rate": 7.695770286285868e-05, "loss": 0.9891, "step": 1107 }, { "epoch": 0.34081820978160565, "grad_norm": 0.6823040766920849, "learning_rate": 7.691573148274366e-05, "loss": 0.9873, "step": 1108 }, { "epoch": 0.34112580744386345, "grad_norm": 0.527185149187801, "learning_rate": 7.687373338130574e-05, "loss": 0.9385, "step": 1109 }, { "epoch": 0.3414334051061212, "grad_norm": 0.6892543809757362, "learning_rate": 7.683170860023967e-05, "loss": 0.8735, "step": 1110 }, { "epoch": 0.341741002768379, "grad_norm": 0.47081515691526227, "learning_rate": 7.67896571812667e-05, "loss": 0.846, "step": 1111 }, { "epoch": 0.34204860043063673, "grad_norm": 0.5572842896298974, "learning_rate": 7.674757916613453e-05, "loss": 0.869, "step": 1112 }, { "epoch": 0.34235619809289447, "grad_norm": 0.45073823881861125, "learning_rate": 7.670547459661723e-05, "loss": 0.8301, "step": 1113 }, { "epoch": 0.34266379575515227, "grad_norm": 0.5984978673370903, "learning_rate": 7.666334351451528e-05, "loss": 0.8708, "step": 1114 }, { "epoch": 0.34297139341741, "grad_norm": 0.5280583235404968, "learning_rate": 7.662118596165541e-05, "loss": 0.8183, "step": 1115 }, { "epoch": 0.3432789910796678, "grad_norm": 0.7294336946710676, "learning_rate": 7.657900197989072e-05, "loss": 0.987, "step": 1116 }, { "epoch": 0.34358658874192555, "grad_norm": 0.6220921355126866, "learning_rate": 7.65367916111005e-05, "loss": 0.9112, "step": 1117 }, { "epoch": 0.34389418640418334, "grad_norm": 0.5578006746735077, "learning_rate": 7.649455489719021e-05, "loss": 1.0119, "step": 1118 }, { "epoch": 0.3442017840664411, "grad_norm": 0.5589519652670737, "learning_rate": 7.645229188009153e-05, "loss": 0.7663, "step": 1119 }, { "epoch": 0.3445093817286989, "grad_norm": 0.5314275564425678, "learning_rate": 7.641000260176218e-05, "loss": 0.8643, "step": 1120 }, { "epoch": 0.3448169793909566, "grad_norm": 0.5439735178099271, "learning_rate": 7.636768710418602e-05, "loss": 0.8065, "step": 1121 }, { "epoch": 0.3451245770532144, "grad_norm": 0.6582052670498468, "learning_rate": 7.632534542937288e-05, "loss": 0.8756, "step": 1122 }, { "epoch": 0.34543217471547216, "grad_norm": 0.7298986604362443, "learning_rate": 7.628297761935863e-05, "loss": 0.8569, "step": 1123 }, { "epoch": 0.3457397723777299, "grad_norm": 0.7243022368932663, "learning_rate": 7.624058371620505e-05, "loss": 0.8953, "step": 1124 }, { "epoch": 0.3460473700399877, "grad_norm": 0.4809998049806218, "learning_rate": 7.619816376199984e-05, "loss": 0.7315, "step": 1125 }, { "epoch": 0.34635496770224544, "grad_norm": 0.5826192779500807, "learning_rate": 7.615571779885657e-05, "loss": 0.9315, "step": 1126 }, { "epoch": 0.34666256536450324, "grad_norm": 0.4900281435541806, "learning_rate": 7.611324586891462e-05, "loss": 1.0138, "step": 1127 }, { "epoch": 0.346970163026761, "grad_norm": 0.6123423812116731, "learning_rate": 7.607074801433913e-05, "loss": 1.0964, "step": 1128 }, { "epoch": 0.3472777606890188, "grad_norm": 0.608300760851725, "learning_rate": 7.602822427732104e-05, "loss": 1.0272, "step": 1129 }, { "epoch": 0.3475853583512765, "grad_norm": 0.6646342150605287, "learning_rate": 7.598567470007691e-05, "loss": 0.8888, "step": 1130 }, { "epoch": 0.3478929560135343, "grad_norm": 0.9208074443517362, "learning_rate": 7.594309932484898e-05, "loss": 0.9299, "step": 1131 }, { "epoch": 0.34820055367579206, "grad_norm": 0.7811452295576191, "learning_rate": 7.590049819390513e-05, "loss": 1.0383, "step": 1132 }, { "epoch": 0.34850815133804985, "grad_norm": 0.8369541997392304, "learning_rate": 7.58578713495388e-05, "loss": 0.9765, "step": 1133 }, { "epoch": 0.3488157490003076, "grad_norm": 0.48653863261713576, "learning_rate": 7.581521883406893e-05, "loss": 0.883, "step": 1134 }, { "epoch": 0.34912334666256534, "grad_norm": 0.6453802573125554, "learning_rate": 7.577254068983994e-05, "loss": 0.6998, "step": 1135 }, { "epoch": 0.34943094432482313, "grad_norm": 0.48872087225127536, "learning_rate": 7.572983695922176e-05, "loss": 0.848, "step": 1136 }, { "epoch": 0.3497385419870809, "grad_norm": 0.5632252935687869, "learning_rate": 7.568710768460965e-05, "loss": 0.9437, "step": 1137 }, { "epoch": 0.3500461396493387, "grad_norm": 0.612549851988923, "learning_rate": 7.564435290842427e-05, "loss": 0.9889, "step": 1138 }, { "epoch": 0.3503537373115964, "grad_norm": 0.40847367466102447, "learning_rate": 7.560157267311157e-05, "loss": 0.8711, "step": 1139 }, { "epoch": 0.3506613349738542, "grad_norm": 0.4848301803727756, "learning_rate": 7.555876702114279e-05, "loss": 0.8523, "step": 1140 }, { "epoch": 0.35096893263611195, "grad_norm": 0.5283558728308897, "learning_rate": 7.551593599501444e-05, "loss": 0.8478, "step": 1141 }, { "epoch": 0.35127653029836975, "grad_norm": 0.5003654141039103, "learning_rate": 7.547307963724812e-05, "loss": 0.8375, "step": 1142 }, { "epoch": 0.3515841279606275, "grad_norm": 1.3390772350365405, "learning_rate": 7.54301979903907e-05, "loss": 0.9173, "step": 1143 }, { "epoch": 0.3518917256228853, "grad_norm": 0.5167964664581876, "learning_rate": 7.538729109701408e-05, "loss": 0.8806, "step": 1144 }, { "epoch": 0.35219932328514303, "grad_norm": 0.47597656459245, "learning_rate": 7.534435899971521e-05, "loss": 0.844, "step": 1145 }, { "epoch": 0.3525069209474008, "grad_norm": 0.44046963967513664, "learning_rate": 7.530140174111615e-05, "loss": 0.9728, "step": 1146 }, { "epoch": 0.35281451860965857, "grad_norm": 0.5829030987013268, "learning_rate": 7.525841936386386e-05, "loss": 0.8696, "step": 1147 }, { "epoch": 0.3531221162719163, "grad_norm": 0.7104000553757208, "learning_rate": 7.521541191063023e-05, "loss": 1.0108, "step": 1148 }, { "epoch": 0.3534297139341741, "grad_norm": 0.6262323054814611, "learning_rate": 7.517237942411213e-05, "loss": 1.0689, "step": 1149 }, { "epoch": 0.35373731159643185, "grad_norm": 0.6871560784428946, "learning_rate": 7.51293219470312e-05, "loss": 0.9388, "step": 1150 }, { "epoch": 0.35404490925868964, "grad_norm": 0.6325866739332761, "learning_rate": 7.508623952213389e-05, "loss": 1.0401, "step": 1151 }, { "epoch": 0.3543525069209474, "grad_norm": 0.7708870978093311, "learning_rate": 7.504313219219153e-05, "loss": 1.1251, "step": 1152 }, { "epoch": 0.3546601045832052, "grad_norm": 0.5308331431867985, "learning_rate": 7.500000000000001e-05, "loss": 0.8916, "step": 1153 }, { "epoch": 0.3549677022454629, "grad_norm": 0.44592832915093294, "learning_rate": 7.495684298838002e-05, "loss": 0.7954, "step": 1154 }, { "epoch": 0.3552752999077207, "grad_norm": 0.7322200067968629, "learning_rate": 7.491366120017684e-05, "loss": 1.0528, "step": 1155 }, { "epoch": 0.35558289756997846, "grad_norm": 0.6618799588365871, "learning_rate": 7.48704546782604e-05, "loss": 0.8797, "step": 1156 }, { "epoch": 0.35589049523223626, "grad_norm": 0.4638675404729843, "learning_rate": 7.482722346552509e-05, "loss": 0.8896, "step": 1157 }, { "epoch": 0.356198092894494, "grad_norm": 0.39687274501569575, "learning_rate": 7.478396760488992e-05, "loss": 0.7785, "step": 1158 }, { "epoch": 0.35650569055675174, "grad_norm": 0.5929521595852706, "learning_rate": 7.47406871392983e-05, "loss": 0.8272, "step": 1159 }, { "epoch": 0.35681328821900954, "grad_norm": 0.46845669666132556, "learning_rate": 7.469738211171806e-05, "loss": 0.8527, "step": 1160 }, { "epoch": 0.3571208858812673, "grad_norm": 0.5442596680651484, "learning_rate": 7.465405256514148e-05, "loss": 0.7675, "step": 1161 }, { "epoch": 0.3574284835435251, "grad_norm": 0.4812481001122393, "learning_rate": 7.461069854258514e-05, "loss": 0.9461, "step": 1162 }, { "epoch": 0.3577360812057828, "grad_norm": 0.660969529625281, "learning_rate": 7.456732008708988e-05, "loss": 0.9936, "step": 1163 }, { "epoch": 0.3580436788680406, "grad_norm": 0.4353320559940782, "learning_rate": 7.452391724172091e-05, "loss": 1.0075, "step": 1164 }, { "epoch": 0.35835127653029836, "grad_norm": 0.5855379024400297, "learning_rate": 7.448049004956753e-05, "loss": 0.9632, "step": 1165 }, { "epoch": 0.35865887419255615, "grad_norm": 0.7755995240331441, "learning_rate": 7.443703855374327e-05, "loss": 0.9824, "step": 1166 }, { "epoch": 0.3589664718548139, "grad_norm": 0.908520531048147, "learning_rate": 7.439356279738579e-05, "loss": 1.0575, "step": 1167 }, { "epoch": 0.3592740695170717, "grad_norm": 0.7362840968803785, "learning_rate": 7.435006282365684e-05, "loss": 0.8969, "step": 1168 }, { "epoch": 0.35958166717932943, "grad_norm": 0.4265018182178009, "learning_rate": 7.430653867574217e-05, "loss": 0.6918, "step": 1169 }, { "epoch": 0.35988926484158723, "grad_norm": 0.49825763150970004, "learning_rate": 7.426299039685158e-05, "loss": 1.0278, "step": 1170 }, { "epoch": 0.360196862503845, "grad_norm": 0.4626388926850757, "learning_rate": 7.42194180302188e-05, "loss": 0.9078, "step": 1171 }, { "epoch": 0.3605044601661027, "grad_norm": 0.5009261667821707, "learning_rate": 7.417582161910148e-05, "loss": 0.8577, "step": 1172 }, { "epoch": 0.3608120578283605, "grad_norm": 0.5462648804948331, "learning_rate": 7.413220120678115e-05, "loss": 0.8562, "step": 1173 }, { "epoch": 0.36111965549061825, "grad_norm": 0.5497487158739022, "learning_rate": 7.408855683656313e-05, "loss": 0.8451, "step": 1174 }, { "epoch": 0.36142725315287605, "grad_norm": 0.4131032989327381, "learning_rate": 7.404488855177659e-05, "loss": 0.7408, "step": 1175 }, { "epoch": 0.3617348508151338, "grad_norm": 0.49477390628856305, "learning_rate": 7.400119639577438e-05, "loss": 0.7915, "step": 1176 }, { "epoch": 0.3620424484773916, "grad_norm": 0.45477607274968285, "learning_rate": 7.395748041193308e-05, "loss": 0.9768, "step": 1177 }, { "epoch": 0.36235004613964933, "grad_norm": 0.6100130427490339, "learning_rate": 7.391374064365292e-05, "loss": 0.955, "step": 1178 }, { "epoch": 0.3626576438019071, "grad_norm": 0.6017282950021885, "learning_rate": 7.386997713435774e-05, "loss": 0.8378, "step": 1179 }, { "epoch": 0.36296524146416487, "grad_norm": 0.6614134199524063, "learning_rate": 7.382618992749495e-05, "loss": 0.7918, "step": 1180 }, { "epoch": 0.36327283912642266, "grad_norm": 0.4504479914762136, "learning_rate": 7.378237906653549e-05, "loss": 0.8493, "step": 1181 }, { "epoch": 0.3635804367886804, "grad_norm": 0.6137920625270875, "learning_rate": 7.373854459497378e-05, "loss": 0.9375, "step": 1182 }, { "epoch": 0.36388803445093815, "grad_norm": 0.5514308041981959, "learning_rate": 7.369468655632769e-05, "loss": 1.078, "step": 1183 }, { "epoch": 0.36419563211319594, "grad_norm": 0.5020890800827844, "learning_rate": 7.365080499413846e-05, "loss": 0.8307, "step": 1184 }, { "epoch": 0.3645032297754537, "grad_norm": 0.5967913545466178, "learning_rate": 7.360689995197073e-05, "loss": 0.8512, "step": 1185 }, { "epoch": 0.3648108274377115, "grad_norm": 0.6533982074950004, "learning_rate": 7.356297147341239e-05, "loss": 0.8683, "step": 1186 }, { "epoch": 0.3651184250999692, "grad_norm": 0.5956405374492264, "learning_rate": 7.351901960207466e-05, "loss": 0.8583, "step": 1187 }, { "epoch": 0.365426022762227, "grad_norm": 0.4914209140717309, "learning_rate": 7.347504438159193e-05, "loss": 0.7872, "step": 1188 }, { "epoch": 0.36573362042448476, "grad_norm": 0.5388347197264894, "learning_rate": 7.34310458556218e-05, "loss": 0.8565, "step": 1189 }, { "epoch": 0.36604121808674256, "grad_norm": 0.5497344947222953, "learning_rate": 7.338702406784499e-05, "loss": 0.9896, "step": 1190 }, { "epoch": 0.3663488157490003, "grad_norm": 0.48055251629656015, "learning_rate": 7.334297906196536e-05, "loss": 0.7354, "step": 1191 }, { "epoch": 0.3666564134112581, "grad_norm": 0.6278770468718474, "learning_rate": 7.329891088170974e-05, "loss": 0.9363, "step": 1192 }, { "epoch": 0.36696401107351584, "grad_norm": 0.6662462082431682, "learning_rate": 7.325481957082805e-05, "loss": 0.8654, "step": 1193 }, { "epoch": 0.36727160873577364, "grad_norm": 0.5040992012222398, "learning_rate": 7.321070517309311e-05, "loss": 0.9206, "step": 1194 }, { "epoch": 0.3675792063980314, "grad_norm": 0.4826468678965536, "learning_rate": 7.316656773230069e-05, "loss": 0.73, "step": 1195 }, { "epoch": 0.3678868040602891, "grad_norm": 0.6152448265438656, "learning_rate": 7.312240729226945e-05, "loss": 0.9849, "step": 1196 }, { "epoch": 0.3681944017225469, "grad_norm": 0.46604173449266834, "learning_rate": 7.307822389684085e-05, "loss": 0.8793, "step": 1197 }, { "epoch": 0.36850199938480466, "grad_norm": 0.6626293898934555, "learning_rate": 7.303401758987913e-05, "loss": 0.9918, "step": 1198 }, { "epoch": 0.36880959704706245, "grad_norm": 0.5415681874905592, "learning_rate": 7.298978841527136e-05, "loss": 0.8813, "step": 1199 }, { "epoch": 0.3691171947093202, "grad_norm": 0.5838620072988246, "learning_rate": 7.294553641692721e-05, "loss": 0.8801, "step": 1200 }, { "epoch": 0.369424792371578, "grad_norm": 0.45698342620322574, "learning_rate": 7.290126163877907e-05, "loss": 0.9084, "step": 1201 }, { "epoch": 0.36973239003383573, "grad_norm": 0.5020514870668533, "learning_rate": 7.28569641247819e-05, "loss": 0.9085, "step": 1202 }, { "epoch": 0.37003998769609353, "grad_norm": 0.4426577779209258, "learning_rate": 7.28126439189133e-05, "loss": 0.77, "step": 1203 }, { "epoch": 0.3703475853583513, "grad_norm": 0.5675344181329649, "learning_rate": 7.276830106517333e-05, "loss": 0.8639, "step": 1204 }, { "epoch": 0.37065518302060907, "grad_norm": 0.5811940293497447, "learning_rate": 7.272393560758457e-05, "loss": 0.9054, "step": 1205 }, { "epoch": 0.3709627806828668, "grad_norm": 0.5356608666185128, "learning_rate": 7.267954759019202e-05, "loss": 0.9577, "step": 1206 }, { "epoch": 0.37127037834512455, "grad_norm": 0.6240312564841499, "learning_rate": 7.263513705706311e-05, "loss": 0.8109, "step": 1207 }, { "epoch": 0.37157797600738235, "grad_norm": 0.4581767279502469, "learning_rate": 7.25907040522876e-05, "loss": 0.9604, "step": 1208 }, { "epoch": 0.3718855736696401, "grad_norm": 1.3016362209975045, "learning_rate": 7.254624861997754e-05, "loss": 1.0065, "step": 1209 }, { "epoch": 0.3721931713318979, "grad_norm": 0.7805823991999805, "learning_rate": 7.250177080426729e-05, "loss": 1.0594, "step": 1210 }, { "epoch": 0.37250076899415563, "grad_norm": 0.4277066906942341, "learning_rate": 7.24572706493134e-05, "loss": 0.8133, "step": 1211 }, { "epoch": 0.3728083666564134, "grad_norm": 0.5110788253634616, "learning_rate": 7.241274819929459e-05, "loss": 0.9057, "step": 1212 }, { "epoch": 0.37311596431867117, "grad_norm": 0.6201041729044161, "learning_rate": 7.236820349841176e-05, "loss": 0.9438, "step": 1213 }, { "epoch": 0.37342356198092896, "grad_norm": 0.40878397052821647, "learning_rate": 7.232363659088785e-05, "loss": 0.8074, "step": 1214 }, { "epoch": 0.3737311596431867, "grad_norm": 0.48278222639786983, "learning_rate": 7.227904752096788e-05, "loss": 0.8497, "step": 1215 }, { "epoch": 0.3740387573054445, "grad_norm": 0.9141332413590407, "learning_rate": 7.223443633291884e-05, "loss": 1.0188, "step": 1216 }, { "epoch": 0.37434635496770224, "grad_norm": 0.4827583514366954, "learning_rate": 7.218980307102973e-05, "loss": 0.9891, "step": 1217 }, { "epoch": 0.37465395262996, "grad_norm": 0.8006596454467245, "learning_rate": 7.214514777961139e-05, "loss": 1.1851, "step": 1218 }, { "epoch": 0.3749615502922178, "grad_norm": 0.8606317325220072, "learning_rate": 7.210047050299661e-05, "loss": 1.046, "step": 1219 }, { "epoch": 0.3752691479544755, "grad_norm": 0.4838511438518475, "learning_rate": 7.205577128553995e-05, "loss": 0.8525, "step": 1220 }, { "epoch": 0.3755767456167333, "grad_norm": 0.4692195472961675, "learning_rate": 7.201105017161777e-05, "loss": 0.7396, "step": 1221 }, { "epoch": 0.37588434327899106, "grad_norm": 0.5020816485568572, "learning_rate": 7.196630720562819e-05, "loss": 0.8843, "step": 1222 }, { "epoch": 0.37619194094124886, "grad_norm": 0.4273771527634591, "learning_rate": 7.192154243199097e-05, "loss": 0.7403, "step": 1223 }, { "epoch": 0.3764995386035066, "grad_norm": 0.4811566089065926, "learning_rate": 7.187675589514757e-05, "loss": 1.0209, "step": 1224 }, { "epoch": 0.3768071362657644, "grad_norm": 0.5754145682879258, "learning_rate": 7.183194763956106e-05, "loss": 0.8828, "step": 1225 }, { "epoch": 0.37711473392802214, "grad_norm": 0.6604890030890797, "learning_rate": 7.178711770971602e-05, "loss": 0.7962, "step": 1226 }, { "epoch": 0.37742233159027994, "grad_norm": 0.6182945108603256, "learning_rate": 7.174226615011859e-05, "loss": 0.9225, "step": 1227 }, { "epoch": 0.3777299292525377, "grad_norm": 0.6046421151631746, "learning_rate": 7.16973930052964e-05, "loss": 0.8097, "step": 1228 }, { "epoch": 0.3780375269147955, "grad_norm": 0.5024008895406314, "learning_rate": 7.165249831979844e-05, "loss": 0.6893, "step": 1229 }, { "epoch": 0.3783451245770532, "grad_norm": 0.4370705066827021, "learning_rate": 7.160758213819515e-05, "loss": 0.9835, "step": 1230 }, { "epoch": 0.37865272223931096, "grad_norm": 0.5838384145474883, "learning_rate": 7.156264450507828e-05, "loss": 1.0636, "step": 1231 }, { "epoch": 0.37896031990156875, "grad_norm": 0.5138745446669942, "learning_rate": 7.151768546506089e-05, "loss": 0.8059, "step": 1232 }, { "epoch": 0.3792679175638265, "grad_norm": 0.5905995520360242, "learning_rate": 7.14727050627773e-05, "loss": 0.8555, "step": 1233 }, { "epoch": 0.3795755152260843, "grad_norm": 0.5971279506720606, "learning_rate": 7.142770334288299e-05, "loss": 0.9833, "step": 1234 }, { "epoch": 0.37988311288834203, "grad_norm": 0.590248736851581, "learning_rate": 7.138268035005467e-05, "loss": 1.0828, "step": 1235 }, { "epoch": 0.38019071055059983, "grad_norm": 0.6991771471729836, "learning_rate": 7.133763612899013e-05, "loss": 1.1067, "step": 1236 }, { "epoch": 0.38049830821285757, "grad_norm": 0.6582064536426822, "learning_rate": 7.129257072440824e-05, "loss": 0.977, "step": 1237 }, { "epoch": 0.38080590587511537, "grad_norm": 0.49412251906504406, "learning_rate": 7.124748418104891e-05, "loss": 0.8, "step": 1238 }, { "epoch": 0.3811135035373731, "grad_norm": 0.8162497537898651, "learning_rate": 7.1202376543673e-05, "loss": 0.8633, "step": 1239 }, { "epoch": 0.3814211011996309, "grad_norm": 0.49978874745797386, "learning_rate": 7.11572478570624e-05, "loss": 0.8104, "step": 1240 }, { "epoch": 0.38172869886188865, "grad_norm": 0.7062209024252751, "learning_rate": 7.111209816601977e-05, "loss": 0.9767, "step": 1241 }, { "epoch": 0.3820362965241464, "grad_norm": 0.47479230532211525, "learning_rate": 7.106692751536874e-05, "loss": 0.8411, "step": 1242 }, { "epoch": 0.3823438941864042, "grad_norm": 0.42119883001318364, "learning_rate": 7.10217359499537e-05, "loss": 0.8753, "step": 1243 }, { "epoch": 0.38265149184866193, "grad_norm": 0.5160844592387253, "learning_rate": 7.097652351463975e-05, "loss": 0.7499, "step": 1244 }, { "epoch": 0.3829590895109197, "grad_norm": 0.5635806707060679, "learning_rate": 7.093129025431283e-05, "loss": 0.746, "step": 1245 }, { "epoch": 0.38326668717317747, "grad_norm": 0.6885992358101648, "learning_rate": 7.088603621387946e-05, "loss": 0.7602, "step": 1246 }, { "epoch": 0.38357428483543526, "grad_norm": 0.9458683868167952, "learning_rate": 7.084076143826679e-05, "loss": 1.1972, "step": 1247 }, { "epoch": 0.383881882497693, "grad_norm": 0.5210298013689142, "learning_rate": 7.079546597242261e-05, "loss": 0.9303, "step": 1248 }, { "epoch": 0.3841894801599508, "grad_norm": 0.6611254071835762, "learning_rate": 7.075014986131522e-05, "loss": 0.9088, "step": 1249 }, { "epoch": 0.38449707782220854, "grad_norm": 0.5353050608500535, "learning_rate": 7.070481314993342e-05, "loss": 0.9569, "step": 1250 }, { "epoch": 0.38480467548446634, "grad_norm": 0.5063962072514568, "learning_rate": 7.065945588328646e-05, "loss": 0.8352, "step": 1251 }, { "epoch": 0.3851122731467241, "grad_norm": 0.6756152066865874, "learning_rate": 7.0614078106404e-05, "loss": 0.9692, "step": 1252 }, { "epoch": 0.3854198708089819, "grad_norm": 0.5608349442290848, "learning_rate": 7.056867986433604e-05, "loss": 0.8855, "step": 1253 }, { "epoch": 0.3857274684712396, "grad_norm": 0.6685741463566113, "learning_rate": 7.052326120215294e-05, "loss": 0.9994, "step": 1254 }, { "epoch": 0.38603506613349736, "grad_norm": 0.42288844842460066, "learning_rate": 7.04778221649453e-05, "loss": 0.867, "step": 1255 }, { "epoch": 0.38634266379575516, "grad_norm": 0.6531295620281987, "learning_rate": 7.043236279782395e-05, "loss": 1.1283, "step": 1256 }, { "epoch": 0.3866502614580129, "grad_norm": 1.077473493693647, "learning_rate": 7.038688314591993e-05, "loss": 1.0494, "step": 1257 }, { "epoch": 0.3869578591202707, "grad_norm": 0.47339823761522837, "learning_rate": 7.03413832543844e-05, "loss": 0.8753, "step": 1258 }, { "epoch": 0.38726545678252844, "grad_norm": 0.5966227892800784, "learning_rate": 7.029586316838856e-05, "loss": 0.6504, "step": 1259 }, { "epoch": 0.38757305444478624, "grad_norm": 0.9237775849457694, "learning_rate": 7.025032293312379e-05, "loss": 1.074, "step": 1260 }, { "epoch": 0.387880652107044, "grad_norm": 0.5803947099320448, "learning_rate": 7.020476259380133e-05, "loss": 0.9024, "step": 1261 }, { "epoch": 0.3881882497693018, "grad_norm": 0.4923107476823593, "learning_rate": 7.015918219565248e-05, "loss": 0.8553, "step": 1262 }, { "epoch": 0.3884958474315595, "grad_norm": 0.6557101400848105, "learning_rate": 7.01135817839284e-05, "loss": 0.9356, "step": 1263 }, { "epoch": 0.3888034450938173, "grad_norm": 0.6148097959875056, "learning_rate": 7.006796140390015e-05, "loss": 0.9857, "step": 1264 }, { "epoch": 0.38911104275607505, "grad_norm": 0.4727319932389754, "learning_rate": 7.002232110085858e-05, "loss": 0.8346, "step": 1265 }, { "epoch": 0.3894186404183328, "grad_norm": 0.6211468583784272, "learning_rate": 6.997666092011435e-05, "loss": 0.8753, "step": 1266 }, { "epoch": 0.3897262380805906, "grad_norm": 0.5512888504449616, "learning_rate": 6.993098090699787e-05, "loss": 0.9626, "step": 1267 }, { "epoch": 0.39003383574284833, "grad_norm": 0.5420297972663962, "learning_rate": 6.988528110685916e-05, "loss": 0.9202, "step": 1268 }, { "epoch": 0.39034143340510613, "grad_norm": 0.5140206844134522, "learning_rate": 6.983956156506797e-05, "loss": 1.0062, "step": 1269 }, { "epoch": 0.39064903106736387, "grad_norm": 0.5772361813538357, "learning_rate": 6.979382232701363e-05, "loss": 0.8826, "step": 1270 }, { "epoch": 0.39095662872962167, "grad_norm": 0.6293548616637158, "learning_rate": 6.974806343810496e-05, "loss": 1.0013, "step": 1271 }, { "epoch": 0.3912642263918794, "grad_norm": 0.5200375326166088, "learning_rate": 6.970228494377039e-05, "loss": 0.8226, "step": 1272 }, { "epoch": 0.3915718240541372, "grad_norm": 0.46482517251321137, "learning_rate": 6.965648688945775e-05, "loss": 0.9477, "step": 1273 }, { "epoch": 0.39187942171639495, "grad_norm": 0.4441127963980261, "learning_rate": 6.961066932063427e-05, "loss": 0.8791, "step": 1274 }, { "epoch": 0.39218701937865275, "grad_norm": 0.4266851193107529, "learning_rate": 6.956483228278662e-05, "loss": 0.8337, "step": 1275 }, { "epoch": 0.3924946170409105, "grad_norm": 0.6978911935129432, "learning_rate": 6.951897582142075e-05, "loss": 1.1234, "step": 1276 }, { "epoch": 0.39280221470316823, "grad_norm": 0.5127754429829088, "learning_rate": 6.947309998206191e-05, "loss": 0.9279, "step": 1277 }, { "epoch": 0.393109812365426, "grad_norm": 0.6187360770771425, "learning_rate": 6.942720481025457e-05, "loss": 0.9702, "step": 1278 }, { "epoch": 0.39341741002768377, "grad_norm": 0.5643085622986217, "learning_rate": 6.938129035156243e-05, "loss": 0.9359, "step": 1279 }, { "epoch": 0.39372500768994156, "grad_norm": 0.7385164596413748, "learning_rate": 6.93353566515683e-05, "loss": 0.9876, "step": 1280 }, { "epoch": 0.3940326053521993, "grad_norm": 0.7094368743305745, "learning_rate": 6.928940375587413e-05, "loss": 0.988, "step": 1281 }, { "epoch": 0.3943402030144571, "grad_norm": 0.4850598998108261, "learning_rate": 6.924343171010087e-05, "loss": 0.7296, "step": 1282 }, { "epoch": 0.39464780067671484, "grad_norm": 0.42187558435473854, "learning_rate": 6.919744055988853e-05, "loss": 1.0165, "step": 1283 }, { "epoch": 0.39495539833897264, "grad_norm": 0.6382502057724367, "learning_rate": 6.91514303508961e-05, "loss": 0.8286, "step": 1284 }, { "epoch": 0.3952629960012304, "grad_norm": 0.7475759518955744, "learning_rate": 6.910540112880141e-05, "loss": 1.0222, "step": 1285 }, { "epoch": 0.3955705936634882, "grad_norm": 0.42836301279087224, "learning_rate": 6.905935293930126e-05, "loss": 0.8214, "step": 1286 }, { "epoch": 0.3958781913257459, "grad_norm": 0.3998343978982809, "learning_rate": 6.901328582811123e-05, "loss": 0.7888, "step": 1287 }, { "epoch": 0.3961857889880037, "grad_norm": 0.512513916393819, "learning_rate": 6.896719984096568e-05, "loss": 0.72, "step": 1288 }, { "epoch": 0.39649338665026146, "grad_norm": 0.5100236401759506, "learning_rate": 6.892109502361774e-05, "loss": 1.0248, "step": 1289 }, { "epoch": 0.3968009843125192, "grad_norm": 0.6070977945965289, "learning_rate": 6.887497142183923e-05, "loss": 0.9494, "step": 1290 }, { "epoch": 0.397108581974777, "grad_norm": 0.5626559590320837, "learning_rate": 6.882882908142058e-05, "loss": 0.9468, "step": 1291 }, { "epoch": 0.39741617963703474, "grad_norm": 0.5598433858002749, "learning_rate": 6.878266804817088e-05, "loss": 0.8626, "step": 1292 }, { "epoch": 0.39772377729929254, "grad_norm": 0.4791792826614949, "learning_rate": 6.873648836791772e-05, "loss": 0.7764, "step": 1293 }, { "epoch": 0.3980313749615503, "grad_norm": 0.534405672591238, "learning_rate": 6.869029008650723e-05, "loss": 0.8709, "step": 1294 }, { "epoch": 0.3983389726238081, "grad_norm": 0.5025912648233849, "learning_rate": 6.864407324980404e-05, "loss": 0.9159, "step": 1295 }, { "epoch": 0.3986465702860658, "grad_norm": 0.5039689389034882, "learning_rate": 6.859783790369115e-05, "loss": 0.9336, "step": 1296 }, { "epoch": 0.3989541679483236, "grad_norm": 1.2372576817025918, "learning_rate": 6.855158409406995e-05, "loss": 1.3405, "step": 1297 }, { "epoch": 0.39926176561058135, "grad_norm": 0.8056566868079575, "learning_rate": 6.850531186686017e-05, "loss": 0.9718, "step": 1298 }, { "epoch": 0.39956936327283915, "grad_norm": 0.5273934294031762, "learning_rate": 6.845902126799982e-05, "loss": 0.8963, "step": 1299 }, { "epoch": 0.3998769609350969, "grad_norm": 0.8763829464506481, "learning_rate": 6.841271234344514e-05, "loss": 0.9688, "step": 1300 }, { "epoch": 0.40018455859735463, "grad_norm": 0.6894234257623137, "learning_rate": 6.836638513917059e-05, "loss": 0.9576, "step": 1301 }, { "epoch": 0.40049215625961243, "grad_norm": 0.6825983999642455, "learning_rate": 6.832003970116874e-05, "loss": 0.8389, "step": 1302 }, { "epoch": 0.40079975392187017, "grad_norm": 0.5686341359312325, "learning_rate": 6.827367607545029e-05, "loss": 0.9881, "step": 1303 }, { "epoch": 0.40110735158412797, "grad_norm": 0.48925872489982447, "learning_rate": 6.822729430804398e-05, "loss": 0.8584, "step": 1304 }, { "epoch": 0.4014149492463857, "grad_norm": 0.48368062482859076, "learning_rate": 6.818089444499659e-05, "loss": 0.8302, "step": 1305 }, { "epoch": 0.4017225469086435, "grad_norm": 0.6179986615322293, "learning_rate": 6.813447653237282e-05, "loss": 1.2366, "step": 1306 }, { "epoch": 0.40203014457090125, "grad_norm": 0.5621922289536698, "learning_rate": 6.808804061625532e-05, "loss": 0.8397, "step": 1307 }, { "epoch": 0.40233774223315905, "grad_norm": 0.8440256657861405, "learning_rate": 6.804158674274461e-05, "loss": 0.9095, "step": 1308 }, { "epoch": 0.4026453398954168, "grad_norm": 0.48696099410428134, "learning_rate": 6.799511495795903e-05, "loss": 0.8435, "step": 1309 }, { "epoch": 0.4029529375576746, "grad_norm": 0.548692212132881, "learning_rate": 6.794862530803472e-05, "loss": 0.807, "step": 1310 }, { "epoch": 0.4032605352199323, "grad_norm": 0.6393648547332574, "learning_rate": 6.79021178391255e-05, "loss": 0.8692, "step": 1311 }, { "epoch": 0.4035681328821901, "grad_norm": 0.4206519136648397, "learning_rate": 6.785559259740297e-05, "loss": 1.031, "step": 1312 }, { "epoch": 0.40387573054444786, "grad_norm": 0.6473699976015945, "learning_rate": 6.78090496290563e-05, "loss": 0.8938, "step": 1313 }, { "epoch": 0.4041833282067056, "grad_norm": 0.4240217816072546, "learning_rate": 6.77624889802923e-05, "loss": 0.8005, "step": 1314 }, { "epoch": 0.4044909258689634, "grad_norm": 0.5350128660066124, "learning_rate": 6.771591069733529e-05, "loss": 0.8444, "step": 1315 }, { "epoch": 0.40479852353122114, "grad_norm": 0.4185792943584056, "learning_rate": 6.766931482642715e-05, "loss": 0.8495, "step": 1316 }, { "epoch": 0.40510612119347894, "grad_norm": 0.6111281341466029, "learning_rate": 6.762270141382719e-05, "loss": 0.8937, "step": 1317 }, { "epoch": 0.4054137188557367, "grad_norm": 0.5908373524398031, "learning_rate": 6.757607050581214e-05, "loss": 0.9627, "step": 1318 }, { "epoch": 0.4057213165179945, "grad_norm": 0.662334975306719, "learning_rate": 6.75294221486761e-05, "loss": 0.9925, "step": 1319 }, { "epoch": 0.4060289141802522, "grad_norm": 0.49020707114107953, "learning_rate": 6.74827563887305e-05, "loss": 0.7665, "step": 1320 }, { "epoch": 0.40633651184251, "grad_norm": 0.6380254317116706, "learning_rate": 6.743607327230402e-05, "loss": 1.2167, "step": 1321 }, { "epoch": 0.40664410950476776, "grad_norm": 0.48352167035381327, "learning_rate": 6.73893728457426e-05, "loss": 0.8448, "step": 1322 }, { "epoch": 0.40695170716702556, "grad_norm": 0.5362607552086357, "learning_rate": 6.734265515540937e-05, "loss": 1.057, "step": 1323 }, { "epoch": 0.4072593048292833, "grad_norm": 0.7999485469545429, "learning_rate": 6.729592024768456e-05, "loss": 1.0233, "step": 1324 }, { "epoch": 0.40756690249154104, "grad_norm": 0.4985774509563031, "learning_rate": 6.724916816896552e-05, "loss": 0.8988, "step": 1325 }, { "epoch": 0.40787450015379884, "grad_norm": 0.44837185554331566, "learning_rate": 6.720239896566668e-05, "loss": 0.9761, "step": 1326 }, { "epoch": 0.4081820978160566, "grad_norm": 0.9230133983133206, "learning_rate": 6.71556126842194e-05, "loss": 0.9818, "step": 1327 }, { "epoch": 0.4084896954783144, "grad_norm": 0.8208853128285832, "learning_rate": 6.710880937107203e-05, "loss": 1.0562, "step": 1328 }, { "epoch": 0.4087972931405721, "grad_norm": 0.6705913977580391, "learning_rate": 6.706198907268985e-05, "loss": 0.9991, "step": 1329 }, { "epoch": 0.4091048908028299, "grad_norm": 0.6805032655495968, "learning_rate": 6.701515183555497e-05, "loss": 0.9781, "step": 1330 }, { "epoch": 0.40941248846508765, "grad_norm": 0.4226394334341732, "learning_rate": 6.696829770616635e-05, "loss": 0.8446, "step": 1331 }, { "epoch": 0.40972008612734545, "grad_norm": 0.5696509471736633, "learning_rate": 6.692142673103967e-05, "loss": 1.0095, "step": 1332 }, { "epoch": 0.4100276837896032, "grad_norm": 0.48050891695890735, "learning_rate": 6.687453895670737e-05, "loss": 0.8723, "step": 1333 }, { "epoch": 0.410335281451861, "grad_norm": 0.6296058641135656, "learning_rate": 6.682763442971856e-05, "loss": 0.8618, "step": 1334 }, { "epoch": 0.41064287911411873, "grad_norm": 0.5797033739851611, "learning_rate": 6.678071319663899e-05, "loss": 0.9666, "step": 1335 }, { "epoch": 0.41095047677637647, "grad_norm": 0.5696247839878248, "learning_rate": 6.673377530405097e-05, "loss": 1.1444, "step": 1336 }, { "epoch": 0.41125807443863427, "grad_norm": 0.44719128574711614, "learning_rate": 6.66868207985534e-05, "loss": 0.8362, "step": 1337 }, { "epoch": 0.411565672100892, "grad_norm": 0.6707851971254547, "learning_rate": 6.66398497267616e-05, "loss": 1.099, "step": 1338 }, { "epoch": 0.4118732697631498, "grad_norm": 0.7748385912942937, "learning_rate": 6.65928621353074e-05, "loss": 1.1335, "step": 1339 }, { "epoch": 0.41218086742540755, "grad_norm": 0.4436166160717152, "learning_rate": 6.6545858070839e-05, "loss": 0.8699, "step": 1340 }, { "epoch": 0.41248846508766535, "grad_norm": 0.5555524384578576, "learning_rate": 6.649883758002096e-05, "loss": 0.9733, "step": 1341 }, { "epoch": 0.4127960627499231, "grad_norm": 0.6080650021663963, "learning_rate": 6.645180070953416e-05, "loss": 0.994, "step": 1342 }, { "epoch": 0.4131036604121809, "grad_norm": 0.7078682990024756, "learning_rate": 6.640474750607571e-05, "loss": 0.981, "step": 1343 }, { "epoch": 0.4134112580744386, "grad_norm": 0.5694312374642212, "learning_rate": 6.635767801635897e-05, "loss": 0.7168, "step": 1344 }, { "epoch": 0.4137188557366964, "grad_norm": 0.47371506844930783, "learning_rate": 6.631059228711345e-05, "loss": 0.7333, "step": 1345 }, { "epoch": 0.41402645339895416, "grad_norm": 0.4707268248785088, "learning_rate": 6.626349036508479e-05, "loss": 0.9042, "step": 1346 }, { "epoch": 0.41433405106121196, "grad_norm": 0.4673371264253751, "learning_rate": 6.621637229703468e-05, "loss": 0.7727, "step": 1347 }, { "epoch": 0.4146416487234697, "grad_norm": 0.5776666573417367, "learning_rate": 6.61692381297409e-05, "loss": 0.9114, "step": 1348 }, { "epoch": 0.41494924638572744, "grad_norm": 0.5346901612687492, "learning_rate": 6.612208790999713e-05, "loss": 0.7804, "step": 1349 }, { "epoch": 0.41525684404798524, "grad_norm": 0.5294057830311536, "learning_rate": 6.607492168461305e-05, "loss": 0.8577, "step": 1350 }, { "epoch": 0.415564441710243, "grad_norm": 0.4607429633986584, "learning_rate": 6.602773950041422e-05, "loss": 0.885, "step": 1351 }, { "epoch": 0.4158720393725008, "grad_norm": 0.5224998861969731, "learning_rate": 6.598054140424202e-05, "loss": 0.8409, "step": 1352 }, { "epoch": 0.4161796370347585, "grad_norm": 0.540918444194282, "learning_rate": 6.593332744295365e-05, "loss": 0.9596, "step": 1353 }, { "epoch": 0.4164872346970163, "grad_norm": 0.5233158258278823, "learning_rate": 6.588609766342204e-05, "loss": 1.0426, "step": 1354 }, { "epoch": 0.41679483235927406, "grad_norm": 0.5657097791821017, "learning_rate": 6.583885211253585e-05, "loss": 0.9388, "step": 1355 }, { "epoch": 0.41710243002153186, "grad_norm": 0.46034182319410366, "learning_rate": 6.579159083719936e-05, "loss": 0.9546, "step": 1356 }, { "epoch": 0.4174100276837896, "grad_norm": 0.5567307861183348, "learning_rate": 6.57443138843325e-05, "loss": 0.9855, "step": 1357 }, { "epoch": 0.4177176253460474, "grad_norm": 0.5104506292354213, "learning_rate": 6.569702130087075e-05, "loss": 0.7846, "step": 1358 }, { "epoch": 0.41802522300830514, "grad_norm": 0.5127259469360826, "learning_rate": 6.564971313376511e-05, "loss": 0.8914, "step": 1359 }, { "epoch": 0.4183328206705629, "grad_norm": 0.44465262767828156, "learning_rate": 6.560238942998204e-05, "loss": 0.8869, "step": 1360 }, { "epoch": 0.4186404183328207, "grad_norm": 0.6383600606691604, "learning_rate": 6.555505023650341e-05, "loss": 0.9187, "step": 1361 }, { "epoch": 0.4189480159950784, "grad_norm": 0.5153825855775278, "learning_rate": 6.550769560032653e-05, "loss": 0.8, "step": 1362 }, { "epoch": 0.4192556136573362, "grad_norm": 0.4292489565596584, "learning_rate": 6.546032556846398e-05, "loss": 0.8064, "step": 1363 }, { "epoch": 0.41956321131959395, "grad_norm": 0.6595803460482118, "learning_rate": 6.541294018794364e-05, "loss": 1.061, "step": 1364 }, { "epoch": 0.41987080898185175, "grad_norm": 0.40519844263625, "learning_rate": 6.536553950580863e-05, "loss": 0.9056, "step": 1365 }, { "epoch": 0.4201784066441095, "grad_norm": 0.6714965575779784, "learning_rate": 6.531812356911729e-05, "loss": 0.9539, "step": 1366 }, { "epoch": 0.4204860043063673, "grad_norm": 0.6685720834598244, "learning_rate": 6.527069242494303e-05, "loss": 0.7079, "step": 1367 }, { "epoch": 0.42079360196862503, "grad_norm": 0.5074706278332645, "learning_rate": 6.522324612037444e-05, "loss": 0.8864, "step": 1368 }, { "epoch": 0.4211011996308828, "grad_norm": 0.5378891697894393, "learning_rate": 6.517578470251514e-05, "loss": 0.9775, "step": 1369 }, { "epoch": 0.42140879729314057, "grad_norm": 0.4165720756351434, "learning_rate": 6.51283082184837e-05, "loss": 0.7974, "step": 1370 }, { "epoch": 0.42171639495539837, "grad_norm": 0.7016612332481993, "learning_rate": 6.508081671541373e-05, "loss": 0.8013, "step": 1371 }, { "epoch": 0.4220239926176561, "grad_norm": 0.586646197022387, "learning_rate": 6.503331024045367e-05, "loss": 0.8992, "step": 1372 }, { "epoch": 0.42233159027991385, "grad_norm": 0.5917378440548662, "learning_rate": 6.498578884076688e-05, "loss": 0.9312, "step": 1373 }, { "epoch": 0.42263918794217165, "grad_norm": 0.5843035762472522, "learning_rate": 6.493825256353154e-05, "loss": 0.9247, "step": 1374 }, { "epoch": 0.4229467856044294, "grad_norm": 0.7260447070489292, "learning_rate": 6.489070145594054e-05, "loss": 0.9185, "step": 1375 }, { "epoch": 0.4232543832666872, "grad_norm": 0.5172974015823448, "learning_rate": 6.484313556520154e-05, "loss": 0.8089, "step": 1376 }, { "epoch": 0.4235619809289449, "grad_norm": 0.5366780461579361, "learning_rate": 6.47955549385369e-05, "loss": 0.8947, "step": 1377 }, { "epoch": 0.4238695785912027, "grad_norm": 0.4719346468607404, "learning_rate": 6.474795962318356e-05, "loss": 0.8669, "step": 1378 }, { "epoch": 0.42417717625346046, "grad_norm": 0.2907234571593672, "learning_rate": 6.470034966639305e-05, "loss": 0.8761, "step": 1379 }, { "epoch": 0.42448477391571826, "grad_norm": 0.49945623240472153, "learning_rate": 6.465272511543146e-05, "loss": 0.98, "step": 1380 }, { "epoch": 0.424792371577976, "grad_norm": 0.4800507329555267, "learning_rate": 6.460508601757933e-05, "loss": 0.9748, "step": 1381 }, { "epoch": 0.4250999692402338, "grad_norm": 0.5476440165500169, "learning_rate": 6.455743242013172e-05, "loss": 0.784, "step": 1382 }, { "epoch": 0.42540756690249154, "grad_norm": 0.5906278410209208, "learning_rate": 6.450976437039799e-05, "loss": 0.9151, "step": 1383 }, { "epoch": 0.4257151645647493, "grad_norm": 0.5311797248890785, "learning_rate": 6.446208191570189e-05, "loss": 0.9226, "step": 1384 }, { "epoch": 0.4260227622270071, "grad_norm": 0.48271146364029077, "learning_rate": 6.44143851033815e-05, "loss": 0.8034, "step": 1385 }, { "epoch": 0.4263303598892648, "grad_norm": 0.5114367749978898, "learning_rate": 6.436667398078911e-05, "loss": 0.764, "step": 1386 }, { "epoch": 0.4266379575515226, "grad_norm": 0.566680957075587, "learning_rate": 6.43189485952912e-05, "loss": 0.8414, "step": 1387 }, { "epoch": 0.42694555521378036, "grad_norm": 0.6637039850632904, "learning_rate": 6.42712089942685e-05, "loss": 0.8523, "step": 1388 }, { "epoch": 0.42725315287603816, "grad_norm": 0.5948227503365401, "learning_rate": 6.422345522511575e-05, "loss": 0.9779, "step": 1389 }, { "epoch": 0.4275607505382959, "grad_norm": 0.6125131373419926, "learning_rate": 6.417568733524181e-05, "loss": 0.7879, "step": 1390 }, { "epoch": 0.4278683482005537, "grad_norm": 0.5700657340052303, "learning_rate": 6.412790537206957e-05, "loss": 0.8588, "step": 1391 }, { "epoch": 0.42817594586281144, "grad_norm": 0.797748846510084, "learning_rate": 6.408010938303584e-05, "loss": 1.1242, "step": 1392 }, { "epoch": 0.42848354352506923, "grad_norm": 0.44430617661061295, "learning_rate": 6.40322994155914e-05, "loss": 0.8933, "step": 1393 }, { "epoch": 0.428791141187327, "grad_norm": 0.4190669178015237, "learning_rate": 6.398447551720091e-05, "loss": 0.7104, "step": 1394 }, { "epoch": 0.4290987388495847, "grad_norm": 0.4782980082108795, "learning_rate": 6.393663773534281e-05, "loss": 0.7624, "step": 1395 }, { "epoch": 0.4294063365118425, "grad_norm": 0.7898575669002191, "learning_rate": 6.388878611750937e-05, "loss": 0.9926, "step": 1396 }, { "epoch": 0.42971393417410025, "grad_norm": 0.5394720542959498, "learning_rate": 6.38409207112066e-05, "loss": 0.9373, "step": 1397 }, { "epoch": 0.43002153183635805, "grad_norm": 0.5492559029865349, "learning_rate": 6.379304156395416e-05, "loss": 0.8659, "step": 1398 }, { "epoch": 0.4303291294986158, "grad_norm": 0.8054056135827193, "learning_rate": 6.374514872328536e-05, "loss": 1.055, "step": 1399 }, { "epoch": 0.4306367271608736, "grad_norm": 0.42442808453290803, "learning_rate": 6.369724223674715e-05, "loss": 0.8544, "step": 1400 }, { "epoch": 0.43094432482313133, "grad_norm": 0.5294171398646339, "learning_rate": 6.364932215189998e-05, "loss": 0.8861, "step": 1401 }, { "epoch": 0.4312519224853891, "grad_norm": 0.4679458682489583, "learning_rate": 6.360138851631782e-05, "loss": 0.9692, "step": 1402 }, { "epoch": 0.43155952014764687, "grad_norm": 0.5362036198268724, "learning_rate": 6.355344137758808e-05, "loss": 0.8757, "step": 1403 }, { "epoch": 0.43186711780990467, "grad_norm": 0.6596093410673867, "learning_rate": 6.350548078331158e-05, "loss": 1.0307, "step": 1404 }, { "epoch": 0.4321747154721624, "grad_norm": 0.5207057993892851, "learning_rate": 6.34575067811025e-05, "loss": 0.956, "step": 1405 }, { "epoch": 0.4324823131344202, "grad_norm": 0.5822320265809926, "learning_rate": 6.340951941858836e-05, "loss": 0.9256, "step": 1406 }, { "epoch": 0.43278991079667795, "grad_norm": 0.702385707929363, "learning_rate": 6.336151874340989e-05, "loss": 0.9571, "step": 1407 }, { "epoch": 0.4330975084589357, "grad_norm": 0.6229718025740899, "learning_rate": 6.331350480322106e-05, "loss": 0.9229, "step": 1408 }, { "epoch": 0.4334051061211935, "grad_norm": 0.579186489765101, "learning_rate": 6.326547764568902e-05, "loss": 0.855, "step": 1409 }, { "epoch": 0.4337127037834512, "grad_norm": 0.5022572592890608, "learning_rate": 6.321743731849405e-05, "loss": 0.7979, "step": 1410 }, { "epoch": 0.434020301445709, "grad_norm": 0.6270133605034877, "learning_rate": 6.316938386932946e-05, "loss": 0.8597, "step": 1411 }, { "epoch": 0.43432789910796676, "grad_norm": 0.4851765628765513, "learning_rate": 6.312131734590164e-05, "loss": 0.9985, "step": 1412 }, { "epoch": 0.43463549677022456, "grad_norm": 0.5007587250994711, "learning_rate": 6.307323779592993e-05, "loss": 0.8158, "step": 1413 }, { "epoch": 0.4349430944324823, "grad_norm": 0.5265855642333197, "learning_rate": 6.302514526714659e-05, "loss": 1.0817, "step": 1414 }, { "epoch": 0.4352506920947401, "grad_norm": 0.5440066977850299, "learning_rate": 6.29770398072968e-05, "loss": 0.9603, "step": 1415 }, { "epoch": 0.43555828975699784, "grad_norm": 0.9784170714942898, "learning_rate": 6.292892146413856e-05, "loss": 0.9335, "step": 1416 }, { "epoch": 0.43586588741925564, "grad_norm": 0.6170351047419911, "learning_rate": 6.288079028544266e-05, "loss": 0.9518, "step": 1417 }, { "epoch": 0.4361734850815134, "grad_norm": 0.6036415624039827, "learning_rate": 6.283264631899264e-05, "loss": 0.9562, "step": 1418 }, { "epoch": 0.4364810827437711, "grad_norm": 0.5241744404195637, "learning_rate": 6.27844896125847e-05, "loss": 0.8729, "step": 1419 }, { "epoch": 0.4367886804060289, "grad_norm": 0.933351456197878, "learning_rate": 6.273632021402776e-05, "loss": 1.0748, "step": 1420 }, { "epoch": 0.43709627806828666, "grad_norm": 0.597297473818783, "learning_rate": 6.268813817114326e-05, "loss": 0.9953, "step": 1421 }, { "epoch": 0.43740387573054446, "grad_norm": 0.5170116999201573, "learning_rate": 6.263994353176526e-05, "loss": 1.0056, "step": 1422 }, { "epoch": 0.4377114733928022, "grad_norm": 0.5728694935303933, "learning_rate": 6.259173634374027e-05, "loss": 0.8975, "step": 1423 }, { "epoch": 0.43801907105506, "grad_norm": 0.5216571773878539, "learning_rate": 6.254351665492731e-05, "loss": 1.1208, "step": 1424 }, { "epoch": 0.43832666871731774, "grad_norm": 0.9248480623545452, "learning_rate": 6.249528451319777e-05, "loss": 1.145, "step": 1425 }, { "epoch": 0.43863426637957553, "grad_norm": 2.2366018380166843, "learning_rate": 6.244703996643541e-05, "loss": 0.9138, "step": 1426 }, { "epoch": 0.4389418640418333, "grad_norm": 0.6228519116327076, "learning_rate": 6.239878306253634e-05, "loss": 0.919, "step": 1427 }, { "epoch": 0.43924946170409107, "grad_norm": 0.5286107868099741, "learning_rate": 6.235051384940889e-05, "loss": 0.86, "step": 1428 }, { "epoch": 0.4395570593663488, "grad_norm": 0.565101012436761, "learning_rate": 6.230223237497363e-05, "loss": 0.8639, "step": 1429 }, { "epoch": 0.4398646570286066, "grad_norm": 0.7052377803551528, "learning_rate": 6.225393868716332e-05, "loss": 1.0019, "step": 1430 }, { "epoch": 0.44017225469086435, "grad_norm": 0.6353985355133711, "learning_rate": 6.22056328339228e-05, "loss": 1.1204, "step": 1431 }, { "epoch": 0.4404798523531221, "grad_norm": 0.5247784572019453, "learning_rate": 6.215731486320905e-05, "loss": 0.961, "step": 1432 }, { "epoch": 0.4407874500153799, "grad_norm": 0.724276070276883, "learning_rate": 6.210898482299103e-05, "loss": 0.8193, "step": 1433 }, { "epoch": 0.44109504767763763, "grad_norm": 0.5170017765013486, "learning_rate": 6.20606427612497e-05, "loss": 0.9452, "step": 1434 }, { "epoch": 0.4414026453398954, "grad_norm": 0.6629366913089397, "learning_rate": 6.201228872597796e-05, "loss": 0.9796, "step": 1435 }, { "epoch": 0.44171024300215317, "grad_norm": 0.7497505827697087, "learning_rate": 6.196392276518058e-05, "loss": 0.7954, "step": 1436 }, { "epoch": 0.44201784066441097, "grad_norm": 0.7110617713439289, "learning_rate": 6.191554492687418e-05, "loss": 1.0636, "step": 1437 }, { "epoch": 0.4423254383266687, "grad_norm": 0.6058461878931081, "learning_rate": 6.18671552590872e-05, "loss": 0.9555, "step": 1438 }, { "epoch": 0.4426330359889265, "grad_norm": 0.549940367153561, "learning_rate": 6.181875380985976e-05, "loss": 0.9329, "step": 1439 }, { "epoch": 0.44294063365118425, "grad_norm": 0.5034698751728448, "learning_rate": 6.177034062724372e-05, "loss": 0.9221, "step": 1440 }, { "epoch": 0.44324823131344204, "grad_norm": 0.5831285605551415, "learning_rate": 6.172191575930259e-05, "loss": 1.022, "step": 1441 }, { "epoch": 0.4435558289756998, "grad_norm": 0.5373357779801663, "learning_rate": 6.167347925411147e-05, "loss": 0.8989, "step": 1442 }, { "epoch": 0.4438634266379575, "grad_norm": 0.5616106485875361, "learning_rate": 6.1625031159757e-05, "loss": 0.9521, "step": 1443 }, { "epoch": 0.4441710243002153, "grad_norm": 0.44379644809127466, "learning_rate": 6.157657152433737e-05, "loss": 0.9267, "step": 1444 }, { "epoch": 0.44447862196247306, "grad_norm": 0.5943775258739513, "learning_rate": 6.15281003959622e-05, "loss": 1.0565, "step": 1445 }, { "epoch": 0.44478621962473086, "grad_norm": 0.4798210110376646, "learning_rate": 6.147961782275248e-05, "loss": 0.9197, "step": 1446 }, { "epoch": 0.4450938172869886, "grad_norm": 0.47741893006940983, "learning_rate": 6.143112385284062e-05, "loss": 0.8369, "step": 1447 }, { "epoch": 0.4454014149492464, "grad_norm": 0.856923332490531, "learning_rate": 6.138261853437035e-05, "loss": 1.0471, "step": 1448 }, { "epoch": 0.44570901261150414, "grad_norm": 0.47613945254130063, "learning_rate": 6.133410191549658e-05, "loss": 0.7829, "step": 1449 }, { "epoch": 0.44601661027376194, "grad_norm": 0.6179181357179836, "learning_rate": 6.128557404438555e-05, "loss": 1.0703, "step": 1450 }, { "epoch": 0.4463242079360197, "grad_norm": 0.7329017564583167, "learning_rate": 6.123703496921461e-05, "loss": 1.1545, "step": 1451 }, { "epoch": 0.4466318055982775, "grad_norm": 0.6569411009296032, "learning_rate": 6.118848473817226e-05, "loss": 0.8955, "step": 1452 }, { "epoch": 0.4469394032605352, "grad_norm": 0.47095874257250997, "learning_rate": 6.113992339945802e-05, "loss": 0.7224, "step": 1453 }, { "epoch": 0.447247000922793, "grad_norm": 0.7835895542294292, "learning_rate": 6.10913510012825e-05, "loss": 1.1186, "step": 1454 }, { "epoch": 0.44755459858505076, "grad_norm": 0.4499150014170637, "learning_rate": 6.104276759186728e-05, "loss": 0.7856, "step": 1455 }, { "epoch": 0.4478621962473085, "grad_norm": 0.533788326606268, "learning_rate": 6.0994173219444836e-05, "loss": 0.8269, "step": 1456 }, { "epoch": 0.4481697939095663, "grad_norm": 0.4928249040331042, "learning_rate": 6.0945567932258554e-05, "loss": 1.0312, "step": 1457 }, { "epoch": 0.44847739157182404, "grad_norm": 0.6088414074594205, "learning_rate": 6.089695177856266e-05, "loss": 0.9842, "step": 1458 }, { "epoch": 0.44878498923408183, "grad_norm": 0.5885808648080317, "learning_rate": 6.084832480662216e-05, "loss": 0.9966, "step": 1459 }, { "epoch": 0.4490925868963396, "grad_norm": 0.4980227167404087, "learning_rate": 6.0799687064712786e-05, "loss": 0.8872, "step": 1460 }, { "epoch": 0.44940018455859737, "grad_norm": 0.4718892289237269, "learning_rate": 6.075103860112099e-05, "loss": 0.7107, "step": 1461 }, { "epoch": 0.4497077822208551, "grad_norm": 0.8979476956651845, "learning_rate": 6.070237946414385e-05, "loss": 0.9478, "step": 1462 }, { "epoch": 0.4500153798831129, "grad_norm": 0.7028496085213827, "learning_rate": 6.0653709702089045e-05, "loss": 0.9042, "step": 1463 }, { "epoch": 0.45032297754537065, "grad_norm": 0.5835527655182339, "learning_rate": 6.060502936327481e-05, "loss": 0.8367, "step": 1464 }, { "epoch": 0.45063057520762845, "grad_norm": 0.41201435893641214, "learning_rate": 6.0556338496029865e-05, "loss": 0.8065, "step": 1465 }, { "epoch": 0.4509381728698862, "grad_norm": 0.46672432931768626, "learning_rate": 6.0507637148693365e-05, "loss": 0.8731, "step": 1466 }, { "epoch": 0.45124577053214393, "grad_norm": 0.46189778862321323, "learning_rate": 6.0458925369614936e-05, "loss": 0.8682, "step": 1467 }, { "epoch": 0.4515533681944017, "grad_norm": 0.46092785201170344, "learning_rate": 6.041020320715451e-05, "loss": 0.8145, "step": 1468 }, { "epoch": 0.45186096585665947, "grad_norm": 0.4636799641154623, "learning_rate": 6.036147070968231e-05, "loss": 0.9514, "step": 1469 }, { "epoch": 0.45216856351891727, "grad_norm": 0.7089651708926319, "learning_rate": 6.0312727925578885e-05, "loss": 0.8561, "step": 1470 }, { "epoch": 0.452476161181175, "grad_norm": 1.055532934930644, "learning_rate": 6.026397490323492e-05, "loss": 0.9804, "step": 1471 }, { "epoch": 0.4527837588434328, "grad_norm": 0.7427378740566813, "learning_rate": 6.021521169105131e-05, "loss": 0.9728, "step": 1472 }, { "epoch": 0.45309135650569055, "grad_norm": 0.5200560683331935, "learning_rate": 6.016643833743908e-05, "loss": 1.0022, "step": 1473 }, { "epoch": 0.45339895416794834, "grad_norm": 0.5107634601219124, "learning_rate": 6.0117654890819275e-05, "loss": 0.835, "step": 1474 }, { "epoch": 0.4537065518302061, "grad_norm": 0.46959902047275576, "learning_rate": 6.0068861399622986e-05, "loss": 0.8453, "step": 1475 }, { "epoch": 0.4540141494924639, "grad_norm": 0.5518796462589343, "learning_rate": 6.002005791229131e-05, "loss": 1.0252, "step": 1476 }, { "epoch": 0.4543217471547216, "grad_norm": 0.5474624916249057, "learning_rate": 5.997124447727518e-05, "loss": 0.8666, "step": 1477 }, { "epoch": 0.45462934481697936, "grad_norm": 0.4808978668031208, "learning_rate": 5.992242114303549e-05, "loss": 0.8415, "step": 1478 }, { "epoch": 0.45493694247923716, "grad_norm": 0.49754350622632, "learning_rate": 5.9873587958042944e-05, "loss": 1.0267, "step": 1479 }, { "epoch": 0.4552445401414949, "grad_norm": 0.44779431629623867, "learning_rate": 5.982474497077798e-05, "loss": 0.7955, "step": 1480 }, { "epoch": 0.4555521378037527, "grad_norm": 0.5134292515904719, "learning_rate": 5.97758922297308e-05, "loss": 0.908, "step": 1481 }, { "epoch": 0.45585973546601044, "grad_norm": 0.4110962732741282, "learning_rate": 5.9727029783401324e-05, "loss": 0.9551, "step": 1482 }, { "epoch": 0.45616733312826824, "grad_norm": 1.162817382379284, "learning_rate": 5.967815768029904e-05, "loss": 1.0039, "step": 1483 }, { "epoch": 0.456474930790526, "grad_norm": 0.4424922937767716, "learning_rate": 5.962927596894305e-05, "loss": 0.7827, "step": 1484 }, { "epoch": 0.4567825284527838, "grad_norm": 0.9503930045536024, "learning_rate": 5.9580384697862035e-05, "loss": 0.9265, "step": 1485 }, { "epoch": 0.4570901261150415, "grad_norm": 0.6486128275053689, "learning_rate": 5.953148391559409e-05, "loss": 0.9619, "step": 1486 }, { "epoch": 0.4573977237772993, "grad_norm": 0.6075064269143099, "learning_rate": 5.9482573670686825e-05, "loss": 1.0708, "step": 1487 }, { "epoch": 0.45770532143955706, "grad_norm": 0.4707719064049468, "learning_rate": 5.943365401169721e-05, "loss": 0.8637, "step": 1488 }, { "epoch": 0.45801291910181485, "grad_norm": 0.47581444550708313, "learning_rate": 5.938472498719153e-05, "loss": 0.8123, "step": 1489 }, { "epoch": 0.4583205167640726, "grad_norm": 0.5080671820443655, "learning_rate": 5.933578664574545e-05, "loss": 0.921, "step": 1490 }, { "epoch": 0.45862811442633034, "grad_norm": 0.5105430155505789, "learning_rate": 5.9286839035943805e-05, "loss": 0.9248, "step": 1491 }, { "epoch": 0.45893571208858813, "grad_norm": 0.5147777234818454, "learning_rate": 5.9237882206380657e-05, "loss": 0.9296, "step": 1492 }, { "epoch": 0.4592433097508459, "grad_norm": 0.6071377865236739, "learning_rate": 5.918891620565926e-05, "loss": 0.9717, "step": 1493 }, { "epoch": 0.45955090741310367, "grad_norm": 0.6068711969320003, "learning_rate": 5.913994108239192e-05, "loss": 0.9, "step": 1494 }, { "epoch": 0.4598585050753614, "grad_norm": 0.6576515267563453, "learning_rate": 5.909095688520002e-05, "loss": 1.0801, "step": 1495 }, { "epoch": 0.4601661027376192, "grad_norm": 0.5862280995374919, "learning_rate": 5.904196366271395e-05, "loss": 1.1138, "step": 1496 }, { "epoch": 0.46047370039987695, "grad_norm": 0.48882415503454624, "learning_rate": 5.899296146357307e-05, "loss": 0.9429, "step": 1497 }, { "epoch": 0.46078129806213475, "grad_norm": 0.40570835305747543, "learning_rate": 5.8943950336425623e-05, "loss": 0.8955, "step": 1498 }, { "epoch": 0.4610888957243925, "grad_norm": 1.1555946861213167, "learning_rate": 5.8894930329928765e-05, "loss": 0.7953, "step": 1499 }, { "epoch": 0.4613964933866503, "grad_norm": 0.5743524566921846, "learning_rate": 5.884590149274842e-05, "loss": 0.9931, "step": 1500 }, { "epoch": 0.461704091048908, "grad_norm": 0.7546119031366447, "learning_rate": 5.879686387355929e-05, "loss": 0.9632, "step": 1501 }, { "epoch": 0.46201168871116577, "grad_norm": 0.5398555052655971, "learning_rate": 5.874781752104482e-05, "loss": 0.8898, "step": 1502 }, { "epoch": 0.46231928637342357, "grad_norm": 0.5194200460660925, "learning_rate": 5.8698762483897106e-05, "loss": 0.7577, "step": 1503 }, { "epoch": 0.4626268840356813, "grad_norm": 0.5557885361572064, "learning_rate": 5.8649698810816844e-05, "loss": 0.8993, "step": 1504 }, { "epoch": 0.4629344816979391, "grad_norm": 0.5525483684173152, "learning_rate": 5.860062655051336e-05, "loss": 0.941, "step": 1505 }, { "epoch": 0.46324207936019685, "grad_norm": 0.6422534949403745, "learning_rate": 5.855154575170445e-05, "loss": 0.84, "step": 1506 }, { "epoch": 0.46354967702245464, "grad_norm": 0.8132129781664795, "learning_rate": 5.8502456463116406e-05, "loss": 0.876, "step": 1507 }, { "epoch": 0.4638572746847124, "grad_norm": 0.7158962639419579, "learning_rate": 5.845335873348396e-05, "loss": 0.9571, "step": 1508 }, { "epoch": 0.4641648723469702, "grad_norm": 0.58130964156582, "learning_rate": 5.840425261155023e-05, "loss": 0.9138, "step": 1509 }, { "epoch": 0.4644724700092279, "grad_norm": 0.517089231456419, "learning_rate": 5.835513814606662e-05, "loss": 0.8885, "step": 1510 }, { "epoch": 0.4647800676714857, "grad_norm": 0.7601575062213196, "learning_rate": 5.830601538579288e-05, "loss": 1.0695, "step": 1511 }, { "epoch": 0.46508766533374346, "grad_norm": 0.45852672051297094, "learning_rate": 5.825688437949695e-05, "loss": 0.8106, "step": 1512 }, { "epoch": 0.46539526299600126, "grad_norm": 0.6879129144187268, "learning_rate": 5.820774517595495e-05, "loss": 0.9483, "step": 1513 }, { "epoch": 0.465702860658259, "grad_norm": 0.5067922331427596, "learning_rate": 5.815859782395119e-05, "loss": 0.9291, "step": 1514 }, { "epoch": 0.46601045832051674, "grad_norm": 0.5804953419165009, "learning_rate": 5.8109442372278025e-05, "loss": 1.0547, "step": 1515 }, { "epoch": 0.46631805598277454, "grad_norm": 0.4554864943022319, "learning_rate": 5.806027886973585e-05, "loss": 0.8235, "step": 1516 }, { "epoch": 0.4666256536450323, "grad_norm": 0.4977020642071796, "learning_rate": 5.80111073651331e-05, "loss": 0.9876, "step": 1517 }, { "epoch": 0.4669332513072901, "grad_norm": 0.700444139055646, "learning_rate": 5.7961927907286086e-05, "loss": 0.9106, "step": 1518 }, { "epoch": 0.4672408489695478, "grad_norm": 0.5371592632216663, "learning_rate": 5.791274054501906e-05, "loss": 0.8103, "step": 1519 }, { "epoch": 0.4675484466318056, "grad_norm": 0.6356819745324375, "learning_rate": 5.786354532716413e-05, "loss": 0.8955, "step": 1520 }, { "epoch": 0.46785604429406336, "grad_norm": 0.5019373485183297, "learning_rate": 5.781434230256114e-05, "loss": 1.0996, "step": 1521 }, { "epoch": 0.46816364195632115, "grad_norm": 0.5779804335649109, "learning_rate": 5.776513152005778e-05, "loss": 0.834, "step": 1522 }, { "epoch": 0.4684712396185789, "grad_norm": 0.5973187335918348, "learning_rate": 5.771591302850937e-05, "loss": 0.9725, "step": 1523 }, { "epoch": 0.4687788372808367, "grad_norm": 0.5579262148955842, "learning_rate": 5.766668687677888e-05, "loss": 0.8276, "step": 1524 }, { "epoch": 0.46908643494309443, "grad_norm": 0.5939632143384825, "learning_rate": 5.761745311373694e-05, "loss": 0.9506, "step": 1525 }, { "epoch": 0.4693940326053522, "grad_norm": 0.5363939627704389, "learning_rate": 5.756821178826168e-05, "loss": 0.9116, "step": 1526 }, { "epoch": 0.46970163026760997, "grad_norm": 0.5321042489220742, "learning_rate": 5.751896294923879e-05, "loss": 0.8225, "step": 1527 }, { "epoch": 0.4700092279298677, "grad_norm": 0.4721794054374566, "learning_rate": 5.746970664556136e-05, "loss": 0.9135, "step": 1528 }, { "epoch": 0.4703168255921255, "grad_norm": 1.2339598462024788, "learning_rate": 5.7420442926129946e-05, "loss": 1.0349, "step": 1529 }, { "epoch": 0.47062442325438325, "grad_norm": 0.5142842022924186, "learning_rate": 5.737117183985242e-05, "loss": 0.7897, "step": 1530 }, { "epoch": 0.47093202091664105, "grad_norm": 0.5400610622712679, "learning_rate": 5.7321893435644025e-05, "loss": 0.9097, "step": 1531 }, { "epoch": 0.4712396185788988, "grad_norm": 0.4885992924739944, "learning_rate": 5.72726077624272e-05, "loss": 0.7793, "step": 1532 }, { "epoch": 0.4715472162411566, "grad_norm": 0.5449153134583872, "learning_rate": 5.722331486913165e-05, "loss": 0.8556, "step": 1533 }, { "epoch": 0.4718548139034143, "grad_norm": 1.0153350303582354, "learning_rate": 5.717401480469423e-05, "loss": 1.0342, "step": 1534 }, { "epoch": 0.4721624115656721, "grad_norm": 1.8590017527303229, "learning_rate": 5.7124707618058926e-05, "loss": 0.9212, "step": 1535 }, { "epoch": 0.47247000922792987, "grad_norm": 0.5333970676049282, "learning_rate": 5.707539335817675e-05, "loss": 0.8034, "step": 1536 }, { "epoch": 0.4727776068901876, "grad_norm": 0.5113626612504036, "learning_rate": 5.7026072074005834e-05, "loss": 0.9695, "step": 1537 }, { "epoch": 0.4730852045524454, "grad_norm": 0.443343015240705, "learning_rate": 5.697674381451117e-05, "loss": 0.9728, "step": 1538 }, { "epoch": 0.47339280221470315, "grad_norm": 0.38609051821388624, "learning_rate": 5.6927408628664725e-05, "loss": 0.6677, "step": 1539 }, { "epoch": 0.47370039987696094, "grad_norm": 0.4261874051914617, "learning_rate": 5.687806656544539e-05, "loss": 0.9078, "step": 1540 }, { "epoch": 0.4740079975392187, "grad_norm": 0.5519480062945563, "learning_rate": 5.682871767383879e-05, "loss": 0.8578, "step": 1541 }, { "epoch": 0.4743155952014765, "grad_norm": 0.6783180010631218, "learning_rate": 5.67793620028374e-05, "loss": 1.041, "step": 1542 }, { "epoch": 0.4746231928637342, "grad_norm": 0.5508111325492053, "learning_rate": 5.67299996014404e-05, "loss": 0.9389, "step": 1543 }, { "epoch": 0.474930790525992, "grad_norm": 0.6431809523749784, "learning_rate": 5.6680630518653645e-05, "loss": 0.8754, "step": 1544 }, { "epoch": 0.47523838818824976, "grad_norm": 0.5486978425280408, "learning_rate": 5.663125480348963e-05, "loss": 0.969, "step": 1545 }, { "epoch": 0.47554598585050756, "grad_norm": 0.4600458105098266, "learning_rate": 5.658187250496746e-05, "loss": 0.8048, "step": 1546 }, { "epoch": 0.4758535835127653, "grad_norm": 0.48080330549811073, "learning_rate": 5.653248367211274e-05, "loss": 0.9265, "step": 1547 }, { "epoch": 0.4761611811750231, "grad_norm": 0.6947496376224463, "learning_rate": 5.648308835395755e-05, "loss": 0.8549, "step": 1548 }, { "epoch": 0.47646877883728084, "grad_norm": 0.5225095089723527, "learning_rate": 5.643368659954046e-05, "loss": 0.8544, "step": 1549 }, { "epoch": 0.4767763764995386, "grad_norm": 0.46916872857265257, "learning_rate": 5.638427845790639e-05, "loss": 0.8902, "step": 1550 }, { "epoch": 0.4770839741617964, "grad_norm": 0.5324970338372527, "learning_rate": 5.6334863978106614e-05, "loss": 0.9358, "step": 1551 }, { "epoch": 0.4773915718240541, "grad_norm": 1.1577459268794215, "learning_rate": 5.628544320919872e-05, "loss": 0.8281, "step": 1552 }, { "epoch": 0.4776991694863119, "grad_norm": 0.5914212229162871, "learning_rate": 5.623601620024649e-05, "loss": 0.9759, "step": 1553 }, { "epoch": 0.47800676714856966, "grad_norm": 0.5293373134977143, "learning_rate": 5.618658300031992e-05, "loss": 0.919, "step": 1554 }, { "epoch": 0.47831436481082745, "grad_norm": 0.5442942261399684, "learning_rate": 5.61371436584952e-05, "loss": 0.8526, "step": 1555 }, { "epoch": 0.4786219624730852, "grad_norm": 0.5127261328340694, "learning_rate": 5.6087698223854555e-05, "loss": 0.9047, "step": 1556 }, { "epoch": 0.478929560135343, "grad_norm": 0.5698466477953118, "learning_rate": 5.603824674548629e-05, "loss": 0.7277, "step": 1557 }, { "epoch": 0.47923715779760073, "grad_norm": 0.5115873731868934, "learning_rate": 5.598878927248469e-05, "loss": 0.9092, "step": 1558 }, { "epoch": 0.47954475545985853, "grad_norm": 0.6372376321409212, "learning_rate": 5.593932585395e-05, "loss": 0.9004, "step": 1559 }, { "epoch": 0.47985235312211627, "grad_norm": 0.6767204594078197, "learning_rate": 5.58898565389884e-05, "loss": 1.0286, "step": 1560 }, { "epoch": 0.480159950784374, "grad_norm": 0.38684596018150413, "learning_rate": 5.584038137671187e-05, "loss": 0.8226, "step": 1561 }, { "epoch": 0.4804675484466318, "grad_norm": 0.47716453038663553, "learning_rate": 5.579090041623821e-05, "loss": 0.8853, "step": 1562 }, { "epoch": 0.48077514610888955, "grad_norm": 0.5240583705404269, "learning_rate": 5.5741413706691016e-05, "loss": 0.8873, "step": 1563 }, { "epoch": 0.48108274377114735, "grad_norm": 0.6184231515989732, "learning_rate": 5.569192129719955e-05, "loss": 0.9727, "step": 1564 }, { "epoch": 0.4813903414334051, "grad_norm": 0.6746237753341767, "learning_rate": 5.564242323689874e-05, "loss": 0.9512, "step": 1565 }, { "epoch": 0.4816979390956629, "grad_norm": 0.5740963353569964, "learning_rate": 5.5592919574929135e-05, "loss": 1.0587, "step": 1566 }, { "epoch": 0.4820055367579206, "grad_norm": 0.5287401809201142, "learning_rate": 5.554341036043684e-05, "loss": 0.9358, "step": 1567 }, { "epoch": 0.4823131344201784, "grad_norm": 0.5710908158987058, "learning_rate": 5.549389564257346e-05, "loss": 1.0388, "step": 1568 }, { "epoch": 0.48262073208243617, "grad_norm": 0.43456998251036627, "learning_rate": 5.544437547049608e-05, "loss": 0.8421, "step": 1569 }, { "epoch": 0.48292832974469396, "grad_norm": 0.4614960879019599, "learning_rate": 5.5394849893367195e-05, "loss": 0.9011, "step": 1570 }, { "epoch": 0.4832359274069517, "grad_norm": 0.558593198256335, "learning_rate": 5.5345318960354676e-05, "loss": 0.9028, "step": 1571 }, { "epoch": 0.4835435250692095, "grad_norm": 0.6168013066449737, "learning_rate": 5.529578272063168e-05, "loss": 1.1372, "step": 1572 }, { "epoch": 0.48385112273146724, "grad_norm": 0.45329016258078153, "learning_rate": 5.524624122337668e-05, "loss": 0.9858, "step": 1573 }, { "epoch": 0.484158720393725, "grad_norm": 0.49275948893442073, "learning_rate": 5.519669451777332e-05, "loss": 0.7858, "step": 1574 }, { "epoch": 0.4844663180559828, "grad_norm": 0.6602988158264035, "learning_rate": 5.514714265301045e-05, "loss": 0.8529, "step": 1575 }, { "epoch": 0.4847739157182405, "grad_norm": 0.545972916765467, "learning_rate": 5.509758567828203e-05, "loss": 0.794, "step": 1576 }, { "epoch": 0.4850815133804983, "grad_norm": 0.5571015683157369, "learning_rate": 5.504802364278706e-05, "loss": 1.0232, "step": 1577 }, { "epoch": 0.48538911104275606, "grad_norm": 0.7305798726396202, "learning_rate": 5.499845659572964e-05, "loss": 0.8627, "step": 1578 }, { "epoch": 0.48569670870501386, "grad_norm": 0.7357822762253203, "learning_rate": 5.494888458631878e-05, "loss": 0.8709, "step": 1579 }, { "epoch": 0.4860043063672716, "grad_norm": 0.602458126850644, "learning_rate": 5.489930766376843e-05, "loss": 0.8505, "step": 1580 }, { "epoch": 0.4863119040295294, "grad_norm": 0.3868617360107753, "learning_rate": 5.484972587729744e-05, "loss": 0.8987, "step": 1581 }, { "epoch": 0.48661950169178714, "grad_norm": 0.7947391260064087, "learning_rate": 5.480013927612947e-05, "loss": 0.9033, "step": 1582 }, { "epoch": 0.48692709935404493, "grad_norm": 0.43410621829206175, "learning_rate": 5.475054790949295e-05, "loss": 0.6963, "step": 1583 }, { "epoch": 0.4872346970163027, "grad_norm": 0.6969004997516594, "learning_rate": 5.4700951826621096e-05, "loss": 0.8213, "step": 1584 }, { "epoch": 0.4875422946785604, "grad_norm": 0.5709146908130609, "learning_rate": 5.4651351076751725e-05, "loss": 1.0172, "step": 1585 }, { "epoch": 0.4878498923408182, "grad_norm": 0.722057273274909, "learning_rate": 5.4601745709127316e-05, "loss": 0.8153, "step": 1586 }, { "epoch": 0.48815749000307596, "grad_norm": 0.770660249967911, "learning_rate": 5.4552135772994995e-05, "loss": 0.9686, "step": 1587 }, { "epoch": 0.48846508766533375, "grad_norm": 0.45413754014711555, "learning_rate": 5.450252131760634e-05, "loss": 0.9308, "step": 1588 }, { "epoch": 0.4887726853275915, "grad_norm": 0.8890191926890667, "learning_rate": 5.445290239221744e-05, "loss": 0.995, "step": 1589 }, { "epoch": 0.4890802829898493, "grad_norm": 0.7688052788264588, "learning_rate": 5.440327904608886e-05, "loss": 1.0772, "step": 1590 }, { "epoch": 0.48938788065210703, "grad_norm": 0.4919766125262643, "learning_rate": 5.435365132848549e-05, "loss": 0.8295, "step": 1591 }, { "epoch": 0.48969547831436483, "grad_norm": 0.5627155939532313, "learning_rate": 5.430401928867662e-05, "loss": 1.0101, "step": 1592 }, { "epoch": 0.49000307597662257, "grad_norm": 0.5136171980566631, "learning_rate": 5.4254382975935794e-05, "loss": 0.7095, "step": 1593 }, { "epoch": 0.49031067363888037, "grad_norm": 0.8287707693423593, "learning_rate": 5.42047424395408e-05, "loss": 1.0507, "step": 1594 }, { "epoch": 0.4906182713011381, "grad_norm": 0.5907148648017173, "learning_rate": 5.4155097728773654e-05, "loss": 0.9548, "step": 1595 }, { "epoch": 0.49092586896339585, "grad_norm": 0.6086359705002018, "learning_rate": 5.4105448892920466e-05, "loss": 0.9958, "step": 1596 }, { "epoch": 0.49123346662565365, "grad_norm": 0.4663621412046064, "learning_rate": 5.4055795981271465e-05, "loss": 0.8861, "step": 1597 }, { "epoch": 0.4915410642879114, "grad_norm": 0.5333428968925542, "learning_rate": 5.400613904312094e-05, "loss": 0.7859, "step": 1598 }, { "epoch": 0.4918486619501692, "grad_norm": 2.4825218029882876, "learning_rate": 5.395647812776715e-05, "loss": 0.9246, "step": 1599 }, { "epoch": 0.4921562596124269, "grad_norm": 0.7609204177142483, "learning_rate": 5.390681328451233e-05, "loss": 1.1238, "step": 1600 }, { "epoch": 0.4924638572746847, "grad_norm": 0.5664837977313796, "learning_rate": 5.38571445626626e-05, "loss": 1.0034, "step": 1601 }, { "epoch": 0.49277145493694247, "grad_norm": 0.708246764112534, "learning_rate": 5.3807472011527915e-05, "loss": 0.7758, "step": 1602 }, { "epoch": 0.49307905259920026, "grad_norm": 1.2241426024416948, "learning_rate": 5.375779568042206e-05, "loss": 0.8823, "step": 1603 }, { "epoch": 0.493386650261458, "grad_norm": 0.4409155306208021, "learning_rate": 5.370811561866257e-05, "loss": 0.8707, "step": 1604 }, { "epoch": 0.4936942479237158, "grad_norm": 0.5477604806948306, "learning_rate": 5.365843187557066e-05, "loss": 0.8832, "step": 1605 }, { "epoch": 0.49400184558597354, "grad_norm": 1.0147992760113067, "learning_rate": 5.360874450047121e-05, "loss": 1.0761, "step": 1606 }, { "epoch": 0.49430944324823134, "grad_norm": 0.4865531423152669, "learning_rate": 5.355905354269274e-05, "loss": 0.8901, "step": 1607 }, { "epoch": 0.4946170409104891, "grad_norm": 0.7497364190717891, "learning_rate": 5.3509359051567265e-05, "loss": 1.1688, "step": 1608 }, { "epoch": 0.4949246385727468, "grad_norm": 0.4694561554916974, "learning_rate": 5.345966107643034e-05, "loss": 0.9523, "step": 1609 }, { "epoch": 0.4952322362350046, "grad_norm": 0.5844503126346605, "learning_rate": 5.3409959666621014e-05, "loss": 0.9926, "step": 1610 }, { "epoch": 0.49553983389726236, "grad_norm": 1.3873998421938145, "learning_rate": 5.336025487148167e-05, "loss": 0.9192, "step": 1611 }, { "epoch": 0.49584743155952016, "grad_norm": 0.4374999017864761, "learning_rate": 5.33105467403581e-05, "loss": 0.793, "step": 1612 }, { "epoch": 0.4961550292217779, "grad_norm": 0.5882221561028832, "learning_rate": 5.326083532259942e-05, "loss": 0.9637, "step": 1613 }, { "epoch": 0.4964626268840357, "grad_norm": 0.4287862035390917, "learning_rate": 5.321112066755799e-05, "loss": 0.8747, "step": 1614 }, { "epoch": 0.49677022454629344, "grad_norm": 0.49448441665652626, "learning_rate": 5.316140282458935e-05, "loss": 1.1094, "step": 1615 }, { "epoch": 0.49707782220855123, "grad_norm": 0.49610015510042527, "learning_rate": 5.3111681843052275e-05, "loss": 0.8393, "step": 1616 }, { "epoch": 0.497385419870809, "grad_norm": 0.5358812006706369, "learning_rate": 5.306195777230859e-05, "loss": 0.9389, "step": 1617 }, { "epoch": 0.4976930175330668, "grad_norm": 0.5334375380616195, "learning_rate": 5.301223066172323e-05, "loss": 0.8452, "step": 1618 }, { "epoch": 0.4980006151953245, "grad_norm": 0.5726414345139729, "learning_rate": 5.296250056066414e-05, "loss": 0.9557, "step": 1619 }, { "epoch": 0.49830821285758226, "grad_norm": 0.5636198349208378, "learning_rate": 5.2912767518502215e-05, "loss": 0.8228, "step": 1620 }, { "epoch": 0.49861581051984005, "grad_norm": 1.0004901304218345, "learning_rate": 5.2863031584611274e-05, "loss": 0.8888, "step": 1621 }, { "epoch": 0.4989234081820978, "grad_norm": 0.5872274683165479, "learning_rate": 5.2813292808368044e-05, "loss": 0.8639, "step": 1622 }, { "epoch": 0.4992310058443556, "grad_norm": 0.44692644401674, "learning_rate": 5.276355123915203e-05, "loss": 0.947, "step": 1623 }, { "epoch": 0.49953860350661333, "grad_norm": 0.4765369316484997, "learning_rate": 5.271380692634551e-05, "loss": 0.8871, "step": 1624 }, { "epoch": 0.49984620116887113, "grad_norm": 0.5566008016634625, "learning_rate": 5.266405991933352e-05, "loss": 0.9286, "step": 1625 }, { "epoch": 0.5001537988311289, "grad_norm": 0.47882685351287463, "learning_rate": 5.261431026750374e-05, "loss": 0.6692, "step": 1626 }, { "epoch": 0.5004613964933866, "grad_norm": 0.5858778028873686, "learning_rate": 5.25645580202465e-05, "loss": 0.8576, "step": 1627 }, { "epoch": 0.5007689941556445, "grad_norm": 0.48708672696539357, "learning_rate": 5.251480322695469e-05, "loss": 0.9207, "step": 1628 }, { "epoch": 0.5010765918179022, "grad_norm": 0.6335776047181543, "learning_rate": 5.24650459370237e-05, "loss": 0.9531, "step": 1629 }, { "epoch": 0.50138418948016, "grad_norm": 0.7162597718617997, "learning_rate": 5.241528619985149e-05, "loss": 0.8871, "step": 1630 }, { "epoch": 0.5016917871424177, "grad_norm": 0.5300991661864727, "learning_rate": 5.2365524064838344e-05, "loss": 0.9259, "step": 1631 }, { "epoch": 0.5019993848046754, "grad_norm": 0.5040290686409824, "learning_rate": 5.231575958138698e-05, "loss": 0.8241, "step": 1632 }, { "epoch": 0.5023069824669333, "grad_norm": 0.6582271760489202, "learning_rate": 5.226599279890245e-05, "loss": 0.8808, "step": 1633 }, { "epoch": 0.502614580129191, "grad_norm": 0.809357517568404, "learning_rate": 5.2216223766792086e-05, "loss": 0.8665, "step": 1634 }, { "epoch": 0.5029221777914488, "grad_norm": 0.5491479512181336, "learning_rate": 5.216645253446541e-05, "loss": 0.836, "step": 1635 }, { "epoch": 0.5032297754537065, "grad_norm": 0.9212377119329989, "learning_rate": 5.211667915133419e-05, "loss": 1.0141, "step": 1636 }, { "epoch": 0.5035373731159644, "grad_norm": 0.5563801850956175, "learning_rate": 5.2066903666812304e-05, "loss": 0.9207, "step": 1637 }, { "epoch": 0.5038449707782221, "grad_norm": 0.5338867569336218, "learning_rate": 5.20171261303157e-05, "loss": 0.9616, "step": 1638 }, { "epoch": 0.5041525684404798, "grad_norm": 0.5308347679247165, "learning_rate": 5.196734659126239e-05, "loss": 0.8042, "step": 1639 }, { "epoch": 0.5044601661027376, "grad_norm": 0.6949167374051048, "learning_rate": 5.1917565099072365e-05, "loss": 1.0004, "step": 1640 }, { "epoch": 0.5047677637649954, "grad_norm": 0.5904895242068388, "learning_rate": 5.186778170316754e-05, "loss": 0.8053, "step": 1641 }, { "epoch": 0.5050753614272532, "grad_norm": 0.4773629843794587, "learning_rate": 5.181799645297175e-05, "loss": 0.9398, "step": 1642 }, { "epoch": 0.5053829590895109, "grad_norm": 0.4607310298871022, "learning_rate": 5.176820939791064e-05, "loss": 0.7853, "step": 1643 }, { "epoch": 0.5056905567517687, "grad_norm": 0.519488529539426, "learning_rate": 5.1718420587411654e-05, "loss": 0.933, "step": 1644 }, { "epoch": 0.5059981544140264, "grad_norm": 1.662297522804013, "learning_rate": 5.1668630070904e-05, "loss": 0.8432, "step": 1645 }, { "epoch": 0.5063057520762843, "grad_norm": 0.7872028597267816, "learning_rate": 5.161883789781856e-05, "loss": 1.1026, "step": 1646 }, { "epoch": 0.506613349738542, "grad_norm": 0.5639758231329615, "learning_rate": 5.156904411758785e-05, "loss": 0.9178, "step": 1647 }, { "epoch": 0.5069209474007997, "grad_norm": 0.4452951298136305, "learning_rate": 5.1519248779646026e-05, "loss": 0.8129, "step": 1648 }, { "epoch": 0.5072285450630575, "grad_norm": 0.5155915657677326, "learning_rate": 5.146945193342875e-05, "loss": 0.9105, "step": 1649 }, { "epoch": 0.5075361427253153, "grad_norm": 0.805286370212828, "learning_rate": 5.1419653628373164e-05, "loss": 0.9949, "step": 1650 }, { "epoch": 0.5078437403875731, "grad_norm": 0.495186853942777, "learning_rate": 5.136985391391792e-05, "loss": 0.9513, "step": 1651 }, { "epoch": 0.5081513380498308, "grad_norm": 0.6562240473577468, "learning_rate": 5.132005283950303e-05, "loss": 1.0226, "step": 1652 }, { "epoch": 0.5084589357120886, "grad_norm": 0.519890496122487, "learning_rate": 5.127025045456986e-05, "loss": 0.7992, "step": 1653 }, { "epoch": 0.5087665333743464, "grad_norm": 0.5016185433103258, "learning_rate": 5.1220446808561084e-05, "loss": 1.0086, "step": 1654 }, { "epoch": 0.5090741310366041, "grad_norm": 0.99710168335075, "learning_rate": 5.1170641950920596e-05, "loss": 0.8119, "step": 1655 }, { "epoch": 0.5093817286988619, "grad_norm": 0.41192639645625073, "learning_rate": 5.112083593109356e-05, "loss": 0.9208, "step": 1656 }, { "epoch": 0.5096893263611196, "grad_norm": 0.4137908113408895, "learning_rate": 5.107102879852623e-05, "loss": 0.7722, "step": 1657 }, { "epoch": 0.5099969240233774, "grad_norm": 0.6479959013739153, "learning_rate": 5.102122060266599e-05, "loss": 1.0125, "step": 1658 }, { "epoch": 0.5103045216856352, "grad_norm": 0.5793151016208413, "learning_rate": 5.0971411392961286e-05, "loss": 0.766, "step": 1659 }, { "epoch": 0.510612119347893, "grad_norm": 0.5405082886851857, "learning_rate": 5.092160121886157e-05, "loss": 0.8106, "step": 1660 }, { "epoch": 0.5109197170101507, "grad_norm": 0.7222910201195941, "learning_rate": 5.087179012981721e-05, "loss": 0.902, "step": 1661 }, { "epoch": 0.5112273146724085, "grad_norm": 0.5070942973227823, "learning_rate": 5.082197817527955e-05, "loss": 0.8926, "step": 1662 }, { "epoch": 0.5115349123346663, "grad_norm": 0.7287308757232781, "learning_rate": 5.0772165404700754e-05, "loss": 0.8345, "step": 1663 }, { "epoch": 0.511842509996924, "grad_norm": 0.4677786524172967, "learning_rate": 5.072235186753379e-05, "loss": 0.7337, "step": 1664 }, { "epoch": 0.5121501076591818, "grad_norm": 0.6089142884983149, "learning_rate": 5.067253761323241e-05, "loss": 0.8633, "step": 1665 }, { "epoch": 0.5124577053214395, "grad_norm": 0.5521546080138862, "learning_rate": 5.0622722691251045e-05, "loss": 1.0052, "step": 1666 }, { "epoch": 0.5127653029836973, "grad_norm": 0.8033068375535284, "learning_rate": 5.0572907151044844e-05, "loss": 0.8633, "step": 1667 }, { "epoch": 0.5130729006459551, "grad_norm": 0.45876877277303013, "learning_rate": 5.0523091042069524e-05, "loss": 0.922, "step": 1668 }, { "epoch": 0.5133804983082129, "grad_norm": 0.472558421131477, "learning_rate": 5.047327441378137e-05, "loss": 0.8645, "step": 1669 }, { "epoch": 0.5136880959704706, "grad_norm": 0.857949079255689, "learning_rate": 5.0423457315637204e-05, "loss": 0.881, "step": 1670 }, { "epoch": 0.5139956936327283, "grad_norm": 0.5450009237104648, "learning_rate": 5.0373639797094285e-05, "loss": 1.0762, "step": 1671 }, { "epoch": 0.5143032912949862, "grad_norm": 0.5679072583276439, "learning_rate": 5.032382190761032e-05, "loss": 0.9781, "step": 1672 }, { "epoch": 0.5146108889572439, "grad_norm": 0.5112755167067756, "learning_rate": 5.027400369664338e-05, "loss": 0.9338, "step": 1673 }, { "epoch": 0.5149184866195017, "grad_norm": 0.7046533045762284, "learning_rate": 5.022418521365182e-05, "loss": 0.8922, "step": 1674 }, { "epoch": 0.5152260842817594, "grad_norm": 0.9709374946557779, "learning_rate": 5.0174366508094305e-05, "loss": 1.0358, "step": 1675 }, { "epoch": 0.5155336819440173, "grad_norm": 0.7812839817598692, "learning_rate": 5.012454762942972e-05, "loss": 1.2526, "step": 1676 }, { "epoch": 0.515841279606275, "grad_norm": 0.6825646588438476, "learning_rate": 5.0074728627117085e-05, "loss": 0.9005, "step": 1677 }, { "epoch": 0.5161488772685328, "grad_norm": 0.5242868616253615, "learning_rate": 5.002490955061557e-05, "loss": 0.8569, "step": 1678 }, { "epoch": 0.5164564749307905, "grad_norm": 0.5902399175998372, "learning_rate": 4.9975090449384434e-05, "loss": 1.0019, "step": 1679 }, { "epoch": 0.5167640725930482, "grad_norm": 0.7326019598694212, "learning_rate": 4.9925271372882926e-05, "loss": 0.9151, "step": 1680 }, { "epoch": 0.5170716702553061, "grad_norm": 0.43000891590465173, "learning_rate": 4.9875452370570297e-05, "loss": 0.8767, "step": 1681 }, { "epoch": 0.5173792679175638, "grad_norm": 0.667612583451073, "learning_rate": 4.982563349190571e-05, "loss": 0.8878, "step": 1682 }, { "epoch": 0.5176868655798216, "grad_norm": 0.44270606321298145, "learning_rate": 4.977581478634819e-05, "loss": 0.8725, "step": 1683 }, { "epoch": 0.5179944632420793, "grad_norm": 0.6387706534321773, "learning_rate": 4.972599630335664e-05, "loss": 0.9301, "step": 1684 }, { "epoch": 0.5183020609043372, "grad_norm": 0.5626553526168806, "learning_rate": 4.967617809238969e-05, "loss": 0.9019, "step": 1685 }, { "epoch": 0.5186096585665949, "grad_norm": 0.5040721302261333, "learning_rate": 4.962636020290572e-05, "loss": 0.9833, "step": 1686 }, { "epoch": 0.5189172562288527, "grad_norm": 0.6012648479464723, "learning_rate": 4.957654268436281e-05, "loss": 0.9821, "step": 1687 }, { "epoch": 0.5192248538911104, "grad_norm": 0.7604824703432098, "learning_rate": 4.9526725586218645e-05, "loss": 1.0245, "step": 1688 }, { "epoch": 0.5195324515533682, "grad_norm": 0.5627604052043683, "learning_rate": 4.947690895793049e-05, "loss": 0.8856, "step": 1689 }, { "epoch": 0.519840049215626, "grad_norm": 0.488890791878907, "learning_rate": 4.942709284895516e-05, "loss": 0.918, "step": 1690 }, { "epoch": 0.5201476468778837, "grad_norm": 0.479228662282829, "learning_rate": 4.937727730874897e-05, "loss": 0.7879, "step": 1691 }, { "epoch": 0.5204552445401415, "grad_norm": 0.5731014032313658, "learning_rate": 4.9327462386767605e-05, "loss": 0.9045, "step": 1692 }, { "epoch": 0.5207628422023992, "grad_norm": 0.7016115414566071, "learning_rate": 4.9277648132466224e-05, "loss": 0.8126, "step": 1693 }, { "epoch": 0.5210704398646571, "grad_norm": 0.45544546782334105, "learning_rate": 4.922783459529926e-05, "loss": 0.6604, "step": 1694 }, { "epoch": 0.5213780375269148, "grad_norm": 0.4730259063893366, "learning_rate": 4.917802182472045e-05, "loss": 0.7396, "step": 1695 }, { "epoch": 0.5216856351891725, "grad_norm": 0.7056522622556558, "learning_rate": 4.91282098701828e-05, "loss": 0.8729, "step": 1696 }, { "epoch": 0.5219932328514303, "grad_norm": 0.5729114943253246, "learning_rate": 4.907839878113845e-05, "loss": 0.908, "step": 1697 }, { "epoch": 0.5223008305136881, "grad_norm": 0.4243204792318075, "learning_rate": 4.9028588607038726e-05, "loss": 0.8445, "step": 1698 }, { "epoch": 0.5226084281759459, "grad_norm": 0.4997718761958907, "learning_rate": 4.8978779397334016e-05, "loss": 0.8198, "step": 1699 }, { "epoch": 0.5229160258382036, "grad_norm": 0.548950460747712, "learning_rate": 4.892897120147377e-05, "loss": 0.9188, "step": 1700 }, { "epoch": 0.5232236235004614, "grad_norm": 0.6084443657209078, "learning_rate": 4.887916406890645e-05, "loss": 0.8152, "step": 1701 }, { "epoch": 0.5235312211627192, "grad_norm": 0.5511165879796106, "learning_rate": 4.882935804907941e-05, "loss": 0.8859, "step": 1702 }, { "epoch": 0.523838818824977, "grad_norm": 0.6052613570753201, "learning_rate": 4.8779553191438934e-05, "loss": 0.9213, "step": 1703 }, { "epoch": 0.5241464164872347, "grad_norm": 0.5589424030820082, "learning_rate": 4.8729749545430145e-05, "loss": 1.0121, "step": 1704 }, { "epoch": 0.5244540141494924, "grad_norm": 0.56061306607196, "learning_rate": 4.867994716049698e-05, "loss": 0.9509, "step": 1705 }, { "epoch": 0.5247616118117502, "grad_norm": 0.4513231958171241, "learning_rate": 4.863014608608209e-05, "loss": 0.8993, "step": 1706 }, { "epoch": 0.525069209474008, "grad_norm": 0.8165329611370171, "learning_rate": 4.858034637162685e-05, "loss": 0.9735, "step": 1707 }, { "epoch": 0.5253768071362658, "grad_norm": 0.8006786808180936, "learning_rate": 4.853054806657128e-05, "loss": 1.1017, "step": 1708 }, { "epoch": 0.5256844047985235, "grad_norm": 0.5838492857080334, "learning_rate": 4.8480751220353986e-05, "loss": 0.9379, "step": 1709 }, { "epoch": 0.5259920024607813, "grad_norm": 0.5140633393275654, "learning_rate": 4.843095588241216e-05, "loss": 0.9254, "step": 1710 }, { "epoch": 0.5262996001230391, "grad_norm": 0.6379358700672397, "learning_rate": 4.838116210218147e-05, "loss": 0.8178, "step": 1711 }, { "epoch": 0.5266071977852969, "grad_norm": 0.5268486967142219, "learning_rate": 4.833136992909602e-05, "loss": 0.9898, "step": 1712 }, { "epoch": 0.5269147954475546, "grad_norm": 0.7278721617412142, "learning_rate": 4.8281579412588364e-05, "loss": 0.9337, "step": 1713 }, { "epoch": 0.5272223931098123, "grad_norm": 0.7861996078658849, "learning_rate": 4.823179060208938e-05, "loss": 1.0263, "step": 1714 }, { "epoch": 0.5275299907720701, "grad_norm": 0.6585180496128225, "learning_rate": 4.8182003547028265e-05, "loss": 0.8116, "step": 1715 }, { "epoch": 0.5278375884343279, "grad_norm": 0.43714037553561996, "learning_rate": 4.813221829683247e-05, "loss": 0.8939, "step": 1716 }, { "epoch": 0.5281451860965857, "grad_norm": 0.6043156852987099, "learning_rate": 4.808243490092765e-05, "loss": 0.8114, "step": 1717 }, { "epoch": 0.5284527837588434, "grad_norm": 1.0481848372972142, "learning_rate": 4.803265340873762e-05, "loss": 0.9516, "step": 1718 }, { "epoch": 0.5287603814211012, "grad_norm": 0.43232167485898876, "learning_rate": 4.798287386968431e-05, "loss": 0.8089, "step": 1719 }, { "epoch": 0.529067979083359, "grad_norm": 0.6212363096067627, "learning_rate": 4.793309633318772e-05, "loss": 0.8697, "step": 1720 }, { "epoch": 0.5293755767456167, "grad_norm": 1.026874249238565, "learning_rate": 4.788332084866583e-05, "loss": 0.9123, "step": 1721 }, { "epoch": 0.5296831744078745, "grad_norm": 0.6058212078098324, "learning_rate": 4.7833547465534604e-05, "loss": 0.8431, "step": 1722 }, { "epoch": 0.5299907720701322, "grad_norm": 0.5340595774139596, "learning_rate": 4.7783776233207946e-05, "loss": 0.8622, "step": 1723 }, { "epoch": 0.5302983697323901, "grad_norm": 0.639648549106658, "learning_rate": 4.773400720109756e-05, "loss": 1.0684, "step": 1724 }, { "epoch": 0.5306059673946478, "grad_norm": 0.526498312010835, "learning_rate": 4.7684240418613016e-05, "loss": 0.8882, "step": 1725 }, { "epoch": 0.5309135650569056, "grad_norm": 0.9788456386284471, "learning_rate": 4.7634475935161675e-05, "loss": 1.112, "step": 1726 }, { "epoch": 0.5312211627191633, "grad_norm": 0.39158773360970023, "learning_rate": 4.758471380014852e-05, "loss": 0.805, "step": 1727 }, { "epoch": 0.531528760381421, "grad_norm": 0.47933614719977746, "learning_rate": 4.753495406297629e-05, "loss": 0.9442, "step": 1728 }, { "epoch": 0.5318363580436789, "grad_norm": 0.4578756910816667, "learning_rate": 4.748519677304534e-05, "loss": 0.8852, "step": 1729 }, { "epoch": 0.5321439557059366, "grad_norm": 0.5861357660472118, "learning_rate": 4.743544197975351e-05, "loss": 0.9362, "step": 1730 }, { "epoch": 0.5324515533681944, "grad_norm": 0.7794966601637023, "learning_rate": 4.7385689732496264e-05, "loss": 0.9484, "step": 1731 }, { "epoch": 0.5327591510304521, "grad_norm": 0.5763252058929071, "learning_rate": 4.73359400806665e-05, "loss": 0.8881, "step": 1732 }, { "epoch": 0.53306674869271, "grad_norm": 0.5368515785121841, "learning_rate": 4.7286193073654504e-05, "loss": 0.8534, "step": 1733 }, { "epoch": 0.5333743463549677, "grad_norm": 0.4723253537997361, "learning_rate": 4.723644876084799e-05, "loss": 0.9442, "step": 1734 }, { "epoch": 0.5336819440172255, "grad_norm": 0.6029781552204568, "learning_rate": 4.718670719163198e-05, "loss": 0.7708, "step": 1735 }, { "epoch": 0.5339895416794832, "grad_norm": 0.4380701746349461, "learning_rate": 4.713696841538874e-05, "loss": 0.7859, "step": 1736 }, { "epoch": 0.5342971393417411, "grad_norm": 1.0995120870338024, "learning_rate": 4.70872324814978e-05, "loss": 1.0039, "step": 1737 }, { "epoch": 0.5346047370039988, "grad_norm": 0.39522937532500385, "learning_rate": 4.703749943933589e-05, "loss": 0.8256, "step": 1738 }, { "epoch": 0.5349123346662565, "grad_norm": 0.9061685482146453, "learning_rate": 4.698776933827678e-05, "loss": 1.0215, "step": 1739 }, { "epoch": 0.5352199323285143, "grad_norm": 0.6473192810846415, "learning_rate": 4.693804222769142e-05, "loss": 0.8425, "step": 1740 }, { "epoch": 0.535527529990772, "grad_norm": 0.4530134958797127, "learning_rate": 4.688831815694775e-05, "loss": 0.9242, "step": 1741 }, { "epoch": 0.5358351276530299, "grad_norm": 0.4959461028894182, "learning_rate": 4.683859717541066e-05, "loss": 0.9021, "step": 1742 }, { "epoch": 0.5361427253152876, "grad_norm": 0.6178061024837967, "learning_rate": 4.678887933244202e-05, "loss": 0.982, "step": 1743 }, { "epoch": 0.5364503229775454, "grad_norm": 0.5736828982623055, "learning_rate": 4.6739164677400596e-05, "loss": 0.8755, "step": 1744 }, { "epoch": 0.5367579206398031, "grad_norm": 0.5301457769955193, "learning_rate": 4.668945325964191e-05, "loss": 1.0374, "step": 1745 }, { "epoch": 0.537065518302061, "grad_norm": 0.5836793066368448, "learning_rate": 4.663974512851834e-05, "loss": 0.8614, "step": 1746 }, { "epoch": 0.5373731159643187, "grad_norm": 0.4155186217251604, "learning_rate": 4.659004033337902e-05, "loss": 0.9818, "step": 1747 }, { "epoch": 0.5376807136265764, "grad_norm": 0.5063515897089467, "learning_rate": 4.654033892356967e-05, "loss": 0.9156, "step": 1748 }, { "epoch": 0.5379883112888342, "grad_norm": 0.5187932632852487, "learning_rate": 4.6490640948432747e-05, "loss": 0.8818, "step": 1749 }, { "epoch": 0.5382959089510919, "grad_norm": 0.47858595014713123, "learning_rate": 4.644094645730728e-05, "loss": 0.8336, "step": 1750 }, { "epoch": 0.5386035066133498, "grad_norm": 0.6070152416361544, "learning_rate": 4.63912554995288e-05, "loss": 0.832, "step": 1751 }, { "epoch": 0.5389111042756075, "grad_norm": 0.6559014024042312, "learning_rate": 4.634156812442935e-05, "loss": 0.8216, "step": 1752 }, { "epoch": 0.5392187019378653, "grad_norm": 0.41216341813739293, "learning_rate": 4.629188438133744e-05, "loss": 0.7779, "step": 1753 }, { "epoch": 0.539526299600123, "grad_norm": 0.4442401256561957, "learning_rate": 4.6242204319577955e-05, "loss": 0.841, "step": 1754 }, { "epoch": 0.5398338972623808, "grad_norm": 0.4584592624911633, "learning_rate": 4.61925279884721e-05, "loss": 0.9257, "step": 1755 }, { "epoch": 0.5401414949246386, "grad_norm": 0.5792198136209846, "learning_rate": 4.614285543733741e-05, "loss": 0.8883, "step": 1756 }, { "epoch": 0.5404490925868963, "grad_norm": 0.5972365460552305, "learning_rate": 4.609318671548768e-05, "loss": 0.8433, "step": 1757 }, { "epoch": 0.5407566902491541, "grad_norm": 0.4649182860806515, "learning_rate": 4.604352187223286e-05, "loss": 0.864, "step": 1758 }, { "epoch": 0.5410642879114119, "grad_norm": 0.5606318327590237, "learning_rate": 4.5993860956879064e-05, "loss": 0.9115, "step": 1759 }, { "epoch": 0.5413718855736697, "grad_norm": 0.959871892411618, "learning_rate": 4.594420401872855e-05, "loss": 1.2049, "step": 1760 }, { "epoch": 0.5416794832359274, "grad_norm": 0.5072146879773504, "learning_rate": 4.5894551107079546e-05, "loss": 0.8632, "step": 1761 }, { "epoch": 0.5419870808981851, "grad_norm": 0.48998220679246185, "learning_rate": 4.584490227122635e-05, "loss": 0.8798, "step": 1762 }, { "epoch": 0.5422946785604429, "grad_norm": 1.0847228299153355, "learning_rate": 4.579525756045921e-05, "loss": 0.83, "step": 1763 }, { "epoch": 0.5426022762227007, "grad_norm": 0.410568771025966, "learning_rate": 4.574561702406421e-05, "loss": 0.9894, "step": 1764 }, { "epoch": 0.5429098738849585, "grad_norm": 0.4612251487545509, "learning_rate": 4.569598071132338e-05, "loss": 0.7824, "step": 1765 }, { "epoch": 0.5432174715472162, "grad_norm": 0.5422940150234364, "learning_rate": 4.564634867151452e-05, "loss": 0.7721, "step": 1766 }, { "epoch": 0.543525069209474, "grad_norm": 0.4141236244398702, "learning_rate": 4.5596720953911156e-05, "loss": 0.8257, "step": 1767 }, { "epoch": 0.5438326668717318, "grad_norm": 0.5112931193305824, "learning_rate": 4.554709760778256e-05, "loss": 0.8653, "step": 1768 }, { "epoch": 0.5441402645339896, "grad_norm": 0.5456060876098576, "learning_rate": 4.5497478682393685e-05, "loss": 0.9017, "step": 1769 }, { "epoch": 0.5444478621962473, "grad_norm": 0.45673066460428347, "learning_rate": 4.544786422700502e-05, "loss": 0.8441, "step": 1770 }, { "epoch": 0.544755459858505, "grad_norm": 0.49042339408211066, "learning_rate": 4.5398254290872676e-05, "loss": 1.0115, "step": 1771 }, { "epoch": 0.5450630575207629, "grad_norm": 0.7085872150556443, "learning_rate": 4.534864892324831e-05, "loss": 1.3672, "step": 1772 }, { "epoch": 0.5453706551830206, "grad_norm": 0.6530032970604541, "learning_rate": 4.529904817337892e-05, "loss": 1.1141, "step": 1773 }, { "epoch": 0.5456782528452784, "grad_norm": 0.5921287826099859, "learning_rate": 4.5249452090507046e-05, "loss": 1.0852, "step": 1774 }, { "epoch": 0.5459858505075361, "grad_norm": 0.5069560886762358, "learning_rate": 4.5199860723870546e-05, "loss": 0.985, "step": 1775 }, { "epoch": 0.5462934481697939, "grad_norm": 0.6274578490543727, "learning_rate": 4.515027412270257e-05, "loss": 0.9376, "step": 1776 }, { "epoch": 0.5466010458320517, "grad_norm": 0.4764187274961119, "learning_rate": 4.510069233623157e-05, "loss": 0.8593, "step": 1777 }, { "epoch": 0.5469086434943095, "grad_norm": 0.6685502278179973, "learning_rate": 4.505111541368124e-05, "loss": 0.9788, "step": 1778 }, { "epoch": 0.5472162411565672, "grad_norm": 0.5327911494179672, "learning_rate": 4.500154340427037e-05, "loss": 0.9779, "step": 1779 }, { "epoch": 0.5475238388188249, "grad_norm": 0.4829962500829865, "learning_rate": 4.495197635721294e-05, "loss": 0.8752, "step": 1780 }, { "epoch": 0.5478314364810828, "grad_norm": 0.5285076876513851, "learning_rate": 4.4902414321718e-05, "loss": 0.9731, "step": 1781 }, { "epoch": 0.5481390341433405, "grad_norm": 0.49876166287282586, "learning_rate": 4.485285734698956e-05, "loss": 0.8238, "step": 1782 }, { "epoch": 0.5484466318055983, "grad_norm": 0.599030730181133, "learning_rate": 4.4803305482226684e-05, "loss": 0.7112, "step": 1783 }, { "epoch": 0.548754229467856, "grad_norm": 0.7116406441297695, "learning_rate": 4.475375877662334e-05, "loss": 0.8364, "step": 1784 }, { "epoch": 0.5490618271301139, "grad_norm": 1.019185730646751, "learning_rate": 4.470421727936832e-05, "loss": 1.2591, "step": 1785 }, { "epoch": 0.5493694247923716, "grad_norm": 0.5326469616898871, "learning_rate": 4.465468103964533e-05, "loss": 0.974, "step": 1786 }, { "epoch": 0.5496770224546293, "grad_norm": 0.6923539531222697, "learning_rate": 4.460515010663282e-05, "loss": 1.0419, "step": 1787 }, { "epoch": 0.5499846201168871, "grad_norm": 0.5324118345499834, "learning_rate": 4.4555624529503934e-05, "loss": 0.9013, "step": 1788 }, { "epoch": 0.5502922177791448, "grad_norm": 0.4304122831468636, "learning_rate": 4.4506104357426547e-05, "loss": 0.7137, "step": 1789 }, { "epoch": 0.5505998154414027, "grad_norm": 0.8690159844874704, "learning_rate": 4.4456589639563184e-05, "loss": 1.0601, "step": 1790 }, { "epoch": 0.5509074131036604, "grad_norm": 0.590992233427874, "learning_rate": 4.4407080425070876e-05, "loss": 0.9444, "step": 1791 }, { "epoch": 0.5512150107659182, "grad_norm": 0.749253080413735, "learning_rate": 4.4357576763101264e-05, "loss": 1.1533, "step": 1792 }, { "epoch": 0.5515226084281759, "grad_norm": 0.5861996889020926, "learning_rate": 4.4308078702800465e-05, "loss": 0.8946, "step": 1793 }, { "epoch": 0.5518302060904338, "grad_norm": 0.5466997107990366, "learning_rate": 4.425858629330899e-05, "loss": 0.9401, "step": 1794 }, { "epoch": 0.5521378037526915, "grad_norm": 0.6598305233989777, "learning_rate": 4.4209099583761786e-05, "loss": 0.8532, "step": 1795 }, { "epoch": 0.5524454014149492, "grad_norm": 0.58259015888295, "learning_rate": 4.4159618623288157e-05, "loss": 1.1385, "step": 1796 }, { "epoch": 0.552752999077207, "grad_norm": 0.6319411833006421, "learning_rate": 4.4110143461011614e-05, "loss": 0.9704, "step": 1797 }, { "epoch": 0.5530605967394647, "grad_norm": 0.8890674825745941, "learning_rate": 4.4060674146049993e-05, "loss": 0.93, "step": 1798 }, { "epoch": 0.5533681944017226, "grad_norm": 0.5292172757968651, "learning_rate": 4.4011210727515335e-05, "loss": 0.7949, "step": 1799 }, { "epoch": 0.5536757920639803, "grad_norm": 0.4898030894340802, "learning_rate": 4.3961753254513724e-05, "loss": 0.9178, "step": 1800 }, { "epoch": 0.5539833897262381, "grad_norm": 0.5327022618566496, "learning_rate": 4.391230177614544e-05, "loss": 0.9196, "step": 1801 }, { "epoch": 0.5542909873884958, "grad_norm": 0.530078895355124, "learning_rate": 4.386285634150481e-05, "loss": 0.9442, "step": 1802 }, { "epoch": 0.5545985850507537, "grad_norm": 0.4826642927544846, "learning_rate": 4.381341699968008e-05, "loss": 0.8198, "step": 1803 }, { "epoch": 0.5549061827130114, "grad_norm": 0.6578317857467872, "learning_rate": 4.376398379975352e-05, "loss": 0.9882, "step": 1804 }, { "epoch": 0.5552137803752691, "grad_norm": 0.8934270807420613, "learning_rate": 4.3714556790801306e-05, "loss": 0.9453, "step": 1805 }, { "epoch": 0.5555213780375269, "grad_norm": 0.546869767514345, "learning_rate": 4.366513602189339e-05, "loss": 0.9314, "step": 1806 }, { "epoch": 0.5558289756997847, "grad_norm": 0.49094236013686454, "learning_rate": 4.361572154209362e-05, "loss": 0.8074, "step": 1807 }, { "epoch": 0.5561365733620425, "grad_norm": 0.5218401240667315, "learning_rate": 4.356631340045957e-05, "loss": 0.9216, "step": 1808 }, { "epoch": 0.5564441710243002, "grad_norm": 0.5776662617038348, "learning_rate": 4.351691164604247e-05, "loss": 0.912, "step": 1809 }, { "epoch": 0.556751768686558, "grad_norm": 0.5867636228771331, "learning_rate": 4.346751632788728e-05, "loss": 0.8303, "step": 1810 }, { "epoch": 0.5570593663488157, "grad_norm": 0.712164252375668, "learning_rate": 4.341812749503256e-05, "loss": 0.8425, "step": 1811 }, { "epoch": 0.5573669640110736, "grad_norm": 0.6122792863991728, "learning_rate": 4.336874519651038e-05, "loss": 1.1158, "step": 1812 }, { "epoch": 0.5576745616733313, "grad_norm": 0.6023738203732417, "learning_rate": 4.331936948134636e-05, "loss": 0.8692, "step": 1813 }, { "epoch": 0.557982159335589, "grad_norm": 0.7011770075146, "learning_rate": 4.327000039855962e-05, "loss": 0.8998, "step": 1814 }, { "epoch": 0.5582897569978468, "grad_norm": 0.5231355555457569, "learning_rate": 4.322063799716262e-05, "loss": 0.9549, "step": 1815 }, { "epoch": 0.5585973546601046, "grad_norm": 0.46117889737853573, "learning_rate": 4.317128232616122e-05, "loss": 0.8326, "step": 1816 }, { "epoch": 0.5589049523223624, "grad_norm": 0.709106301013398, "learning_rate": 4.312193343455464e-05, "loss": 0.8584, "step": 1817 }, { "epoch": 0.5592125499846201, "grad_norm": 0.6111963088619508, "learning_rate": 4.307259137133528e-05, "loss": 0.9559, "step": 1818 }, { "epoch": 0.5595201476468779, "grad_norm": 0.6333077212799009, "learning_rate": 4.302325618548885e-05, "loss": 0.7394, "step": 1819 }, { "epoch": 0.5598277453091357, "grad_norm": 0.5892883560776262, "learning_rate": 4.297392792599417e-05, "loss": 0.9211, "step": 1820 }, { "epoch": 0.5601353429713934, "grad_norm": 0.6358744609841229, "learning_rate": 4.292460664182325e-05, "loss": 0.9717, "step": 1821 }, { "epoch": 0.5604429406336512, "grad_norm": 0.5099109836569804, "learning_rate": 4.287529238194109e-05, "loss": 0.9196, "step": 1822 }, { "epoch": 0.5607505382959089, "grad_norm": 0.8436387912619328, "learning_rate": 4.2825985195305766e-05, "loss": 0.8761, "step": 1823 }, { "epoch": 0.5610581359581667, "grad_norm": 0.46988794823965646, "learning_rate": 4.2776685130868365e-05, "loss": 0.8824, "step": 1824 }, { "epoch": 0.5613657336204245, "grad_norm": 0.5961293742887988, "learning_rate": 4.2727392237572804e-05, "loss": 1.1278, "step": 1825 }, { "epoch": 0.5616733312826823, "grad_norm": 0.5643509692012586, "learning_rate": 4.267810656435598e-05, "loss": 0.9096, "step": 1826 }, { "epoch": 0.56198092894494, "grad_norm": 0.6011269085914468, "learning_rate": 4.262882816014758e-05, "loss": 0.819, "step": 1827 }, { "epoch": 0.5622885266071977, "grad_norm": 0.6719823013718711, "learning_rate": 4.2579557073870066e-05, "loss": 0.9901, "step": 1828 }, { "epoch": 0.5625961242694556, "grad_norm": 0.556826777567421, "learning_rate": 4.2530293354438635e-05, "loss": 0.8397, "step": 1829 }, { "epoch": 0.5629037219317133, "grad_norm": 0.38816623649148013, "learning_rate": 4.248103705076123e-05, "loss": 0.6677, "step": 1830 }, { "epoch": 0.5632113195939711, "grad_norm": 0.5140139327242244, "learning_rate": 4.2431788211738324e-05, "loss": 0.8146, "step": 1831 }, { "epoch": 0.5635189172562288, "grad_norm": 0.49670771085972154, "learning_rate": 4.2382546886263063e-05, "loss": 0.9126, "step": 1832 }, { "epoch": 0.5638265149184866, "grad_norm": 0.616702579046698, "learning_rate": 4.233331312322113e-05, "loss": 0.8806, "step": 1833 }, { "epoch": 0.5641341125807444, "grad_norm": 0.5055087635430346, "learning_rate": 4.228408697149064e-05, "loss": 0.754, "step": 1834 }, { "epoch": 0.5644417102430022, "grad_norm": 0.5537940963915353, "learning_rate": 4.2234868479942214e-05, "loss": 0.8267, "step": 1835 }, { "epoch": 0.5647493079052599, "grad_norm": 0.43616503403423734, "learning_rate": 4.2185657697438866e-05, "loss": 0.834, "step": 1836 }, { "epoch": 0.5650569055675176, "grad_norm": 0.670254579840028, "learning_rate": 4.2136454672835885e-05, "loss": 0.9434, "step": 1837 }, { "epoch": 0.5653645032297755, "grad_norm": 0.603525235135399, "learning_rate": 4.2087259454980936e-05, "loss": 0.8949, "step": 1838 }, { "epoch": 0.5656721008920332, "grad_norm": 0.6062792252425471, "learning_rate": 4.203807209271393e-05, "loss": 0.9606, "step": 1839 }, { "epoch": 0.565979698554291, "grad_norm": 0.46147902169599353, "learning_rate": 4.1988892634866913e-05, "loss": 0.9985, "step": 1840 }, { "epoch": 0.5662872962165487, "grad_norm": 0.6048280199025343, "learning_rate": 4.1939721130264145e-05, "loss": 0.827, "step": 1841 }, { "epoch": 0.5665948938788066, "grad_norm": 0.5547665811560464, "learning_rate": 4.189055762772199e-05, "loss": 0.9276, "step": 1842 }, { "epoch": 0.5669024915410643, "grad_norm": 0.5034480984411698, "learning_rate": 4.184140217604882e-05, "loss": 0.8514, "step": 1843 }, { "epoch": 0.567210089203322, "grad_norm": 0.6936470934275919, "learning_rate": 4.179225482404505e-05, "loss": 0.9899, "step": 1844 }, { "epoch": 0.5675176868655798, "grad_norm": 0.6281534619841724, "learning_rate": 4.1743115620503084e-05, "loss": 0.9796, "step": 1845 }, { "epoch": 0.5678252845278375, "grad_norm": 0.6523164150743197, "learning_rate": 4.169398461420713e-05, "loss": 0.991, "step": 1846 }, { "epoch": 0.5681328821900954, "grad_norm": 0.4589730962264463, "learning_rate": 4.1644861853933384e-05, "loss": 0.7344, "step": 1847 }, { "epoch": 0.5684404798523531, "grad_norm": 0.39266819546680926, "learning_rate": 4.159574738844979e-05, "loss": 0.7592, "step": 1848 }, { "epoch": 0.5687480775146109, "grad_norm": 0.7228343312110164, "learning_rate": 4.154664126651604e-05, "loss": 0.968, "step": 1849 }, { "epoch": 0.5690556751768686, "grad_norm": 0.6042983402394201, "learning_rate": 4.14975435368836e-05, "loss": 0.7784, "step": 1850 }, { "epoch": 0.5693632728391265, "grad_norm": 0.36476751912535427, "learning_rate": 4.1448454248295574e-05, "loss": 0.8509, "step": 1851 }, { "epoch": 0.5696708705013842, "grad_norm": 0.46430776682136005, "learning_rate": 4.1399373449486654e-05, "loss": 0.804, "step": 1852 }, { "epoch": 0.569978468163642, "grad_norm": 0.601534662113265, "learning_rate": 4.1350301189183154e-05, "loss": 1.043, "step": 1853 }, { "epoch": 0.5702860658258997, "grad_norm": 0.44542243170104, "learning_rate": 4.130123751610291e-05, "loss": 0.7517, "step": 1854 }, { "epoch": 0.5705936634881575, "grad_norm": 0.6765350416524802, "learning_rate": 4.125218247895518e-05, "loss": 1.0776, "step": 1855 }, { "epoch": 0.5709012611504153, "grad_norm": 0.5027264566832836, "learning_rate": 4.1203136126440705e-05, "loss": 0.7811, "step": 1856 }, { "epoch": 0.571208858812673, "grad_norm": 0.6261739756130178, "learning_rate": 4.11540985072516e-05, "loss": 0.9078, "step": 1857 }, { "epoch": 0.5715164564749308, "grad_norm": 0.4712016507132727, "learning_rate": 4.110506967007124e-05, "loss": 0.9075, "step": 1858 }, { "epoch": 0.5718240541371885, "grad_norm": 0.4457433769674466, "learning_rate": 4.105604966357437e-05, "loss": 0.8968, "step": 1859 }, { "epoch": 0.5721316517994464, "grad_norm": 0.5017698387261209, "learning_rate": 4.100703853642695e-05, "loss": 0.8986, "step": 1860 }, { "epoch": 0.5724392494617041, "grad_norm": 0.6008672941265379, "learning_rate": 4.095803633728606e-05, "loss": 0.9375, "step": 1861 }, { "epoch": 0.5727468471239618, "grad_norm": 0.49686775875161127, "learning_rate": 4.090904311479998e-05, "loss": 0.9167, "step": 1862 }, { "epoch": 0.5730544447862196, "grad_norm": 0.48714686233347926, "learning_rate": 4.0860058917608095e-05, "loss": 0.9006, "step": 1863 }, { "epoch": 0.5733620424484774, "grad_norm": 0.4678613173460413, "learning_rate": 4.081108379434075e-05, "loss": 0.9292, "step": 1864 }, { "epoch": 0.5736696401107352, "grad_norm": 0.5470646958052284, "learning_rate": 4.0762117793619335e-05, "loss": 0.7864, "step": 1865 }, { "epoch": 0.5739772377729929, "grad_norm": 0.503894371454343, "learning_rate": 4.071316096405622e-05, "loss": 0.8098, "step": 1866 }, { "epoch": 0.5742848354352507, "grad_norm": 0.9106250726640102, "learning_rate": 4.066421335425457e-05, "loss": 1.1034, "step": 1867 }, { "epoch": 0.5745924330975084, "grad_norm": 0.4356708177207952, "learning_rate": 4.061527501280847e-05, "loss": 0.8219, "step": 1868 }, { "epoch": 0.5749000307597663, "grad_norm": 0.4758465741220129, "learning_rate": 4.056634598830282e-05, "loss": 0.8136, "step": 1869 }, { "epoch": 0.575207628422024, "grad_norm": 0.8085870912023969, "learning_rate": 4.051742632931318e-05, "loss": 0.8873, "step": 1870 }, { "epoch": 0.5755152260842817, "grad_norm": 0.587907277582274, "learning_rate": 4.04685160844059e-05, "loss": 0.9931, "step": 1871 }, { "epoch": 0.5758228237465395, "grad_norm": 0.6840961822970795, "learning_rate": 4.041961530213798e-05, "loss": 0.8731, "step": 1872 }, { "epoch": 0.5761304214087973, "grad_norm": 0.518680920787229, "learning_rate": 4.037072403105695e-05, "loss": 0.8104, "step": 1873 }, { "epoch": 0.5764380190710551, "grad_norm": 0.5353461404013059, "learning_rate": 4.032184231970097e-05, "loss": 1.0268, "step": 1874 }, { "epoch": 0.5767456167333128, "grad_norm": 0.5760891545908912, "learning_rate": 4.02729702165987e-05, "loss": 0.9647, "step": 1875 }, { "epoch": 0.5770532143955706, "grad_norm": 0.4578655260776905, "learning_rate": 4.02241077702692e-05, "loss": 0.9479, "step": 1876 }, { "epoch": 0.5773608120578284, "grad_norm": 0.5016350511503489, "learning_rate": 4.017525502922204e-05, "loss": 0.8688, "step": 1877 }, { "epoch": 0.5776684097200862, "grad_norm": 0.5131281619691845, "learning_rate": 4.012641204195708e-05, "loss": 0.8427, "step": 1878 }, { "epoch": 0.5779760073823439, "grad_norm": 0.6115595240109329, "learning_rate": 4.0077578856964514e-05, "loss": 0.9227, "step": 1879 }, { "epoch": 0.5782836050446016, "grad_norm": 0.5761843774020599, "learning_rate": 4.0028755522724824e-05, "loss": 0.9403, "step": 1880 }, { "epoch": 0.5785912027068594, "grad_norm": 0.7035419320877777, "learning_rate": 3.9979942087708725e-05, "loss": 1.0795, "step": 1881 }, { "epoch": 0.5788988003691172, "grad_norm": 0.6826958039099216, "learning_rate": 3.993113860037702e-05, "loss": 1.0379, "step": 1882 }, { "epoch": 0.579206398031375, "grad_norm": 0.5370164724988526, "learning_rate": 3.988234510918073e-05, "loss": 0.9572, "step": 1883 }, { "epoch": 0.5795139956936327, "grad_norm": 0.6379394246426943, "learning_rate": 3.983356166256094e-05, "loss": 0.9729, "step": 1884 }, { "epoch": 0.5798215933558905, "grad_norm": 0.47293284490487975, "learning_rate": 3.9784788308948694e-05, "loss": 0.9083, "step": 1885 }, { "epoch": 0.5801291910181483, "grad_norm": 0.5889350489427307, "learning_rate": 3.973602509676509e-05, "loss": 0.8978, "step": 1886 }, { "epoch": 0.580436788680406, "grad_norm": 0.4030080884067873, "learning_rate": 3.968727207442114e-05, "loss": 0.9096, "step": 1887 }, { "epoch": 0.5807443863426638, "grad_norm": 0.544337162086893, "learning_rate": 3.963852929031769e-05, "loss": 1.0064, "step": 1888 }, { "epoch": 0.5810519840049215, "grad_norm": 0.4981621568858019, "learning_rate": 3.95897967928455e-05, "loss": 0.8611, "step": 1889 }, { "epoch": 0.5813595816671794, "grad_norm": 0.7834765853402236, "learning_rate": 3.9541074630385056e-05, "loss": 0.757, "step": 1890 }, { "epoch": 0.5816671793294371, "grad_norm": 0.8480682117609069, "learning_rate": 3.949236285130664e-05, "loss": 1.0764, "step": 1891 }, { "epoch": 0.5819747769916949, "grad_norm": 0.5480221059875584, "learning_rate": 3.944366150397015e-05, "loss": 0.946, "step": 1892 }, { "epoch": 0.5822823746539526, "grad_norm": 0.45234582513281096, "learning_rate": 3.939497063672519e-05, "loss": 1.0292, "step": 1893 }, { "epoch": 0.5825899723162103, "grad_norm": 0.45718403142090186, "learning_rate": 3.9346290297910967e-05, "loss": 0.9007, "step": 1894 }, { "epoch": 0.5828975699784682, "grad_norm": 0.46935546810620743, "learning_rate": 3.929762053585615e-05, "loss": 0.9013, "step": 1895 }, { "epoch": 0.5832051676407259, "grad_norm": 0.8049816011841526, "learning_rate": 3.9248961398879006e-05, "loss": 0.9644, "step": 1896 }, { "epoch": 0.5835127653029837, "grad_norm": 0.47739166530031063, "learning_rate": 3.9200312935287225e-05, "loss": 0.8379, "step": 1897 }, { "epoch": 0.5838203629652414, "grad_norm": 0.6725934165151569, "learning_rate": 3.915167519337785e-05, "loss": 0.7165, "step": 1898 }, { "epoch": 0.5841279606274993, "grad_norm": 0.5002421456107891, "learning_rate": 3.9103048221437336e-05, "loss": 0.7457, "step": 1899 }, { "epoch": 0.584435558289757, "grad_norm": 0.5370642277198381, "learning_rate": 3.905443206774145e-05, "loss": 0.9473, "step": 1900 }, { "epoch": 0.5847431559520148, "grad_norm": 0.5661798090157201, "learning_rate": 3.900582678055517e-05, "loss": 0.8988, "step": 1901 }, { "epoch": 0.5850507536142725, "grad_norm": 0.5050603010174427, "learning_rate": 3.8957232408132723e-05, "loss": 0.9043, "step": 1902 }, { "epoch": 0.5853583512765304, "grad_norm": 0.5733633390527648, "learning_rate": 3.890864899871751e-05, "loss": 0.8721, "step": 1903 }, { "epoch": 0.5856659489387881, "grad_norm": 0.5535345503435278, "learning_rate": 3.886007660054198e-05, "loss": 0.9418, "step": 1904 }, { "epoch": 0.5859735466010458, "grad_norm": 0.4661774593905685, "learning_rate": 3.8811515261827744e-05, "loss": 0.7857, "step": 1905 }, { "epoch": 0.5862811442633036, "grad_norm": 0.5582088954965045, "learning_rate": 3.87629650307854e-05, "loss": 1.0206, "step": 1906 }, { "epoch": 0.5865887419255613, "grad_norm": 0.7862302162681508, "learning_rate": 3.871442595561445e-05, "loss": 0.8619, "step": 1907 }, { "epoch": 0.5868963395878192, "grad_norm": 0.6090632448770671, "learning_rate": 3.866589808450342e-05, "loss": 1.0031, "step": 1908 }, { "epoch": 0.5872039372500769, "grad_norm": 0.44890125398895997, "learning_rate": 3.861738146562968e-05, "loss": 0.8942, "step": 1909 }, { "epoch": 0.5875115349123347, "grad_norm": 0.5249181562511928, "learning_rate": 3.856887614715939e-05, "loss": 0.787, "step": 1910 }, { "epoch": 0.5878191325745924, "grad_norm": 0.566532826582179, "learning_rate": 3.852038217724753e-05, "loss": 0.8637, "step": 1911 }, { "epoch": 0.5881267302368502, "grad_norm": 0.625949322618246, "learning_rate": 3.847189960403782e-05, "loss": 0.8496, "step": 1912 }, { "epoch": 0.588434327899108, "grad_norm": 0.47444465330546703, "learning_rate": 3.8423428475662626e-05, "loss": 0.7721, "step": 1913 }, { "epoch": 0.5887419255613657, "grad_norm": 0.8757412901857501, "learning_rate": 3.837496884024299e-05, "loss": 0.9677, "step": 1914 }, { "epoch": 0.5890495232236235, "grad_norm": 0.48790078096137574, "learning_rate": 3.832652074588855e-05, "loss": 0.828, "step": 1915 }, { "epoch": 0.5893571208858812, "grad_norm": 0.4849158170567822, "learning_rate": 3.827808424069742e-05, "loss": 0.931, "step": 1916 }, { "epoch": 0.5896647185481391, "grad_norm": 0.45468809394740517, "learning_rate": 3.822965937275629e-05, "loss": 0.7742, "step": 1917 }, { "epoch": 0.5899723162103968, "grad_norm": 0.6663091845396731, "learning_rate": 3.818124619014027e-05, "loss": 0.7835, "step": 1918 }, { "epoch": 0.5902799138726545, "grad_norm": 0.7662374852678346, "learning_rate": 3.813284474091282e-05, "loss": 1.2678, "step": 1919 }, { "epoch": 0.5905875115349123, "grad_norm": 0.5388091623695498, "learning_rate": 3.808445507312582e-05, "loss": 0.8361, "step": 1920 }, { "epoch": 0.5908951091971701, "grad_norm": 0.7554718379030686, "learning_rate": 3.803607723481944e-05, "loss": 0.9484, "step": 1921 }, { "epoch": 0.5912027068594279, "grad_norm": 0.5345767374375727, "learning_rate": 3.7987711274022056e-05, "loss": 0.9005, "step": 1922 }, { "epoch": 0.5915103045216856, "grad_norm": 0.5931282843702212, "learning_rate": 3.79393572387503e-05, "loss": 0.8738, "step": 1923 }, { "epoch": 0.5918179021839434, "grad_norm": 0.4366903498592969, "learning_rate": 3.789101517700899e-05, "loss": 0.7943, "step": 1924 }, { "epoch": 0.5921254998462012, "grad_norm": 0.46502354716512234, "learning_rate": 3.784268513679096e-05, "loss": 0.8629, "step": 1925 }, { "epoch": 0.592433097508459, "grad_norm": 0.44407438084086276, "learning_rate": 3.7794367166077196e-05, "loss": 0.8941, "step": 1926 }, { "epoch": 0.5927406951707167, "grad_norm": 0.7138991009969, "learning_rate": 3.77460613128367e-05, "loss": 0.9751, "step": 1927 }, { "epoch": 0.5930482928329744, "grad_norm": 0.5309668716416194, "learning_rate": 3.769776762502638e-05, "loss": 0.9233, "step": 1928 }, { "epoch": 0.5933558904952322, "grad_norm": 0.6869610118265244, "learning_rate": 3.7649486150591116e-05, "loss": 0.8643, "step": 1929 }, { "epoch": 0.59366348815749, "grad_norm": 0.6432958359627212, "learning_rate": 3.7601216937463674e-05, "loss": 1.1915, "step": 1930 }, { "epoch": 0.5939710858197478, "grad_norm": 0.8277432197163438, "learning_rate": 3.755296003356459e-05, "loss": 0.9567, "step": 1931 }, { "epoch": 0.5942786834820055, "grad_norm": 0.469429706891041, "learning_rate": 3.750471548680223e-05, "loss": 0.9499, "step": 1932 }, { "epoch": 0.5945862811442633, "grad_norm": 0.7311736760284633, "learning_rate": 3.74564833450727e-05, "loss": 0.948, "step": 1933 }, { "epoch": 0.5948938788065211, "grad_norm": 0.4739045402393258, "learning_rate": 3.740826365625973e-05, "loss": 0.9412, "step": 1934 }, { "epoch": 0.5952014764687789, "grad_norm": 0.3921958233215777, "learning_rate": 3.736005646823475e-05, "loss": 0.8984, "step": 1935 }, { "epoch": 0.5955090741310366, "grad_norm": 0.6386007263147032, "learning_rate": 3.7311861828856756e-05, "loss": 0.9004, "step": 1936 }, { "epoch": 0.5958166717932943, "grad_norm": 0.6151466087295394, "learning_rate": 3.726367978597226e-05, "loss": 0.9195, "step": 1937 }, { "epoch": 0.5961242694555522, "grad_norm": 0.7577584720057892, "learning_rate": 3.72155103874153e-05, "loss": 1.0239, "step": 1938 }, { "epoch": 0.5964318671178099, "grad_norm": 0.5561867112173775, "learning_rate": 3.7167353681007385e-05, "loss": 0.7214, "step": 1939 }, { "epoch": 0.5967394647800677, "grad_norm": 0.569529844562664, "learning_rate": 3.711920971455736e-05, "loss": 0.7504, "step": 1940 }, { "epoch": 0.5970470624423254, "grad_norm": 0.9101177179123515, "learning_rate": 3.7071078535861447e-05, "loss": 0.9558, "step": 1941 }, { "epoch": 0.5973546601045832, "grad_norm": 0.469338101668895, "learning_rate": 3.702296019270321e-05, "loss": 0.9516, "step": 1942 }, { "epoch": 0.597662257766841, "grad_norm": 0.5388807253614017, "learning_rate": 3.6974854732853426e-05, "loss": 0.933, "step": 1943 }, { "epoch": 0.5979698554290988, "grad_norm": 0.4244728756924775, "learning_rate": 3.692676220407009e-05, "loss": 0.8208, "step": 1944 }, { "epoch": 0.5982774530913565, "grad_norm": 0.45792059419906095, "learning_rate": 3.687868265409838e-05, "loss": 0.9426, "step": 1945 }, { "epoch": 0.5985850507536142, "grad_norm": 0.5003485930713791, "learning_rate": 3.683061613067055e-05, "loss": 0.7751, "step": 1946 }, { "epoch": 0.5988926484158721, "grad_norm": 0.5048007297064417, "learning_rate": 3.6782562681505964e-05, "loss": 0.9119, "step": 1947 }, { "epoch": 0.5992002460781298, "grad_norm": 0.539819806952825, "learning_rate": 3.6734522354310994e-05, "loss": 0.9618, "step": 1948 }, { "epoch": 0.5995078437403876, "grad_norm": 0.806589532353654, "learning_rate": 3.6686495196778955e-05, "loss": 0.9712, "step": 1949 }, { "epoch": 0.5998154414026453, "grad_norm": 0.4871302431523358, "learning_rate": 3.6638481256590125e-05, "loss": 0.7895, "step": 1950 }, { "epoch": 0.600123039064903, "grad_norm": 1.2879999733246774, "learning_rate": 3.659048058141167e-05, "loss": 1.1069, "step": 1951 }, { "epoch": 0.6004306367271609, "grad_norm": 1.1248328896780728, "learning_rate": 3.6542493218897504e-05, "loss": 0.8865, "step": 1952 }, { "epoch": 0.6007382343894186, "grad_norm": 0.6702776859178032, "learning_rate": 3.649451921668843e-05, "loss": 0.9923, "step": 1953 }, { "epoch": 0.6010458320516764, "grad_norm": 0.5853533873865697, "learning_rate": 3.644655862241195e-05, "loss": 0.9466, "step": 1954 }, { "epoch": 0.6013534297139341, "grad_norm": 0.6915987151829692, "learning_rate": 3.63986114836822e-05, "loss": 0.9951, "step": 1955 }, { "epoch": 0.601661027376192, "grad_norm": 0.47517693951502105, "learning_rate": 3.635067784810002e-05, "loss": 0.7196, "step": 1956 }, { "epoch": 0.6019686250384497, "grad_norm": 0.7075026005457885, "learning_rate": 3.630275776325286e-05, "loss": 0.816, "step": 1957 }, { "epoch": 0.6022762227007075, "grad_norm": 0.4877102711650836, "learning_rate": 3.625485127671465e-05, "loss": 1.0255, "step": 1958 }, { "epoch": 0.6025838203629652, "grad_norm": 0.8255213800057078, "learning_rate": 3.620695843604586e-05, "loss": 0.9356, "step": 1959 }, { "epoch": 0.6028914180252231, "grad_norm": 0.5177306049712619, "learning_rate": 3.615907928879341e-05, "loss": 0.7902, "step": 1960 }, { "epoch": 0.6031990156874808, "grad_norm": 0.5328401153490785, "learning_rate": 3.6111213882490644e-05, "loss": 0.8593, "step": 1961 }, { "epoch": 0.6035066133497385, "grad_norm": 0.3820678536029298, "learning_rate": 3.60633622646572e-05, "loss": 0.7535, "step": 1962 }, { "epoch": 0.6038142110119963, "grad_norm": 0.4979028473760685, "learning_rate": 3.60155244827991e-05, "loss": 0.8449, "step": 1963 }, { "epoch": 0.604121808674254, "grad_norm": 0.5643984521454606, "learning_rate": 3.596770058440861e-05, "loss": 0.9621, "step": 1964 }, { "epoch": 0.6044294063365119, "grad_norm": 1.072629444068974, "learning_rate": 3.591989061696417e-05, "loss": 0.9551, "step": 1965 }, { "epoch": 0.6047370039987696, "grad_norm": 0.8189654691827027, "learning_rate": 3.5872094627930435e-05, "loss": 1.0825, "step": 1966 }, { "epoch": 0.6050446016610274, "grad_norm": 0.4408936841223427, "learning_rate": 3.58243126647582e-05, "loss": 0.8564, "step": 1967 }, { "epoch": 0.6053521993232851, "grad_norm": 0.497554555378012, "learning_rate": 3.5776544774884266e-05, "loss": 0.8852, "step": 1968 }, { "epoch": 0.605659796985543, "grad_norm": 0.5763558153370038, "learning_rate": 3.572879100573151e-05, "loss": 1.096, "step": 1969 }, { "epoch": 0.6059673946478007, "grad_norm": 0.5388795178203619, "learning_rate": 3.568105140470881e-05, "loss": 0.8617, "step": 1970 }, { "epoch": 0.6062749923100584, "grad_norm": 0.726681606634205, "learning_rate": 3.563332601921091e-05, "loss": 1.0441, "step": 1971 }, { "epoch": 0.6065825899723162, "grad_norm": 0.3994865354826678, "learning_rate": 3.5585614896618505e-05, "loss": 0.8639, "step": 1972 }, { "epoch": 0.606890187634574, "grad_norm": 0.5712542987878784, "learning_rate": 3.5537918084298115e-05, "loss": 0.8614, "step": 1973 }, { "epoch": 0.6071977852968318, "grad_norm": 0.49898161109584926, "learning_rate": 3.549023562960202e-05, "loss": 0.9816, "step": 1974 }, { "epoch": 0.6075053829590895, "grad_norm": 0.47941270352760085, "learning_rate": 3.544256757986828e-05, "loss": 0.7859, "step": 1975 }, { "epoch": 0.6078129806213473, "grad_norm": 0.7374822747343492, "learning_rate": 3.539491398242067e-05, "loss": 0.9771, "step": 1976 }, { "epoch": 0.608120578283605, "grad_norm": 0.4569743705826547, "learning_rate": 3.534727488456856e-05, "loss": 0.8602, "step": 1977 }, { "epoch": 0.6084281759458628, "grad_norm": 1.1785509139946901, "learning_rate": 3.529965033360696e-05, "loss": 1.2167, "step": 1978 }, { "epoch": 0.6087357736081206, "grad_norm": 0.591610200560038, "learning_rate": 3.525204037681647e-05, "loss": 1.168, "step": 1979 }, { "epoch": 0.6090433712703783, "grad_norm": 0.8343360556909323, "learning_rate": 3.520444506146311e-05, "loss": 0.9629, "step": 1980 }, { "epoch": 0.6093509689326361, "grad_norm": 0.7399098943524678, "learning_rate": 3.5156864434798455e-05, "loss": 0.8416, "step": 1981 }, { "epoch": 0.6096585665948939, "grad_norm": 0.7209382701412954, "learning_rate": 3.510929854405949e-05, "loss": 0.7976, "step": 1982 }, { "epoch": 0.6099661642571517, "grad_norm": 0.7472273070500095, "learning_rate": 3.506174743646849e-05, "loss": 1.0613, "step": 1983 }, { "epoch": 0.6102737619194094, "grad_norm": 1.0128453776579962, "learning_rate": 3.501421115923312e-05, "loss": 1.0335, "step": 1984 }, { "epoch": 0.6105813595816671, "grad_norm": 0.561111738739846, "learning_rate": 3.4966689759546345e-05, "loss": 1.0637, "step": 1985 }, { "epoch": 0.610888957243925, "grad_norm": 0.4906537769659106, "learning_rate": 3.491918328458629e-05, "loss": 0.9442, "step": 1986 }, { "epoch": 0.6111965549061827, "grad_norm": 0.46974685934736626, "learning_rate": 3.48716917815163e-05, "loss": 0.8956, "step": 1987 }, { "epoch": 0.6115041525684405, "grad_norm": 0.49761117732544785, "learning_rate": 3.4824215297484875e-05, "loss": 0.8877, "step": 1988 }, { "epoch": 0.6118117502306982, "grad_norm": 0.7153497173149632, "learning_rate": 3.477675387962556e-05, "loss": 0.9683, "step": 1989 }, { "epoch": 0.612119347892956, "grad_norm": 0.5081012951811802, "learning_rate": 3.4729307575056975e-05, "loss": 1.0037, "step": 1990 }, { "epoch": 0.6124269455552138, "grad_norm": 0.8620926293683266, "learning_rate": 3.468187643088273e-05, "loss": 0.8225, "step": 1991 }, { "epoch": 0.6127345432174716, "grad_norm": 0.5481865600563087, "learning_rate": 3.463446049419138e-05, "loss": 0.851, "step": 1992 }, { "epoch": 0.6130421408797293, "grad_norm": 0.56934060810994, "learning_rate": 3.458705981205637e-05, "loss": 0.9542, "step": 1993 }, { "epoch": 0.613349738541987, "grad_norm": 0.6097146761211961, "learning_rate": 3.453967443153603e-05, "loss": 0.9588, "step": 1994 }, { "epoch": 0.6136573362042449, "grad_norm": 0.5698361845792151, "learning_rate": 3.449230439967348e-05, "loss": 0.6561, "step": 1995 }, { "epoch": 0.6139649338665026, "grad_norm": 0.7030369734636313, "learning_rate": 3.444494976349658e-05, "loss": 0.7964, "step": 1996 }, { "epoch": 0.6142725315287604, "grad_norm": 0.5607230935516986, "learning_rate": 3.439761057001797e-05, "loss": 0.8027, "step": 1997 }, { "epoch": 0.6145801291910181, "grad_norm": 0.771201153848508, "learning_rate": 3.43502868662349e-05, "loss": 0.9636, "step": 1998 }, { "epoch": 0.6148877268532759, "grad_norm": 0.6247923801504991, "learning_rate": 3.430297869912925e-05, "loss": 0.7351, "step": 1999 }, { "epoch": 0.6151953245155337, "grad_norm": 0.4161744742933713, "learning_rate": 3.4255686115667505e-05, "loss": 0.8795, "step": 2000 }, { "epoch": 0.6155029221777915, "grad_norm": 0.5351640576536907, "learning_rate": 3.420840916280066e-05, "loss": 0.9521, "step": 2001 }, { "epoch": 0.6158105198400492, "grad_norm": 0.8930418847898514, "learning_rate": 3.416114788746417e-05, "loss": 1.0686, "step": 2002 }, { "epoch": 0.6161181175023069, "grad_norm": 0.4453460111782809, "learning_rate": 3.411390233657797e-05, "loss": 0.9515, "step": 2003 }, { "epoch": 0.6164257151645648, "grad_norm": 0.921554853926346, "learning_rate": 3.4066672557046366e-05, "loss": 1.2076, "step": 2004 }, { "epoch": 0.6167333128268225, "grad_norm": 0.5636565012878733, "learning_rate": 3.4019458595757995e-05, "loss": 0.8757, "step": 2005 }, { "epoch": 0.6170409104890803, "grad_norm": 0.5040346169265576, "learning_rate": 3.3972260499585796e-05, "loss": 1.0059, "step": 2006 }, { "epoch": 0.617348508151338, "grad_norm": 0.5084682413818424, "learning_rate": 3.392507831538696e-05, "loss": 0.9089, "step": 2007 }, { "epoch": 0.6176561058135959, "grad_norm": 0.5355048015562345, "learning_rate": 3.387791209000288e-05, "loss": 0.7153, "step": 2008 }, { "epoch": 0.6179637034758536, "grad_norm": 1.0123179243327844, "learning_rate": 3.383076187025912e-05, "loss": 1.024, "step": 2009 }, { "epoch": 0.6182713011381114, "grad_norm": 0.6143381666631869, "learning_rate": 3.378362770296533e-05, "loss": 0.9805, "step": 2010 }, { "epoch": 0.6185788988003691, "grad_norm": 0.5576419819323198, "learning_rate": 3.373650963491521e-05, "loss": 0.7708, "step": 2011 }, { "epoch": 0.6188864964626268, "grad_norm": 0.6010899502701025, "learning_rate": 3.368940771288655e-05, "loss": 0.8632, "step": 2012 }, { "epoch": 0.6191940941248847, "grad_norm": 0.540913465517736, "learning_rate": 3.364232198364104e-05, "loss": 0.9521, "step": 2013 }, { "epoch": 0.6195016917871424, "grad_norm": 0.4955166194373055, "learning_rate": 3.359525249392429e-05, "loss": 0.865, "step": 2014 }, { "epoch": 0.6198092894494002, "grad_norm": 0.5438721652012425, "learning_rate": 3.3548199290465855e-05, "loss": 0.9904, "step": 2015 }, { "epoch": 0.6201168871116579, "grad_norm": 0.476626980613068, "learning_rate": 3.350116241997904e-05, "loss": 0.836, "step": 2016 }, { "epoch": 0.6204244847739158, "grad_norm": 0.5410770277644801, "learning_rate": 3.345414192916101e-05, "loss": 0.7323, "step": 2017 }, { "epoch": 0.6207320824361735, "grad_norm": 0.47643015831562535, "learning_rate": 3.340713786469262e-05, "loss": 0.812, "step": 2018 }, { "epoch": 0.6210396800984312, "grad_norm": 0.49176797943793105, "learning_rate": 3.336015027323841e-05, "loss": 0.8374, "step": 2019 }, { "epoch": 0.621347277760689, "grad_norm": 0.5330094482010318, "learning_rate": 3.331317920144662e-05, "loss": 0.9844, "step": 2020 }, { "epoch": 0.6216548754229468, "grad_norm": 0.554208648652481, "learning_rate": 3.3266224695949045e-05, "loss": 0.8456, "step": 2021 }, { "epoch": 0.6219624730852046, "grad_norm": 0.4667101695596475, "learning_rate": 3.321928680336103e-05, "loss": 0.9238, "step": 2022 }, { "epoch": 0.6222700707474623, "grad_norm": 0.8049288541849527, "learning_rate": 3.317236557028145e-05, "loss": 1.0537, "step": 2023 }, { "epoch": 0.6225776684097201, "grad_norm": 0.5003250927882887, "learning_rate": 3.312546104329265e-05, "loss": 0.8607, "step": 2024 }, { "epoch": 0.6228852660719778, "grad_norm": 0.6280419759936572, "learning_rate": 3.307857326896034e-05, "loss": 0.9304, "step": 2025 }, { "epoch": 0.6231928637342357, "grad_norm": 0.7613316803640372, "learning_rate": 3.303170229383366e-05, "loss": 0.8372, "step": 2026 }, { "epoch": 0.6235004613964934, "grad_norm": 0.6416805231239254, "learning_rate": 3.2984848164445024e-05, "loss": 1.0041, "step": 2027 }, { "epoch": 0.6238080590587511, "grad_norm": 0.4551990150118437, "learning_rate": 3.2938010927310145e-05, "loss": 0.7426, "step": 2028 }, { "epoch": 0.6241156567210089, "grad_norm": 0.530232393083924, "learning_rate": 3.289119062892797e-05, "loss": 0.7672, "step": 2029 }, { "epoch": 0.6244232543832667, "grad_norm": 0.5376889278970628, "learning_rate": 3.284438731578061e-05, "loss": 0.7866, "step": 2030 }, { "epoch": 0.6247308520455245, "grad_norm": 0.4722017954922129, "learning_rate": 3.279760103433333e-05, "loss": 0.7894, "step": 2031 }, { "epoch": 0.6250384497077822, "grad_norm": 0.7795367843722858, "learning_rate": 3.2750831831034483e-05, "loss": 0.9343, "step": 2032 }, { "epoch": 0.62534604737004, "grad_norm": 0.6617806602675839, "learning_rate": 3.270407975231545e-05, "loss": 0.9393, "step": 2033 }, { "epoch": 0.6256536450322977, "grad_norm": 0.5463265285944753, "learning_rate": 3.265734484459064e-05, "loss": 0.8651, "step": 2034 }, { "epoch": 0.6259612426945556, "grad_norm": 0.5815306524211954, "learning_rate": 3.261062715425741e-05, "loss": 0.9688, "step": 2035 }, { "epoch": 0.6262688403568133, "grad_norm": 0.5867394568670731, "learning_rate": 3.2563926727695986e-05, "loss": 1.1284, "step": 2036 }, { "epoch": 0.626576438019071, "grad_norm": 0.6374484785828218, "learning_rate": 3.251724361126951e-05, "loss": 1.0174, "step": 2037 }, { "epoch": 0.6268840356813288, "grad_norm": 0.6371982172136245, "learning_rate": 3.2470577851323905e-05, "loss": 0.9094, "step": 2038 }, { "epoch": 0.6271916333435866, "grad_norm": 0.6699648280322665, "learning_rate": 3.242392949418786e-05, "loss": 1.056, "step": 2039 }, { "epoch": 0.6274992310058444, "grad_norm": 0.5397944957332315, "learning_rate": 3.2377298586172814e-05, "loss": 0.9967, "step": 2040 }, { "epoch": 0.6278068286681021, "grad_norm": 0.5315411346220286, "learning_rate": 3.2330685173572864e-05, "loss": 0.8076, "step": 2041 }, { "epoch": 0.6281144263303599, "grad_norm": 0.6300441332812278, "learning_rate": 3.228408930266472e-05, "loss": 0.9811, "step": 2042 }, { "epoch": 0.6284220239926177, "grad_norm": 0.41638172274336044, "learning_rate": 3.223751101970772e-05, "loss": 0.6796, "step": 2043 }, { "epoch": 0.6287296216548754, "grad_norm": 0.5778549642036526, "learning_rate": 3.219095037094371e-05, "loss": 0.7631, "step": 2044 }, { "epoch": 0.6290372193171332, "grad_norm": 0.5693436810293528, "learning_rate": 3.2144407402597046e-05, "loss": 0.9254, "step": 2045 }, { "epoch": 0.6293448169793909, "grad_norm": 0.45982011369448583, "learning_rate": 3.209788216087451e-05, "loss": 0.8652, "step": 2046 }, { "epoch": 0.6296524146416487, "grad_norm": 0.4823424114033112, "learning_rate": 3.2051374691965296e-05, "loss": 0.8882, "step": 2047 }, { "epoch": 0.6299600123039065, "grad_norm": 0.49499825511663537, "learning_rate": 3.200488504204098e-05, "loss": 0.8759, "step": 2048 }, { "epoch": 0.6302676099661643, "grad_norm": 0.9395558485589434, "learning_rate": 3.19584132572554e-05, "loss": 1.0992, "step": 2049 }, { "epoch": 0.630575207628422, "grad_norm": 1.2079368278717917, "learning_rate": 3.191195938374468e-05, "loss": 0.8653, "step": 2050 }, { "epoch": 0.6308828052906797, "grad_norm": 0.5162259400867579, "learning_rate": 3.186552346762719e-05, "loss": 1.0326, "step": 2051 }, { "epoch": 0.6311904029529376, "grad_norm": 0.4028664083199654, "learning_rate": 3.1819105555003426e-05, "loss": 0.8243, "step": 2052 }, { "epoch": 0.6314980006151953, "grad_norm": 0.6931123316559309, "learning_rate": 3.177270569195602e-05, "loss": 0.9254, "step": 2053 }, { "epoch": 0.6318055982774531, "grad_norm": 1.0964697612059422, "learning_rate": 3.172632392454973e-05, "loss": 1.0054, "step": 2054 }, { "epoch": 0.6321131959397108, "grad_norm": 0.4849325662529596, "learning_rate": 3.167996029883128e-05, "loss": 0.7829, "step": 2055 }, { "epoch": 0.6324207936019687, "grad_norm": 0.6424529081974754, "learning_rate": 3.1633614860829426e-05, "loss": 1.0701, "step": 2056 }, { "epoch": 0.6327283912642264, "grad_norm": 0.5019561335938777, "learning_rate": 3.158728765655487e-05, "loss": 0.8986, "step": 2057 }, { "epoch": 0.6330359889264842, "grad_norm": 0.5659662885962197, "learning_rate": 3.1540978732000196e-05, "loss": 1.008, "step": 2058 }, { "epoch": 0.6333435865887419, "grad_norm": 0.6395506334595312, "learning_rate": 3.149468813313984e-05, "loss": 1.1433, "step": 2059 }, { "epoch": 0.6336511842509996, "grad_norm": 0.5132066658968033, "learning_rate": 3.1448415905930065e-05, "loss": 0.8864, "step": 2060 }, { "epoch": 0.6339587819132575, "grad_norm": 0.5712235966348844, "learning_rate": 3.140216209630887e-05, "loss": 0.9727, "step": 2061 }, { "epoch": 0.6342663795755152, "grad_norm": 0.5273924117008809, "learning_rate": 3.135592675019596e-05, "loss": 0.9042, "step": 2062 }, { "epoch": 0.634573977237773, "grad_norm": 0.5001458388127539, "learning_rate": 3.130970991349278e-05, "loss": 0.916, "step": 2063 }, { "epoch": 0.6348815749000307, "grad_norm": 0.7056218798419944, "learning_rate": 3.1263511632082304e-05, "loss": 0.8662, "step": 2064 }, { "epoch": 0.6351891725622886, "grad_norm": 0.4242831751038682, "learning_rate": 3.121733195182914e-05, "loss": 0.9095, "step": 2065 }, { "epoch": 0.6354967702245463, "grad_norm": 0.4495095524005018, "learning_rate": 3.117117091857943e-05, "loss": 0.9232, "step": 2066 }, { "epoch": 0.6358043678868041, "grad_norm": 0.43688533332402807, "learning_rate": 3.1125028578160784e-05, "loss": 0.9146, "step": 2067 }, { "epoch": 0.6361119655490618, "grad_norm": 0.4946547617520929, "learning_rate": 3.107890497638226e-05, "loss": 0.8754, "step": 2068 }, { "epoch": 0.6364195632113195, "grad_norm": 0.6370529908473034, "learning_rate": 3.103280015903434e-05, "loss": 0.7812, "step": 2069 }, { "epoch": 0.6367271608735774, "grad_norm": 0.4030294794085894, "learning_rate": 3.09867141718888e-05, "loss": 0.784, "step": 2070 }, { "epoch": 0.6370347585358351, "grad_norm": 0.8132507443374116, "learning_rate": 3.0940647060698755e-05, "loss": 0.9819, "step": 2071 }, { "epoch": 0.6373423561980929, "grad_norm": 0.5442184081097057, "learning_rate": 3.08945988711986e-05, "loss": 0.8909, "step": 2072 }, { "epoch": 0.6376499538603506, "grad_norm": 0.5124670525283627, "learning_rate": 3.084856964910393e-05, "loss": 0.6875, "step": 2073 }, { "epoch": 0.6379575515226085, "grad_norm": 1.0441204572636826, "learning_rate": 3.080255944011147e-05, "loss": 1.0787, "step": 2074 }, { "epoch": 0.6382651491848662, "grad_norm": 0.5856417574967578, "learning_rate": 3.0756568289899136e-05, "loss": 1.1174, "step": 2075 }, { "epoch": 0.638572746847124, "grad_norm": 0.6648986749446745, "learning_rate": 3.0710596244125886e-05, "loss": 0.8922, "step": 2076 }, { "epoch": 0.6388803445093817, "grad_norm": 1.0210138429998372, "learning_rate": 3.06646433484317e-05, "loss": 1.146, "step": 2077 }, { "epoch": 0.6391879421716395, "grad_norm": 0.4649122802889783, "learning_rate": 3.061870964843757e-05, "loss": 0.7933, "step": 2078 }, { "epoch": 0.6394955398338973, "grad_norm": 0.619093019887512, "learning_rate": 3.057279518974544e-05, "loss": 0.8942, "step": 2079 }, { "epoch": 0.639803137496155, "grad_norm": 0.7120449935539692, "learning_rate": 3.052690001793811e-05, "loss": 0.9198, "step": 2080 }, { "epoch": 0.6401107351584128, "grad_norm": 0.6753935209767904, "learning_rate": 3.0481024178579252e-05, "loss": 0.8343, "step": 2081 }, { "epoch": 0.6404183328206705, "grad_norm": 0.6191828439243493, "learning_rate": 3.0435167717213397e-05, "loss": 0.8106, "step": 2082 }, { "epoch": 0.6407259304829284, "grad_norm": 0.48177308917983497, "learning_rate": 3.0389330679365746e-05, "loss": 1.0159, "step": 2083 }, { "epoch": 0.6410335281451861, "grad_norm": 0.5045458588301286, "learning_rate": 3.0343513110542266e-05, "loss": 0.8109, "step": 2084 }, { "epoch": 0.6413411258074438, "grad_norm": 0.513646307761728, "learning_rate": 3.0297715056229626e-05, "loss": 0.923, "step": 2085 }, { "epoch": 0.6416487234697016, "grad_norm": 0.6437711076540064, "learning_rate": 3.0251936561895046e-05, "loss": 0.8772, "step": 2086 }, { "epoch": 0.6419563211319594, "grad_norm": 0.5270869600877328, "learning_rate": 3.0206177672986385e-05, "loss": 0.9258, "step": 2087 }, { "epoch": 0.6422639187942172, "grad_norm": 0.5670382248571587, "learning_rate": 3.016043843493205e-05, "loss": 0.9225, "step": 2088 }, { "epoch": 0.6425715164564749, "grad_norm": 0.55464638407311, "learning_rate": 3.0114718893140854e-05, "loss": 0.8886, "step": 2089 }, { "epoch": 0.6428791141187327, "grad_norm": 0.6072860952478629, "learning_rate": 3.006901909300215e-05, "loss": 0.8679, "step": 2090 }, { "epoch": 0.6431867117809905, "grad_norm": 0.5575562688777945, "learning_rate": 3.002333907988566e-05, "loss": 0.8351, "step": 2091 }, { "epoch": 0.6434943094432483, "grad_norm": 0.44413097520777534, "learning_rate": 2.997767889914143e-05, "loss": 0.7339, "step": 2092 }, { "epoch": 0.643801907105506, "grad_norm": 0.6435567120266373, "learning_rate": 2.9932038596099865e-05, "loss": 0.9739, "step": 2093 }, { "epoch": 0.6441095047677637, "grad_norm": 0.5460136225960075, "learning_rate": 2.988641821607162e-05, "loss": 1.0488, "step": 2094 }, { "epoch": 0.6444171024300215, "grad_norm": 1.6925873673167602, "learning_rate": 2.9840817804347532e-05, "loss": 1.0062, "step": 2095 }, { "epoch": 0.6447247000922793, "grad_norm": 0.7005238136690581, "learning_rate": 2.9795237406198672e-05, "loss": 1.1433, "step": 2096 }, { "epoch": 0.6450322977545371, "grad_norm": 0.5011897159823805, "learning_rate": 2.9749677066876237e-05, "loss": 0.8231, "step": 2097 }, { "epoch": 0.6453398954167948, "grad_norm": 0.5986839186729166, "learning_rate": 2.970413683161144e-05, "loss": 0.9505, "step": 2098 }, { "epoch": 0.6456474930790526, "grad_norm": 0.8031531004498129, "learning_rate": 2.965861674561562e-05, "loss": 0.9967, "step": 2099 }, { "epoch": 0.6459550907413104, "grad_norm": 1.3207332924401378, "learning_rate": 2.9613116854080075e-05, "loss": 1.2754, "step": 2100 }, { "epoch": 0.6462626884035682, "grad_norm": 0.7277561396076799, "learning_rate": 2.9567637202176057e-05, "loss": 1.006, "step": 2101 }, { "epoch": 0.6465702860658259, "grad_norm": 0.5805090657973209, "learning_rate": 2.952217783505471e-05, "loss": 0.9168, "step": 2102 }, { "epoch": 0.6468778837280836, "grad_norm": 0.5934197507487925, "learning_rate": 2.9476738797847058e-05, "loss": 0.7387, "step": 2103 }, { "epoch": 0.6471854813903415, "grad_norm": 0.5062696030560185, "learning_rate": 2.943132013566397e-05, "loss": 0.7637, "step": 2104 }, { "epoch": 0.6474930790525992, "grad_norm": 0.6343269671618824, "learning_rate": 2.9385921893596012e-05, "loss": 0.9574, "step": 2105 }, { "epoch": 0.647800676714857, "grad_norm": 0.8348868301013739, "learning_rate": 2.9340544116713536e-05, "loss": 0.8722, "step": 2106 }, { "epoch": 0.6481082743771147, "grad_norm": 0.4835989130260463, "learning_rate": 2.9295186850066588e-05, "loss": 0.9198, "step": 2107 }, { "epoch": 0.6484158720393725, "grad_norm": 0.4385925606617867, "learning_rate": 2.924985013868478e-05, "loss": 0.9254, "step": 2108 }, { "epoch": 0.6487234697016303, "grad_norm": 0.7187645053130526, "learning_rate": 2.9204534027577385e-05, "loss": 0.7535, "step": 2109 }, { "epoch": 0.649031067363888, "grad_norm": 0.508808380209438, "learning_rate": 2.9159238561733227e-05, "loss": 0.894, "step": 2110 }, { "epoch": 0.6493386650261458, "grad_norm": 0.6235161719225625, "learning_rate": 2.9113963786120567e-05, "loss": 1.0805, "step": 2111 }, { "epoch": 0.6496462626884035, "grad_norm": 0.4279490653767518, "learning_rate": 2.9068709745687166e-05, "loss": 0.8644, "step": 2112 }, { "epoch": 0.6499538603506614, "grad_norm": 0.5099652104442125, "learning_rate": 2.9023476485360262e-05, "loss": 1.0689, "step": 2113 }, { "epoch": 0.6502614580129191, "grad_norm": 0.4801403824975096, "learning_rate": 2.8978264050046316e-05, "loss": 0.8998, "step": 2114 }, { "epoch": 0.6505690556751769, "grad_norm": 0.5668777076885462, "learning_rate": 2.8933072484631262e-05, "loss": 0.8195, "step": 2115 }, { "epoch": 0.6508766533374346, "grad_norm": 0.4812274014073489, "learning_rate": 2.8887901833980235e-05, "loss": 0.8409, "step": 2116 }, { "epoch": 0.6511842509996923, "grad_norm": 0.6167916801936083, "learning_rate": 2.884275214293763e-05, "loss": 0.8559, "step": 2117 }, { "epoch": 0.6514918486619502, "grad_norm": 0.6167926051030512, "learning_rate": 2.8797623456326993e-05, "loss": 0.848, "step": 2118 }, { "epoch": 0.6517994463242079, "grad_norm": 0.47932818152645235, "learning_rate": 2.8752515818951126e-05, "loss": 0.8755, "step": 2119 }, { "epoch": 0.6521070439864657, "grad_norm": 0.5492545160496596, "learning_rate": 2.8707429275591773e-05, "loss": 1.0063, "step": 2120 }, { "epoch": 0.6524146416487234, "grad_norm": 0.8399984279767961, "learning_rate": 2.8662363871009882e-05, "loss": 1.0013, "step": 2121 }, { "epoch": 0.6527222393109813, "grad_norm": 1.155029653086749, "learning_rate": 2.861731964994535e-05, "loss": 0.9096, "step": 2122 }, { "epoch": 0.653029836973239, "grad_norm": 0.4697325957887021, "learning_rate": 2.857229665711703e-05, "loss": 0.8736, "step": 2123 }, { "epoch": 0.6533374346354968, "grad_norm": 0.4727762082094559, "learning_rate": 2.8527294937222716e-05, "loss": 0.9118, "step": 2124 }, { "epoch": 0.6536450322977545, "grad_norm": 0.5108377838155124, "learning_rate": 2.8482314534939136e-05, "loss": 0.8916, "step": 2125 }, { "epoch": 0.6539526299600124, "grad_norm": 0.6280138262725908, "learning_rate": 2.843735549492173e-05, "loss": 0.9654, "step": 2126 }, { "epoch": 0.6542602276222701, "grad_norm": 0.41967078256103724, "learning_rate": 2.8392417861804866e-05, "loss": 0.8042, "step": 2127 }, { "epoch": 0.6545678252845278, "grad_norm": 0.7173933315298899, "learning_rate": 2.8347501680201584e-05, "loss": 1.0373, "step": 2128 }, { "epoch": 0.6548754229467856, "grad_norm": 0.5133610767646248, "learning_rate": 2.830260699470363e-05, "loss": 1.0081, "step": 2129 }, { "epoch": 0.6551830206090433, "grad_norm": 0.45248303223567937, "learning_rate": 2.8257733849881406e-05, "loss": 1.0095, "step": 2130 }, { "epoch": 0.6554906182713012, "grad_norm": 0.5041868477814639, "learning_rate": 2.8212882290284005e-05, "loss": 0.969, "step": 2131 }, { "epoch": 0.6557982159335589, "grad_norm": 0.576156557269994, "learning_rate": 2.816805236043895e-05, "loss": 0.7816, "step": 2132 }, { "epoch": 0.6561058135958167, "grad_norm": 1.1200925432128377, "learning_rate": 2.8123244104852436e-05, "loss": 1.1423, "step": 2133 }, { "epoch": 0.6564134112580744, "grad_norm": 0.6955194087196304, "learning_rate": 2.8078457568009047e-05, "loss": 0.8839, "step": 2134 }, { "epoch": 0.6567210089203322, "grad_norm": 0.620736783005977, "learning_rate": 2.8033692794371835e-05, "loss": 0.885, "step": 2135 }, { "epoch": 0.65702860658259, "grad_norm": 0.4752622073845162, "learning_rate": 2.7988949828382227e-05, "loss": 0.8847, "step": 2136 }, { "epoch": 0.6573362042448477, "grad_norm": 0.5891117415685927, "learning_rate": 2.7944228714460076e-05, "loss": 0.9087, "step": 2137 }, { "epoch": 0.6576438019071055, "grad_norm": 0.44356193053325416, "learning_rate": 2.78995294970034e-05, "loss": 0.9169, "step": 2138 }, { "epoch": 0.6579513995693633, "grad_norm": 0.7193815845019477, "learning_rate": 2.7854852220388617e-05, "loss": 1.2032, "step": 2139 }, { "epoch": 0.6582589972316211, "grad_norm": 0.46407034211703085, "learning_rate": 2.7810196928970296e-05, "loss": 0.7672, "step": 2140 }, { "epoch": 0.6585665948938788, "grad_norm": 0.4743747451434631, "learning_rate": 2.7765563667081173e-05, "loss": 0.8619, "step": 2141 }, { "epoch": 0.6588741925561366, "grad_norm": 0.6909920651231698, "learning_rate": 2.7720952479032124e-05, "loss": 0.8885, "step": 2142 }, { "epoch": 0.6591817902183943, "grad_norm": 0.6756923757780806, "learning_rate": 2.767636340911217e-05, "loss": 1.1234, "step": 2143 }, { "epoch": 0.6594893878806521, "grad_norm": 0.4439852175349944, "learning_rate": 2.7631796501588247e-05, "loss": 0.9096, "step": 2144 }, { "epoch": 0.6597969855429099, "grad_norm": 0.5431624350596465, "learning_rate": 2.7587251800705417e-05, "loss": 0.8872, "step": 2145 }, { "epoch": 0.6601045832051676, "grad_norm": 0.47906727477173944, "learning_rate": 2.7542729350686625e-05, "loss": 0.8818, "step": 2146 }, { "epoch": 0.6604121808674254, "grad_norm": 0.4790458074642498, "learning_rate": 2.749822919573273e-05, "loss": 0.896, "step": 2147 }, { "epoch": 0.6607197785296832, "grad_norm": 0.4829482698394187, "learning_rate": 2.7453751380022457e-05, "loss": 1.0008, "step": 2148 }, { "epoch": 0.661027376191941, "grad_norm": 0.5300360029050944, "learning_rate": 2.740929594771241e-05, "loss": 1.0258, "step": 2149 }, { "epoch": 0.6613349738541987, "grad_norm": 0.5111928919930284, "learning_rate": 2.7364862942936896e-05, "loss": 0.92, "step": 2150 }, { "epoch": 0.6616425715164564, "grad_norm": 0.9499170895430502, "learning_rate": 2.7320452409807983e-05, "loss": 0.9785, "step": 2151 }, { "epoch": 0.6619501691787142, "grad_norm": 0.5026996813301752, "learning_rate": 2.7276064392415453e-05, "loss": 0.737, "step": 2152 }, { "epoch": 0.662257766840972, "grad_norm": 0.48274653417647057, "learning_rate": 2.7231698934826693e-05, "loss": 0.836, "step": 2153 }, { "epoch": 0.6625653645032298, "grad_norm": 0.42744003339026726, "learning_rate": 2.718735608108671e-05, "loss": 0.9015, "step": 2154 }, { "epoch": 0.6628729621654875, "grad_norm": 0.45831504534483597, "learning_rate": 2.7143035875218104e-05, "loss": 0.8036, "step": 2155 }, { "epoch": 0.6631805598277453, "grad_norm": 0.7979883282385979, "learning_rate": 2.7098738361220954e-05, "loss": 0.9983, "step": 2156 }, { "epoch": 0.6634881574900031, "grad_norm": 0.44870281088985414, "learning_rate": 2.7054463583072807e-05, "loss": 0.9223, "step": 2157 }, { "epoch": 0.6637957551522609, "grad_norm": 0.42849366131219585, "learning_rate": 2.701021158472866e-05, "loss": 0.8188, "step": 2158 }, { "epoch": 0.6641033528145186, "grad_norm": 0.7052096970382905, "learning_rate": 2.696598241012088e-05, "loss": 0.9827, "step": 2159 }, { "epoch": 0.6644109504767763, "grad_norm": 0.49395978149773817, "learning_rate": 2.6921776103159168e-05, "loss": 0.9771, "step": 2160 }, { "epoch": 0.6647185481390342, "grad_norm": 0.6245480267802908, "learning_rate": 2.6877592707730566e-05, "loss": 0.8761, "step": 2161 }, { "epoch": 0.6650261458012919, "grad_norm": 0.4525847113461472, "learning_rate": 2.6833432267699322e-05, "loss": 0.8571, "step": 2162 }, { "epoch": 0.6653337434635497, "grad_norm": 0.6271245416425387, "learning_rate": 2.6789294826906907e-05, "loss": 0.8082, "step": 2163 }, { "epoch": 0.6656413411258074, "grad_norm": 0.36487724778569747, "learning_rate": 2.674518042917197e-05, "loss": 0.9341, "step": 2164 }, { "epoch": 0.6659489387880652, "grad_norm": 1.230383792393838, "learning_rate": 2.670108911829028e-05, "loss": 0.9784, "step": 2165 }, { "epoch": 0.666256536450323, "grad_norm": 0.5252874592904102, "learning_rate": 2.6657020938034654e-05, "loss": 1.0447, "step": 2166 }, { "epoch": 0.6665641341125808, "grad_norm": 0.4613057177850866, "learning_rate": 2.6612975932155014e-05, "loss": 0.8574, "step": 2167 }, { "epoch": 0.6668717317748385, "grad_norm": 0.9722313428935928, "learning_rate": 2.6568954144378223e-05, "loss": 1.0731, "step": 2168 }, { "epoch": 0.6671793294370962, "grad_norm": 0.4549461446510981, "learning_rate": 2.6524955618408094e-05, "loss": 0.9063, "step": 2169 }, { "epoch": 0.6674869270993541, "grad_norm": 0.5450318104773816, "learning_rate": 2.6480980397925346e-05, "loss": 0.9023, "step": 2170 }, { "epoch": 0.6677945247616118, "grad_norm": 0.5647282067256406, "learning_rate": 2.6437028526587625e-05, "loss": 0.837, "step": 2171 }, { "epoch": 0.6681021224238696, "grad_norm": 0.6355331076040283, "learning_rate": 2.639310004802928e-05, "loss": 0.9904, "step": 2172 }, { "epoch": 0.6684097200861273, "grad_norm": 0.4482212400515781, "learning_rate": 2.6349195005861544e-05, "loss": 0.9093, "step": 2173 }, { "epoch": 0.6687173177483852, "grad_norm": 0.526208575648711, "learning_rate": 2.6305313443672323e-05, "loss": 0.9281, "step": 2174 }, { "epoch": 0.6690249154106429, "grad_norm": 0.5888416088191187, "learning_rate": 2.6261455405026235e-05, "loss": 0.7533, "step": 2175 }, { "epoch": 0.6693325130729006, "grad_norm": 0.495622791350476, "learning_rate": 2.6217620933464514e-05, "loss": 0.9261, "step": 2176 }, { "epoch": 0.6696401107351584, "grad_norm": 0.6387029852677523, "learning_rate": 2.6173810072505055e-05, "loss": 0.9874, "step": 2177 }, { "epoch": 0.6699477083974161, "grad_norm": 0.4413084550879547, "learning_rate": 2.6130022865642274e-05, "loss": 0.9781, "step": 2178 }, { "epoch": 0.670255306059674, "grad_norm": 0.41857659541889775, "learning_rate": 2.60862593563471e-05, "loss": 0.9049, "step": 2179 }, { "epoch": 0.6705629037219317, "grad_norm": 1.849339184670773, "learning_rate": 2.6042519588066937e-05, "loss": 0.7719, "step": 2180 }, { "epoch": 0.6708705013841895, "grad_norm": 0.6876951080710672, "learning_rate": 2.5998803604225634e-05, "loss": 0.873, "step": 2181 }, { "epoch": 0.6711780990464472, "grad_norm": 0.6606018590314612, "learning_rate": 2.5955111448223407e-05, "loss": 0.82, "step": 2182 }, { "epoch": 0.6714856967087051, "grad_norm": 0.43408227374799, "learning_rate": 2.5911443163436865e-05, "loss": 0.9235, "step": 2183 }, { "epoch": 0.6717932943709628, "grad_norm": 1.046448096932192, "learning_rate": 2.5867798793218855e-05, "loss": 1.1219, "step": 2184 }, { "epoch": 0.6721008920332205, "grad_norm": 0.4943995241780365, "learning_rate": 2.582417838089852e-05, "loss": 0.8051, "step": 2185 }, { "epoch": 0.6724084896954783, "grad_norm": 0.5935074176917691, "learning_rate": 2.5780581969781208e-05, "loss": 0.8881, "step": 2186 }, { "epoch": 0.6727160873577361, "grad_norm": 0.6989686701458174, "learning_rate": 2.5737009603148434e-05, "loss": 0.981, "step": 2187 }, { "epoch": 0.6730236850199939, "grad_norm": 0.6486853276515113, "learning_rate": 2.569346132425783e-05, "loss": 0.9571, "step": 2188 }, { "epoch": 0.6733312826822516, "grad_norm": 0.612582653152141, "learning_rate": 2.564993717634317e-05, "loss": 1.0081, "step": 2189 }, { "epoch": 0.6736388803445094, "grad_norm": 0.4689400536845521, "learning_rate": 2.5606437202614215e-05, "loss": 0.8419, "step": 2190 }, { "epoch": 0.6739464780067671, "grad_norm": 0.4920646043492719, "learning_rate": 2.556296144625674e-05, "loss": 0.9972, "step": 2191 }, { "epoch": 0.674254075669025, "grad_norm": 0.4108939924235081, "learning_rate": 2.5519509950432492e-05, "loss": 0.8926, "step": 2192 }, { "epoch": 0.6745616733312827, "grad_norm": 0.4186581344034789, "learning_rate": 2.5476082758279113e-05, "loss": 0.8562, "step": 2193 }, { "epoch": 0.6748692709935404, "grad_norm": 0.5400048873707155, "learning_rate": 2.5432679912910105e-05, "loss": 1.0231, "step": 2194 }, { "epoch": 0.6751768686557982, "grad_norm": 0.4593194769682683, "learning_rate": 2.5389301457414878e-05, "loss": 0.8348, "step": 2195 }, { "epoch": 0.675484466318056, "grad_norm": 0.5180146969171561, "learning_rate": 2.534594743485853e-05, "loss": 0.903, "step": 2196 }, { "epoch": 0.6757920639803138, "grad_norm": 0.48110250173649677, "learning_rate": 2.5302617888281955e-05, "loss": 1.0588, "step": 2197 }, { "epoch": 0.6760996616425715, "grad_norm": 0.4418571922498832, "learning_rate": 2.5259312860701735e-05, "loss": 0.8157, "step": 2198 }, { "epoch": 0.6764072593048293, "grad_norm": 0.45180661536338906, "learning_rate": 2.5216032395110107e-05, "loss": 0.8018, "step": 2199 }, { "epoch": 0.676714856967087, "grad_norm": 0.4440740792943821, "learning_rate": 2.5172776534474908e-05, "loss": 0.8669, "step": 2200 }, { "epoch": 0.6770224546293448, "grad_norm": 0.428910157854241, "learning_rate": 2.5129545321739617e-05, "loss": 0.7901, "step": 2201 }, { "epoch": 0.6773300522916026, "grad_norm": 0.6091107511856251, "learning_rate": 2.508633879982316e-05, "loss": 0.6614, "step": 2202 }, { "epoch": 0.6776376499538603, "grad_norm": 0.5644828058126244, "learning_rate": 2.504315701162e-05, "loss": 0.9234, "step": 2203 }, { "epoch": 0.6779452476161181, "grad_norm": 0.39006640012462335, "learning_rate": 2.500000000000001e-05, "loss": 0.9692, "step": 2204 }, { "epoch": 0.6782528452783759, "grad_norm": 0.5255215428359937, "learning_rate": 2.4956867807808482e-05, "loss": 0.9504, "step": 2205 }, { "epoch": 0.6785604429406337, "grad_norm": 0.6530114643244709, "learning_rate": 2.4913760477866098e-05, "loss": 0.8564, "step": 2206 }, { "epoch": 0.6788680406028914, "grad_norm": 0.781627212822504, "learning_rate": 2.487067805296882e-05, "loss": 0.9397, "step": 2207 }, { "epoch": 0.6791756382651492, "grad_norm": 0.3848238781708966, "learning_rate": 2.482762057588789e-05, "loss": 0.8612, "step": 2208 }, { "epoch": 0.679483235927407, "grad_norm": 0.6167829808215095, "learning_rate": 2.4784588089369783e-05, "loss": 0.8471, "step": 2209 }, { "epoch": 0.6797908335896647, "grad_norm": 0.8206860577020691, "learning_rate": 2.474158063613617e-05, "loss": 0.7948, "step": 2210 }, { "epoch": 0.6800984312519225, "grad_norm": 0.9186707279130785, "learning_rate": 2.469859825888385e-05, "loss": 1.1386, "step": 2211 }, { "epoch": 0.6804060289141802, "grad_norm": 0.48684733026603916, "learning_rate": 2.465564100028478e-05, "loss": 0.9243, "step": 2212 }, { "epoch": 0.680713626576438, "grad_norm": 0.46864760671023215, "learning_rate": 2.4612708902985932e-05, "loss": 0.8629, "step": 2213 }, { "epoch": 0.6810212242386958, "grad_norm": 0.47367275298577766, "learning_rate": 2.4569802009609306e-05, "loss": 0.857, "step": 2214 }, { "epoch": 0.6813288219009536, "grad_norm": 2.6428423833090617, "learning_rate": 2.452692036275188e-05, "loss": 0.9459, "step": 2215 }, { "epoch": 0.6816364195632113, "grad_norm": 0.5975150876491847, "learning_rate": 2.4484064004985578e-05, "loss": 0.8314, "step": 2216 }, { "epoch": 0.681944017225469, "grad_norm": 0.5646713582342395, "learning_rate": 2.44412329788572e-05, "loss": 1.0192, "step": 2217 }, { "epoch": 0.6822516148877269, "grad_norm": 0.49203299483132273, "learning_rate": 2.4398427326888433e-05, "loss": 0.8249, "step": 2218 }, { "epoch": 0.6825592125499846, "grad_norm": 0.643143679208332, "learning_rate": 2.435564709157574e-05, "loss": 0.9903, "step": 2219 }, { "epoch": 0.6828668102122424, "grad_norm": 0.6785059787914594, "learning_rate": 2.4312892315390364e-05, "loss": 0.8926, "step": 2220 }, { "epoch": 0.6831744078745001, "grad_norm": 0.6986834741478557, "learning_rate": 2.4270163040778255e-05, "loss": 0.9118, "step": 2221 }, { "epoch": 0.683482005536758, "grad_norm": 0.4779619348515175, "learning_rate": 2.4227459310160073e-05, "loss": 0.8267, "step": 2222 }, { "epoch": 0.6837896031990157, "grad_norm": 0.4982677275446996, "learning_rate": 2.4184781165931085e-05, "loss": 0.8571, "step": 2223 }, { "epoch": 0.6840972008612735, "grad_norm": 0.5764418017990792, "learning_rate": 2.414212865046121e-05, "loss": 0.8176, "step": 2224 }, { "epoch": 0.6844047985235312, "grad_norm": 0.7294180250999497, "learning_rate": 2.4099501806094877e-05, "loss": 1.046, "step": 2225 }, { "epoch": 0.6847123961857889, "grad_norm": 0.5487994215648182, "learning_rate": 2.4056900675151033e-05, "loss": 0.9169, "step": 2226 }, { "epoch": 0.6850199938480468, "grad_norm": 0.5462496848591172, "learning_rate": 2.4014325299923118e-05, "loss": 0.8073, "step": 2227 }, { "epoch": 0.6853275915103045, "grad_norm": 0.7581991422082481, "learning_rate": 2.3971775722678985e-05, "loss": 1.0334, "step": 2228 }, { "epoch": 0.6856351891725623, "grad_norm": 1.0417595400224577, "learning_rate": 2.3929251985660867e-05, "loss": 1.2327, "step": 2229 }, { "epoch": 0.68594278683482, "grad_norm": 0.9257507949241432, "learning_rate": 2.3886754131085393e-05, "loss": 1.1519, "step": 2230 }, { "epoch": 0.6862503844970779, "grad_norm": 0.4602928219828027, "learning_rate": 2.3844282201143436e-05, "loss": 0.9366, "step": 2231 }, { "epoch": 0.6865579821593356, "grad_norm": 0.42916034229151084, "learning_rate": 2.380183623800017e-05, "loss": 0.7766, "step": 2232 }, { "epoch": 0.6868655798215934, "grad_norm": 0.6736192535032215, "learning_rate": 2.3759416283794966e-05, "loss": 0.9924, "step": 2233 }, { "epoch": 0.6871731774838511, "grad_norm": 0.4827076022313504, "learning_rate": 2.3717022380641397e-05, "loss": 0.9062, "step": 2234 }, { "epoch": 0.6874807751461088, "grad_norm": 0.45943259559821065, "learning_rate": 2.3674654570627126e-05, "loss": 0.7858, "step": 2235 }, { "epoch": 0.6877883728083667, "grad_norm": 0.5552023399987422, "learning_rate": 2.3632312895813995e-05, "loss": 0.767, "step": 2236 }, { "epoch": 0.6880959704706244, "grad_norm": 0.42062660205130664, "learning_rate": 2.358999739823783e-05, "loss": 0.935, "step": 2237 }, { "epoch": 0.6884035681328822, "grad_norm": 0.53973152084144, "learning_rate": 2.3547708119908485e-05, "loss": 0.8924, "step": 2238 }, { "epoch": 0.6887111657951399, "grad_norm": 1.0156413715978336, "learning_rate": 2.350544510280978e-05, "loss": 1.0551, "step": 2239 }, { "epoch": 0.6890187634573978, "grad_norm": 0.572428414650461, "learning_rate": 2.34632083888995e-05, "loss": 0.8831, "step": 2240 }, { "epoch": 0.6893263611196555, "grad_norm": 0.4149022888994945, "learning_rate": 2.3420998020109276e-05, "loss": 0.9608, "step": 2241 }, { "epoch": 0.6896339587819132, "grad_norm": 0.4244133806663881, "learning_rate": 2.3378814038344587e-05, "loss": 0.88, "step": 2242 }, { "epoch": 0.689941556444171, "grad_norm": 0.4887101781600717, "learning_rate": 2.3336656485484743e-05, "loss": 0.792, "step": 2243 }, { "epoch": 0.6902491541064288, "grad_norm": 0.623278064082461, "learning_rate": 2.3294525403382782e-05, "loss": 0.9787, "step": 2244 }, { "epoch": 0.6905567517686866, "grad_norm": 0.6174403576069931, "learning_rate": 2.3252420833865474e-05, "loss": 0.8921, "step": 2245 }, { "epoch": 0.6908643494309443, "grad_norm": 0.5206472938723691, "learning_rate": 2.32103428187333e-05, "loss": 1.0057, "step": 2246 }, { "epoch": 0.6911719470932021, "grad_norm": 0.4678992681430667, "learning_rate": 2.316829139976034e-05, "loss": 0.8341, "step": 2247 }, { "epoch": 0.6914795447554598, "grad_norm": 0.480966036683038, "learning_rate": 2.3126266618694277e-05, "loss": 0.9967, "step": 2248 }, { "epoch": 0.6917871424177177, "grad_norm": 0.7415817450826002, "learning_rate": 2.308426851725635e-05, "loss": 0.9642, "step": 2249 }, { "epoch": 0.6920947400799754, "grad_norm": 0.5121278187004115, "learning_rate": 2.304229713714133e-05, "loss": 0.9636, "step": 2250 }, { "epoch": 0.6924023377422331, "grad_norm": 0.669580936850576, "learning_rate": 2.30003525200174e-05, "loss": 0.9668, "step": 2251 }, { "epoch": 0.6927099354044909, "grad_norm": 0.8482073097500166, "learning_rate": 2.2958434707526272e-05, "loss": 0.9848, "step": 2252 }, { "epoch": 0.6930175330667487, "grad_norm": 0.5203676473600468, "learning_rate": 2.291654374128297e-05, "loss": 0.9334, "step": 2253 }, { "epoch": 0.6933251307290065, "grad_norm": 0.40511285364901767, "learning_rate": 2.2874679662875888e-05, "loss": 0.7504, "step": 2254 }, { "epoch": 0.6936327283912642, "grad_norm": 0.5123968847866269, "learning_rate": 2.2832842513866724e-05, "loss": 1.0654, "step": 2255 }, { "epoch": 0.693940326053522, "grad_norm": 0.4536503346406055, "learning_rate": 2.279103233579044e-05, "loss": 0.9399, "step": 2256 }, { "epoch": 0.6942479237157798, "grad_norm": 0.6647219545122359, "learning_rate": 2.2749249170155217e-05, "loss": 0.8515, "step": 2257 }, { "epoch": 0.6945555213780376, "grad_norm": 0.7542628438367096, "learning_rate": 2.2707493058442454e-05, "loss": 0.9011, "step": 2258 }, { "epoch": 0.6948631190402953, "grad_norm": 0.7311829024810769, "learning_rate": 2.2665764042106645e-05, "loss": 0.8786, "step": 2259 }, { "epoch": 0.695170716702553, "grad_norm": 0.5257772746903668, "learning_rate": 2.262406216257541e-05, "loss": 0.8701, "step": 2260 }, { "epoch": 0.6954783143648108, "grad_norm": 0.6644758636110133, "learning_rate": 2.2582387461249417e-05, "loss": 0.9444, "step": 2261 }, { "epoch": 0.6957859120270686, "grad_norm": 0.6447284123153237, "learning_rate": 2.2540739979502357e-05, "loss": 0.9423, "step": 2262 }, { "epoch": 0.6960935096893264, "grad_norm": 0.8200688020440781, "learning_rate": 2.249911975868088e-05, "loss": 1.099, "step": 2263 }, { "epoch": 0.6964011073515841, "grad_norm": 0.43208567202353293, "learning_rate": 2.2457526840104632e-05, "loss": 0.7217, "step": 2264 }, { "epoch": 0.6967087050138419, "grad_norm": 0.3891524352095965, "learning_rate": 2.2415961265066083e-05, "loss": 0.9352, "step": 2265 }, { "epoch": 0.6970163026760997, "grad_norm": 0.8992071205658346, "learning_rate": 2.2374423074830597e-05, "loss": 1.0585, "step": 2266 }, { "epoch": 0.6973239003383574, "grad_norm": 0.4682321015955125, "learning_rate": 2.233291231063631e-05, "loss": 1.0324, "step": 2267 }, { "epoch": 0.6976314980006152, "grad_norm": 0.6074777359881548, "learning_rate": 2.229142901369422e-05, "loss": 1.0893, "step": 2268 }, { "epoch": 0.6979390956628729, "grad_norm": 0.46098558782669347, "learning_rate": 2.2249973225187925e-05, "loss": 0.8875, "step": 2269 }, { "epoch": 0.6982466933251307, "grad_norm": 0.45814957190674305, "learning_rate": 2.2208544986273837e-05, "loss": 0.9358, "step": 2270 }, { "epoch": 0.6985542909873885, "grad_norm": 0.4802552947513812, "learning_rate": 2.2167144338080946e-05, "loss": 1.0118, "step": 2271 }, { "epoch": 0.6988618886496463, "grad_norm": 0.6174578530717408, "learning_rate": 2.212577132171087e-05, "loss": 0.8284, "step": 2272 }, { "epoch": 0.699169486311904, "grad_norm": 0.6990967087188268, "learning_rate": 2.208442597823777e-05, "loss": 0.9597, "step": 2273 }, { "epoch": 0.6994770839741618, "grad_norm": 0.6921271261892682, "learning_rate": 2.2043108348708425e-05, "loss": 1.09, "step": 2274 }, { "epoch": 0.6997846816364196, "grad_norm": 0.738562868317752, "learning_rate": 2.2001818474141945e-05, "loss": 1.1923, "step": 2275 }, { "epoch": 0.7000922792986773, "grad_norm": 0.5452582762301261, "learning_rate": 2.1960556395530035e-05, "loss": 0.976, "step": 2276 }, { "epoch": 0.7003998769609351, "grad_norm": 0.4904305912739668, "learning_rate": 2.191932215383673e-05, "loss": 0.8695, "step": 2277 }, { "epoch": 0.7007074746231928, "grad_norm": 0.40794777413570793, "learning_rate": 2.187811578999844e-05, "loss": 0.9368, "step": 2278 }, { "epoch": 0.7010150722854507, "grad_norm": 0.46415632826155734, "learning_rate": 2.1836937344923873e-05, "loss": 0.818, "step": 2279 }, { "epoch": 0.7013226699477084, "grad_norm": 0.48001990778220993, "learning_rate": 2.1795786859494117e-05, "loss": 0.7827, "step": 2280 }, { "epoch": 0.7016302676099662, "grad_norm": 0.4569804479341292, "learning_rate": 2.175466437456235e-05, "loss": 0.8389, "step": 2281 }, { "epoch": 0.7019378652722239, "grad_norm": 0.6164074579419757, "learning_rate": 2.17135699309541e-05, "loss": 1.0588, "step": 2282 }, { "epoch": 0.7022454629344816, "grad_norm": 0.4851083329695192, "learning_rate": 2.1672503569466957e-05, "loss": 0.8526, "step": 2283 }, { "epoch": 0.7025530605967395, "grad_norm": 0.6401651060196427, "learning_rate": 2.1631465330870675e-05, "loss": 0.8169, "step": 2284 }, { "epoch": 0.7028606582589972, "grad_norm": 0.8053719429956863, "learning_rate": 2.1590455255907067e-05, "loss": 0.8431, "step": 2285 }, { "epoch": 0.703168255921255, "grad_norm": 0.5707400073085452, "learning_rate": 2.1549473385290047e-05, "loss": 0.9035, "step": 2286 }, { "epoch": 0.7034758535835127, "grad_norm": 0.5181002590707875, "learning_rate": 2.1508519759705403e-05, "loss": 0.7547, "step": 2287 }, { "epoch": 0.7037834512457706, "grad_norm": 0.42301146015484586, "learning_rate": 2.1467594419811017e-05, "loss": 0.7131, "step": 2288 }, { "epoch": 0.7040910489080283, "grad_norm": 0.6825854758437444, "learning_rate": 2.1426697406236606e-05, "loss": 1.0235, "step": 2289 }, { "epoch": 0.7043986465702861, "grad_norm": 0.3634703824506728, "learning_rate": 2.1385828759583803e-05, "loss": 0.7939, "step": 2290 }, { "epoch": 0.7047062442325438, "grad_norm": 0.4758795956839881, "learning_rate": 2.1344988520426035e-05, "loss": 0.929, "step": 2291 }, { "epoch": 0.7050138418948017, "grad_norm": 0.5521081426878086, "learning_rate": 2.130417672930862e-05, "loss": 0.871, "step": 2292 }, { "epoch": 0.7053214395570594, "grad_norm": 0.48976591747098347, "learning_rate": 2.1263393426748502e-05, "loss": 0.8578, "step": 2293 }, { "epoch": 0.7056290372193171, "grad_norm": 0.5710309506672703, "learning_rate": 2.122263865323445e-05, "loss": 0.8997, "step": 2294 }, { "epoch": 0.7059366348815749, "grad_norm": 0.4489548774458272, "learning_rate": 2.1181912449226872e-05, "loss": 0.7609, "step": 2295 }, { "epoch": 0.7062442325438326, "grad_norm": 0.41227517357555066, "learning_rate": 2.1141214855157797e-05, "loss": 0.8136, "step": 2296 }, { "epoch": 0.7065518302060905, "grad_norm": 0.7390825276412331, "learning_rate": 2.110054591143086e-05, "loss": 1.1077, "step": 2297 }, { "epoch": 0.7068594278683482, "grad_norm": 0.516726208730996, "learning_rate": 2.1059905658421307e-05, "loss": 0.83, "step": 2298 }, { "epoch": 0.707167025530606, "grad_norm": 0.5535700150235096, "learning_rate": 2.1019294136475782e-05, "loss": 0.7057, "step": 2299 }, { "epoch": 0.7074746231928637, "grad_norm": 0.7218690176668119, "learning_rate": 2.097871138591252e-05, "loss": 1.011, "step": 2300 }, { "epoch": 0.7077822208551215, "grad_norm": 0.6367180410422026, "learning_rate": 2.0938157447021145e-05, "loss": 0.9512, "step": 2301 }, { "epoch": 0.7080898185173793, "grad_norm": 0.5579268066139372, "learning_rate": 2.0897632360062673e-05, "loss": 0.8597, "step": 2302 }, { "epoch": 0.708397416179637, "grad_norm": 0.5218519687373363, "learning_rate": 2.0857136165269463e-05, "loss": 0.9934, "step": 2303 }, { "epoch": 0.7087050138418948, "grad_norm": 0.3824561309246054, "learning_rate": 2.0816668902845276e-05, "loss": 0.92, "step": 2304 }, { "epoch": 0.7090126115041526, "grad_norm": 0.7001613659450759, "learning_rate": 2.0776230612965e-05, "loss": 1.1453, "step": 2305 }, { "epoch": 0.7093202091664104, "grad_norm": 0.9045701004666917, "learning_rate": 2.073582133577491e-05, "loss": 1.117, "step": 2306 }, { "epoch": 0.7096278068286681, "grad_norm": 0.5256240861575687, "learning_rate": 2.0695441111392382e-05, "loss": 0.9633, "step": 2307 }, { "epoch": 0.7099354044909258, "grad_norm": 0.6044501353622651, "learning_rate": 2.065508997990599e-05, "loss": 0.871, "step": 2308 }, { "epoch": 0.7102430021531836, "grad_norm": 1.3458230903761155, "learning_rate": 2.0614767981375392e-05, "loss": 0.6978, "step": 2309 }, { "epoch": 0.7105505998154414, "grad_norm": 0.6383629949951062, "learning_rate": 2.0574475155831385e-05, "loss": 0.9781, "step": 2310 }, { "epoch": 0.7108581974776992, "grad_norm": 0.4724567022881396, "learning_rate": 2.053421154327575e-05, "loss": 0.8623, "step": 2311 }, { "epoch": 0.7111657951399569, "grad_norm": 0.7242107224580868, "learning_rate": 2.0493977183681274e-05, "loss": 0.9732, "step": 2312 }, { "epoch": 0.7114733928022147, "grad_norm": 0.4214611333410892, "learning_rate": 2.0453772116991693e-05, "loss": 0.7843, "step": 2313 }, { "epoch": 0.7117809904644725, "grad_norm": 0.5409445199601612, "learning_rate": 2.0413596383121736e-05, "loss": 0.8463, "step": 2314 }, { "epoch": 0.7120885881267303, "grad_norm": 0.6542406574608133, "learning_rate": 2.0373450021956873e-05, "loss": 0.9392, "step": 2315 }, { "epoch": 0.712396185788988, "grad_norm": 0.6713504663134177, "learning_rate": 2.0333333073353562e-05, "loss": 0.8737, "step": 2316 }, { "epoch": 0.7127037834512457, "grad_norm": 0.6874045335835951, "learning_rate": 2.0293245577138964e-05, "loss": 0.7183, "step": 2317 }, { "epoch": 0.7130113811135035, "grad_norm": 0.6503706142427286, "learning_rate": 2.0253187573111026e-05, "loss": 1.051, "step": 2318 }, { "epoch": 0.7133189787757613, "grad_norm": 0.5252665914410443, "learning_rate": 2.021315910103841e-05, "loss": 0.8685, "step": 2319 }, { "epoch": 0.7136265764380191, "grad_norm": 0.3848732251796412, "learning_rate": 2.017316020066052e-05, "loss": 0.9747, "step": 2320 }, { "epoch": 0.7139341741002768, "grad_norm": 0.5091406032433645, "learning_rate": 2.0133190911687265e-05, "loss": 0.9775, "step": 2321 }, { "epoch": 0.7142417717625346, "grad_norm": 0.5722523463168657, "learning_rate": 2.0093251273799313e-05, "loss": 0.6397, "step": 2322 }, { "epoch": 0.7145493694247924, "grad_norm": 0.44723697679349694, "learning_rate": 2.0053341326647796e-05, "loss": 0.8324, "step": 2323 }, { "epoch": 0.7148569670870502, "grad_norm": 0.4338318955092558, "learning_rate": 2.0013461109854397e-05, "loss": 0.7838, "step": 2324 }, { "epoch": 0.7151645647493079, "grad_norm": 0.7646839818953709, "learning_rate": 1.997361066301127e-05, "loss": 0.9993, "step": 2325 }, { "epoch": 0.7154721624115656, "grad_norm": 0.5097409431519482, "learning_rate": 1.993379002568108e-05, "loss": 0.9328, "step": 2326 }, { "epoch": 0.7157797600738235, "grad_norm": 0.3685966623492535, "learning_rate": 1.9893999237396772e-05, "loss": 0.7231, "step": 2327 }, { "epoch": 0.7160873577360812, "grad_norm": 0.458239503582097, "learning_rate": 1.9854238337661784e-05, "loss": 0.9047, "step": 2328 }, { "epoch": 0.716394955398339, "grad_norm": 0.4508814120868249, "learning_rate": 1.9814507365949815e-05, "loss": 0.8627, "step": 2329 }, { "epoch": 0.7167025530605967, "grad_norm": 0.4355426632839696, "learning_rate": 1.9774806361704863e-05, "loss": 0.8155, "step": 2330 }, { "epoch": 0.7170101507228545, "grad_norm": 0.700406105513846, "learning_rate": 1.9735135364341152e-05, "loss": 1.0587, "step": 2331 }, { "epoch": 0.7173177483851123, "grad_norm": 0.5226491838394879, "learning_rate": 1.969549441324321e-05, "loss": 0.8624, "step": 2332 }, { "epoch": 0.71762534604737, "grad_norm": 0.6811368414242329, "learning_rate": 1.965588354776558e-05, "loss": 0.8684, "step": 2333 }, { "epoch": 0.7179329437096278, "grad_norm": 0.5564911269917485, "learning_rate": 1.9616302807233088e-05, "loss": 0.7024, "step": 2334 }, { "epoch": 0.7182405413718855, "grad_norm": 0.5549199083326025, "learning_rate": 1.9576752230940566e-05, "loss": 0.8588, "step": 2335 }, { "epoch": 0.7185481390341434, "grad_norm": 0.5422270693674975, "learning_rate": 1.9537231858152923e-05, "loss": 0.9276, "step": 2336 }, { "epoch": 0.7188557366964011, "grad_norm": 0.6844126540319894, "learning_rate": 1.9497741728105066e-05, "loss": 0.9444, "step": 2337 }, { "epoch": 0.7191633343586589, "grad_norm": 0.949686876847266, "learning_rate": 1.9458281880001944e-05, "loss": 0.9773, "step": 2338 }, { "epoch": 0.7194709320209166, "grad_norm": 0.8197963122796517, "learning_rate": 1.9418852353018318e-05, "loss": 0.9594, "step": 2339 }, { "epoch": 0.7197785296831745, "grad_norm": 0.6063368791624565, "learning_rate": 1.937945318629898e-05, "loss": 0.9356, "step": 2340 }, { "epoch": 0.7200861273454322, "grad_norm": 0.5092665027454816, "learning_rate": 1.9340084418958506e-05, "loss": 0.8437, "step": 2341 }, { "epoch": 0.72039372500769, "grad_norm": 0.711959990739548, "learning_rate": 1.9300746090081296e-05, "loss": 0.8772, "step": 2342 }, { "epoch": 0.7207013226699477, "grad_norm": 0.6490470190768121, "learning_rate": 1.926143823872154e-05, "loss": 0.9567, "step": 2343 }, { "epoch": 0.7210089203322054, "grad_norm": 0.44299642673857714, "learning_rate": 1.9222160903903218e-05, "loss": 0.9358, "step": 2344 }, { "epoch": 0.7213165179944633, "grad_norm": 0.8133447040027132, "learning_rate": 1.9182914124619906e-05, "loss": 1.0788, "step": 2345 }, { "epoch": 0.721624115656721, "grad_norm": 0.466301317024577, "learning_rate": 1.914369793983496e-05, "loss": 0.7609, "step": 2346 }, { "epoch": 0.7219317133189788, "grad_norm": 0.41350649055642874, "learning_rate": 1.910451238848129e-05, "loss": 0.7671, "step": 2347 }, { "epoch": 0.7222393109812365, "grad_norm": 0.5841708501901234, "learning_rate": 1.9065357509461425e-05, "loss": 0.8583, "step": 2348 }, { "epoch": 0.7225469086434944, "grad_norm": 0.5946707446879681, "learning_rate": 1.90262333416474e-05, "loss": 0.8155, "step": 2349 }, { "epoch": 0.7228545063057521, "grad_norm": 0.688225487108603, "learning_rate": 1.8987139923880865e-05, "loss": 1.0516, "step": 2350 }, { "epoch": 0.7231621039680098, "grad_norm": 0.6043175429026753, "learning_rate": 1.894807729497279e-05, "loss": 0.9855, "step": 2351 }, { "epoch": 0.7234697016302676, "grad_norm": 0.8728164620391963, "learning_rate": 1.8909045493703715e-05, "loss": 1.0731, "step": 2352 }, { "epoch": 0.7237772992925253, "grad_norm": 0.6911689714383519, "learning_rate": 1.8870044558823503e-05, "loss": 0.9628, "step": 2353 }, { "epoch": 0.7240848969547832, "grad_norm": 0.48299007695325363, "learning_rate": 1.883107452905139e-05, "loss": 0.9343, "step": 2354 }, { "epoch": 0.7243924946170409, "grad_norm": 0.5991020495862192, "learning_rate": 1.8792135443075916e-05, "loss": 0.8612, "step": 2355 }, { "epoch": 0.7247000922792987, "grad_norm": 0.5578531841310953, "learning_rate": 1.875322733955497e-05, "loss": 1.1577, "step": 2356 }, { "epoch": 0.7250076899415564, "grad_norm": 0.5315212929259352, "learning_rate": 1.8714350257115548e-05, "loss": 0.9108, "step": 2357 }, { "epoch": 0.7253152876038143, "grad_norm": 0.4007958106053407, "learning_rate": 1.8675504234353984e-05, "loss": 0.691, "step": 2358 }, { "epoch": 0.725622885266072, "grad_norm": 0.48830700010561096, "learning_rate": 1.8636689309835715e-05, "loss": 1.0111, "step": 2359 }, { "epoch": 0.7259304829283297, "grad_norm": 0.46474621924719195, "learning_rate": 1.8597905522095294e-05, "loss": 0.852, "step": 2360 }, { "epoch": 0.7262380805905875, "grad_norm": 0.43002703956835747, "learning_rate": 1.8559152909636375e-05, "loss": 0.7301, "step": 2361 }, { "epoch": 0.7265456782528453, "grad_norm": 0.4498838041007543, "learning_rate": 1.852043151093171e-05, "loss": 0.8535, "step": 2362 }, { "epoch": 0.7268532759151031, "grad_norm": 0.4873656724533025, "learning_rate": 1.8481741364422954e-05, "loss": 0.7863, "step": 2363 }, { "epoch": 0.7271608735773608, "grad_norm": 0.3732816629737811, "learning_rate": 1.844308250852085e-05, "loss": 0.7506, "step": 2364 }, { "epoch": 0.7274684712396186, "grad_norm": 0.4553853210622003, "learning_rate": 1.8404454981605006e-05, "loss": 0.8999, "step": 2365 }, { "epoch": 0.7277760689018763, "grad_norm": 0.6901354114104961, "learning_rate": 1.8365858822023952e-05, "loss": 1.1836, "step": 2366 }, { "epoch": 0.7280836665641341, "grad_norm": 0.41811464266915094, "learning_rate": 1.8327294068095052e-05, "loss": 0.8061, "step": 2367 }, { "epoch": 0.7283912642263919, "grad_norm": 0.6175262787962434, "learning_rate": 1.8288760758104568e-05, "loss": 1.0507, "step": 2368 }, { "epoch": 0.7286988618886496, "grad_norm": 0.5824665056251422, "learning_rate": 1.825025893030741e-05, "loss": 0.9487, "step": 2369 }, { "epoch": 0.7290064595509074, "grad_norm": 0.6685640897493067, "learning_rate": 1.8211788622927384e-05, "loss": 1.0898, "step": 2370 }, { "epoch": 0.7293140572131652, "grad_norm": 0.559752789288753, "learning_rate": 1.81733498741569e-05, "loss": 1.0879, "step": 2371 }, { "epoch": 0.729621654875423, "grad_norm": 0.5768808344003564, "learning_rate": 1.8134942722157068e-05, "loss": 0.9545, "step": 2372 }, { "epoch": 0.7299292525376807, "grad_norm": 0.5958033474562969, "learning_rate": 1.8096567205057612e-05, "loss": 0.9803, "step": 2373 }, { "epoch": 0.7302368501999384, "grad_norm": 0.5428606562328107, "learning_rate": 1.8058223360956933e-05, "loss": 0.6802, "step": 2374 }, { "epoch": 0.7305444478621963, "grad_norm": 0.5521983334694296, "learning_rate": 1.801991122792184e-05, "loss": 0.9951, "step": 2375 }, { "epoch": 0.730852045524454, "grad_norm": 0.3907422610511653, "learning_rate": 1.7981630843987806e-05, "loss": 0.9883, "step": 2376 }, { "epoch": 0.7311596431867118, "grad_norm": 0.4282872292038861, "learning_rate": 1.7943382247158697e-05, "loss": 0.869, "step": 2377 }, { "epoch": 0.7314672408489695, "grad_norm": 0.5274783238305799, "learning_rate": 1.7905165475406844e-05, "loss": 0.7309, "step": 2378 }, { "epoch": 0.7317748385112273, "grad_norm": 0.2898751600310954, "learning_rate": 1.786698056667297e-05, "loss": 0.955, "step": 2379 }, { "epoch": 0.7320824361734851, "grad_norm": 0.5221719794664837, "learning_rate": 1.782882755886621e-05, "loss": 1.0015, "step": 2380 }, { "epoch": 0.7323900338357429, "grad_norm": 0.6586152881368669, "learning_rate": 1.779070648986399e-05, "loss": 0.8153, "step": 2381 }, { "epoch": 0.7326976314980006, "grad_norm": 0.46871249877656723, "learning_rate": 1.7752617397512023e-05, "loss": 0.7204, "step": 2382 }, { "epoch": 0.7330052291602583, "grad_norm": 0.5634386032316304, "learning_rate": 1.7714560319624287e-05, "loss": 0.9918, "step": 2383 }, { "epoch": 0.7333128268225162, "grad_norm": 0.42214003683277673, "learning_rate": 1.7676535293983017e-05, "loss": 0.7846, "step": 2384 }, { "epoch": 0.7336204244847739, "grad_norm": 0.731814420427391, "learning_rate": 1.7638542358338527e-05, "loss": 0.9161, "step": 2385 }, { "epoch": 0.7339280221470317, "grad_norm": 0.4500248108517683, "learning_rate": 1.7600581550409385e-05, "loss": 0.8438, "step": 2386 }, { "epoch": 0.7342356198092894, "grad_norm": 0.6632376428591075, "learning_rate": 1.7562652907882197e-05, "loss": 1.0111, "step": 2387 }, { "epoch": 0.7345432174715473, "grad_norm": 0.5667878208261639, "learning_rate": 1.752475646841165e-05, "loss": 0.856, "step": 2388 }, { "epoch": 0.734850815133805, "grad_norm": 0.5664724975517318, "learning_rate": 1.7486892269620448e-05, "loss": 0.8493, "step": 2389 }, { "epoch": 0.7351584127960628, "grad_norm": 0.5124021827987192, "learning_rate": 1.7449060349099357e-05, "loss": 0.8916, "step": 2390 }, { "epoch": 0.7354660104583205, "grad_norm": 1.1335297918257101, "learning_rate": 1.7411260744406965e-05, "loss": 0.9008, "step": 2391 }, { "epoch": 0.7357736081205782, "grad_norm": 0.9978563218451737, "learning_rate": 1.737349349306991e-05, "loss": 0.8338, "step": 2392 }, { "epoch": 0.7360812057828361, "grad_norm": 0.44763653851909374, "learning_rate": 1.7335758632582643e-05, "loss": 0.8008, "step": 2393 }, { "epoch": 0.7363888034450938, "grad_norm": 0.8161401743408689, "learning_rate": 1.729805620040747e-05, "loss": 1.0194, "step": 2394 }, { "epoch": 0.7366964011073516, "grad_norm": 0.7211269465377058, "learning_rate": 1.7260386233974478e-05, "loss": 0.9569, "step": 2395 }, { "epoch": 0.7370039987696093, "grad_norm": 0.6449455008712084, "learning_rate": 1.7222748770681612e-05, "loss": 1.0658, "step": 2396 }, { "epoch": 0.7373115964318672, "grad_norm": 0.5971995130396066, "learning_rate": 1.7185143847894414e-05, "loss": 0.8801, "step": 2397 }, { "epoch": 0.7376191940941249, "grad_norm": 0.4987877599406537, "learning_rate": 1.7147571502946235e-05, "loss": 0.8532, "step": 2398 }, { "epoch": 0.7379267917563826, "grad_norm": 0.44658652850606967, "learning_rate": 1.711003177313803e-05, "loss": 0.7954, "step": 2399 }, { "epoch": 0.7382343894186404, "grad_norm": 0.5464559728425599, "learning_rate": 1.7072524695738384e-05, "loss": 0.9201, "step": 2400 }, { "epoch": 0.7385419870808981, "grad_norm": 0.4872189954538286, "learning_rate": 1.7035050307983445e-05, "loss": 1.0396, "step": 2401 }, { "epoch": 0.738849584743156, "grad_norm": 0.5616699405548825, "learning_rate": 1.6997608647076986e-05, "loss": 0.9851, "step": 2402 }, { "epoch": 0.7391571824054137, "grad_norm": 0.5909420365929299, "learning_rate": 1.6960199750190154e-05, "loss": 0.9819, "step": 2403 }, { "epoch": 0.7394647800676715, "grad_norm": 0.7780627297188639, "learning_rate": 1.6922823654461707e-05, "loss": 1.1066, "step": 2404 }, { "epoch": 0.7397723777299292, "grad_norm": 0.6598880239994029, "learning_rate": 1.688548039699776e-05, "loss": 0.8712, "step": 2405 }, { "epoch": 0.7400799753921871, "grad_norm": 0.524232593909736, "learning_rate": 1.6848170014871846e-05, "loss": 0.9315, "step": 2406 }, { "epoch": 0.7403875730544448, "grad_norm": 0.47289253867132297, "learning_rate": 1.6810892545124835e-05, "loss": 0.944, "step": 2407 }, { "epoch": 0.7406951707167025, "grad_norm": 0.4559201106898875, "learning_rate": 1.6773648024765003e-05, "loss": 0.8133, "step": 2408 }, { "epoch": 0.7410027683789603, "grad_norm": 0.6124904267935504, "learning_rate": 1.6736436490767793e-05, "loss": 0.7549, "step": 2409 }, { "epoch": 0.7413103660412181, "grad_norm": 0.5054829147272734, "learning_rate": 1.669925798007601e-05, "loss": 0.8364, "step": 2410 }, { "epoch": 0.7416179637034759, "grad_norm": 0.5849825441839364, "learning_rate": 1.6662112529599616e-05, "loss": 1.0099, "step": 2411 }, { "epoch": 0.7419255613657336, "grad_norm": 0.6382843500318921, "learning_rate": 1.6625000176215766e-05, "loss": 0.8105, "step": 2412 }, { "epoch": 0.7422331590279914, "grad_norm": 0.4175589530546637, "learning_rate": 1.6587920956768738e-05, "loss": 0.8637, "step": 2413 }, { "epoch": 0.7425407566902491, "grad_norm": 0.38489450763522814, "learning_rate": 1.6550874908069986e-05, "loss": 0.6469, "step": 2414 }, { "epoch": 0.742848354352507, "grad_norm": 0.6182188594065453, "learning_rate": 1.6513862066897907e-05, "loss": 0.8143, "step": 2415 }, { "epoch": 0.7431559520147647, "grad_norm": 0.6194073610502059, "learning_rate": 1.6476882469998063e-05, "loss": 0.9928, "step": 2416 }, { "epoch": 0.7434635496770224, "grad_norm": 0.41461306992868247, "learning_rate": 1.6439936154082923e-05, "loss": 0.9015, "step": 2417 }, { "epoch": 0.7437711473392802, "grad_norm": 0.7382390198962406, "learning_rate": 1.640302315583196e-05, "loss": 0.9406, "step": 2418 }, { "epoch": 0.744078745001538, "grad_norm": 0.5067861400360382, "learning_rate": 1.6366143511891524e-05, "loss": 0.834, "step": 2419 }, { "epoch": 0.7443863426637958, "grad_norm": 0.3932743722861524, "learning_rate": 1.6329297258874944e-05, "loss": 0.7956, "step": 2420 }, { "epoch": 0.7446939403260535, "grad_norm": 0.3932743722861524, "learning_rate": 1.6329297258874944e-05, "loss": 0.8049, "step": 2421 }, { "epoch": 0.7450015379883113, "grad_norm": 0.4472886136140399, "learning_rate": 1.6292484433362266e-05, "loss": 0.9182, "step": 2422 }, { "epoch": 0.7453091356505691, "grad_norm": 0.4651862176825588, "learning_rate": 1.6255705071900468e-05, "loss": 0.7943, "step": 2423 }, { "epoch": 0.7456167333128269, "grad_norm": 0.5650975337825108, "learning_rate": 1.621895921100325e-05, "loss": 0.9784, "step": 2424 }, { "epoch": 0.7459243309750846, "grad_norm": 0.5215851280496091, "learning_rate": 1.6182246887151055e-05, "loss": 0.8255, "step": 2425 }, { "epoch": 0.7462319286373423, "grad_norm": 0.4661117847311073, "learning_rate": 1.6145568136791023e-05, "loss": 0.9318, "step": 2426 }, { "epoch": 0.7465395262996001, "grad_norm": 0.4823533788255518, "learning_rate": 1.610892299633704e-05, "loss": 0.8266, "step": 2427 }, { "epoch": 0.7468471239618579, "grad_norm": 0.44160724730653267, "learning_rate": 1.607231150216948e-05, "loss": 0.8446, "step": 2428 }, { "epoch": 0.7471547216241157, "grad_norm": 0.6384743453781889, "learning_rate": 1.6035733690635462e-05, "loss": 0.973, "step": 2429 }, { "epoch": 0.7474623192863734, "grad_norm": 0.6301002032030746, "learning_rate": 1.5999189598048575e-05, "loss": 0.8676, "step": 2430 }, { "epoch": 0.7477699169486312, "grad_norm": 0.5733718213712867, "learning_rate": 1.5962679260688967e-05, "loss": 0.9178, "step": 2431 }, { "epoch": 0.748077514610889, "grad_norm": 0.45475124919219817, "learning_rate": 1.592620271480324e-05, "loss": 1.0039, "step": 2432 }, { "epoch": 0.7483851122731467, "grad_norm": 0.9951487350914282, "learning_rate": 1.588975999660453e-05, "loss": 0.9566, "step": 2433 }, { "epoch": 0.7486927099354045, "grad_norm": 0.7117544729166654, "learning_rate": 1.585335114227227e-05, "loss": 1.0905, "step": 2434 }, { "epoch": 0.7490003075976622, "grad_norm": 0.49735645916124055, "learning_rate": 1.5816976187952388e-05, "loss": 1.0037, "step": 2435 }, { "epoch": 0.74930790525992, "grad_norm": 0.4627614218124244, "learning_rate": 1.5780635169757087e-05, "loss": 0.9387, "step": 2436 }, { "epoch": 0.7496155029221778, "grad_norm": 0.8298862231299765, "learning_rate": 1.5744328123764894e-05, "loss": 0.9868, "step": 2437 }, { "epoch": 0.7499231005844356, "grad_norm": 0.6409841249582987, "learning_rate": 1.5708055086020607e-05, "loss": 0.8102, "step": 2438 }, { "epoch": 0.7502306982466933, "grad_norm": 0.6899842415841712, "learning_rate": 1.567181609253532e-05, "loss": 0.8256, "step": 2439 }, { "epoch": 0.750538295908951, "grad_norm": 0.6396663111736148, "learning_rate": 1.5635611179286202e-05, "loss": 0.8409, "step": 2440 }, { "epoch": 0.7508458935712089, "grad_norm": 0.4946851119963336, "learning_rate": 1.5599440382216717e-05, "loss": 0.8294, "step": 2441 }, { "epoch": 0.7511534912334666, "grad_norm": 0.6230470035052382, "learning_rate": 1.5563303737236388e-05, "loss": 0.9995, "step": 2442 }, { "epoch": 0.7514610888957244, "grad_norm": 0.4980623928357316, "learning_rate": 1.5527201280220856e-05, "loss": 0.9098, "step": 2443 }, { "epoch": 0.7517686865579821, "grad_norm": 0.6707642948255498, "learning_rate": 1.549113304701179e-05, "loss": 0.9317, "step": 2444 }, { "epoch": 0.75207628422024, "grad_norm": 0.8374470355062235, "learning_rate": 1.545509907341696e-05, "loss": 1.1589, "step": 2445 }, { "epoch": 0.7523838818824977, "grad_norm": 0.5086213460262471, "learning_rate": 1.5419099395210008e-05, "loss": 1.0377, "step": 2446 }, { "epoch": 0.7526914795447555, "grad_norm": 0.42449250611182854, "learning_rate": 1.5383134048130636e-05, "loss": 0.8929, "step": 2447 }, { "epoch": 0.7529990772070132, "grad_norm": 0.6982698491139683, "learning_rate": 1.5347203067884408e-05, "loss": 1.0118, "step": 2448 }, { "epoch": 0.7533066748692709, "grad_norm": 0.4442211809109053, "learning_rate": 1.5311306490142774e-05, "loss": 0.9465, "step": 2449 }, { "epoch": 0.7536142725315288, "grad_norm": 0.7168951820535663, "learning_rate": 1.5275444350543023e-05, "loss": 0.953, "step": 2450 }, { "epoch": 0.7539218701937865, "grad_norm": 0.5043421581984274, "learning_rate": 1.5239616684688296e-05, "loss": 1.0176, "step": 2451 }, { "epoch": 0.7542294678560443, "grad_norm": 0.5145606498196642, "learning_rate": 1.520382352814747e-05, "loss": 0.8863, "step": 2452 }, { "epoch": 0.754537065518302, "grad_norm": 0.6077629107960189, "learning_rate": 1.5168064916455166e-05, "loss": 0.8958, "step": 2453 }, { "epoch": 0.7548446631805599, "grad_norm": 0.7414716634613666, "learning_rate": 1.5132340885111707e-05, "loss": 0.9634, "step": 2454 }, { "epoch": 0.7551522608428176, "grad_norm": 0.5298939060615536, "learning_rate": 1.509665146958314e-05, "loss": 1.083, "step": 2455 }, { "epoch": 0.7554598585050754, "grad_norm": 0.5312086296583447, "learning_rate": 1.5060996705301034e-05, "loss": 0.8367, "step": 2456 }, { "epoch": 0.7557674561673331, "grad_norm": 0.4514686879065897, "learning_rate": 1.5025376627662674e-05, "loss": 0.9156, "step": 2457 }, { "epoch": 0.756075053829591, "grad_norm": 0.6257276702669858, "learning_rate": 1.4989791272030845e-05, "loss": 0.7429, "step": 2458 }, { "epoch": 0.7563826514918487, "grad_norm": 0.4155502883848992, "learning_rate": 1.4954240673733871e-05, "loss": 0.7793, "step": 2459 }, { "epoch": 0.7566902491541064, "grad_norm": 0.6003844217713313, "learning_rate": 1.4918724868065559e-05, "loss": 0.9263, "step": 2460 }, { "epoch": 0.7569978468163642, "grad_norm": 0.5793511428640158, "learning_rate": 1.4883243890285237e-05, "loss": 0.8748, "step": 2461 }, { "epoch": 0.7573054444786219, "grad_norm": 0.5067787008667021, "learning_rate": 1.4847797775617545e-05, "loss": 1.0179, "step": 2462 }, { "epoch": 0.7576130421408798, "grad_norm": 0.37617185889803906, "learning_rate": 1.4812386559252622e-05, "loss": 0.7506, "step": 2463 }, { "epoch": 0.7579206398031375, "grad_norm": 0.5058752430953852, "learning_rate": 1.4777010276345898e-05, "loss": 0.9011, "step": 2464 }, { "epoch": 0.7582282374653952, "grad_norm": 0.4168322634750321, "learning_rate": 1.4741668962018135e-05, "loss": 0.8822, "step": 2465 }, { "epoch": 0.758535835127653, "grad_norm": 0.410786887787752, "learning_rate": 1.4706362651355377e-05, "loss": 0.8595, "step": 2466 }, { "epoch": 0.7588434327899108, "grad_norm": 0.6831748439978349, "learning_rate": 1.4671091379408957e-05, "loss": 0.7715, "step": 2467 }, { "epoch": 0.7591510304521686, "grad_norm": 0.44709599449901943, "learning_rate": 1.463585518119533e-05, "loss": 0.797, "step": 2468 }, { "epoch": 0.7594586281144263, "grad_norm": 0.4877853477280182, "learning_rate": 1.4600654091696236e-05, "loss": 1.0244, "step": 2469 }, { "epoch": 0.7597662257766841, "grad_norm": 0.570133776642992, "learning_rate": 1.4565488145858496e-05, "loss": 0.9482, "step": 2470 }, { "epoch": 0.7600738234389418, "grad_norm": 0.4179043338827309, "learning_rate": 1.4530357378594056e-05, "loss": 0.741, "step": 2471 }, { "epoch": 0.7603814211011997, "grad_norm": 0.44998033197146353, "learning_rate": 1.4495261824779921e-05, "loss": 0.8908, "step": 2472 }, { "epoch": 0.7606890187634574, "grad_norm": 0.44789420130957597, "learning_rate": 1.4460201519258204e-05, "loss": 0.8139, "step": 2473 }, { "epoch": 0.7609966164257151, "grad_norm": 0.5092316225112362, "learning_rate": 1.4425176496835913e-05, "loss": 0.7625, "step": 2474 }, { "epoch": 0.7613042140879729, "grad_norm": 0.5227406609461084, "learning_rate": 1.4390186792285132e-05, "loss": 1.0565, "step": 2475 }, { "epoch": 0.7616118117502307, "grad_norm": 0.5001783843742826, "learning_rate": 1.435523244034283e-05, "loss": 0.9332, "step": 2476 }, { "epoch": 0.7619194094124885, "grad_norm": 0.5299448183758172, "learning_rate": 1.4320313475710884e-05, "loss": 1.0075, "step": 2477 }, { "epoch": 0.7622270070747462, "grad_norm": 0.6586098146194312, "learning_rate": 1.4285429933056033e-05, "loss": 1.0954, "step": 2478 }, { "epoch": 0.762534604737004, "grad_norm": 0.5892757843099711, "learning_rate": 1.4250581847009903e-05, "loss": 0.8091, "step": 2479 }, { "epoch": 0.7628422023992618, "grad_norm": 0.6116813743203354, "learning_rate": 1.421576925216882e-05, "loss": 0.9443, "step": 2480 }, { "epoch": 0.7631498000615196, "grad_norm": 0.4886558014678238, "learning_rate": 1.4180992183093971e-05, "loss": 0.9459, "step": 2481 }, { "epoch": 0.7634573977237773, "grad_norm": 0.7314453469356249, "learning_rate": 1.4146250674311224e-05, "loss": 0.8643, "step": 2482 }, { "epoch": 0.763764995386035, "grad_norm": 0.42344520402292507, "learning_rate": 1.4111544760311157e-05, "loss": 0.8624, "step": 2483 }, { "epoch": 0.7640725930482928, "grad_norm": 0.553221100857048, "learning_rate": 1.407687447554899e-05, "loss": 0.9346, "step": 2484 }, { "epoch": 0.7643801907105506, "grad_norm": 0.5057229881302967, "learning_rate": 1.4042239854444634e-05, "loss": 1.0509, "step": 2485 }, { "epoch": 0.7646877883728084, "grad_norm": 0.9807156239203318, "learning_rate": 1.4007640931382488e-05, "loss": 1.0018, "step": 2486 }, { "epoch": 0.7649953860350661, "grad_norm": 0.5740731976867858, "learning_rate": 1.397307774071162e-05, "loss": 0.876, "step": 2487 }, { "epoch": 0.7653029836973239, "grad_norm": 0.5793980823059501, "learning_rate": 1.3938550316745564e-05, "loss": 0.8315, "step": 2488 }, { "epoch": 0.7656105813595817, "grad_norm": 0.5175599044187602, "learning_rate": 1.3904058693762345e-05, "loss": 0.807, "step": 2489 }, { "epoch": 0.7659181790218395, "grad_norm": 0.658091453226421, "learning_rate": 1.3869602906004452e-05, "loss": 1.0177, "step": 2490 }, { "epoch": 0.7662257766840972, "grad_norm": 0.6514738592055563, "learning_rate": 1.3835182987678852e-05, "loss": 0.9488, "step": 2491 }, { "epoch": 0.7665333743463549, "grad_norm": 0.6617739993354672, "learning_rate": 1.380079897295678e-05, "loss": 0.9379, "step": 2492 }, { "epoch": 0.7668409720086128, "grad_norm": 0.6667250868504906, "learning_rate": 1.376645089597396e-05, "loss": 0.9046, "step": 2493 }, { "epoch": 0.7671485696708705, "grad_norm": 0.560093464626402, "learning_rate": 1.3732138790830352e-05, "loss": 1.0578, "step": 2494 }, { "epoch": 0.7674561673331283, "grad_norm": 0.5003169129339048, "learning_rate": 1.369786269159023e-05, "loss": 1.0955, "step": 2495 }, { "epoch": 0.767763764995386, "grad_norm": 0.4098781810167048, "learning_rate": 1.3663622632282108e-05, "loss": 0.7925, "step": 2496 }, { "epoch": 0.7680713626576438, "grad_norm": 0.6251687733581858, "learning_rate": 1.3629418646898767e-05, "loss": 1.0514, "step": 2497 }, { "epoch": 0.7683789603199016, "grad_norm": 0.5153974487754498, "learning_rate": 1.3595250769397128e-05, "loss": 0.6928, "step": 2498 }, { "epoch": 0.7686865579821593, "grad_norm": 0.5038656472772688, "learning_rate": 1.3561119033698277e-05, "loss": 0.7398, "step": 2499 }, { "epoch": 0.7689941556444171, "grad_norm": 0.5199728764140187, "learning_rate": 1.3527023473687417e-05, "loss": 0.9437, "step": 2500 }, { "epoch": 0.7693017533066748, "grad_norm": 0.7168709232700488, "learning_rate": 1.3492964123213842e-05, "loss": 1.0752, "step": 2501 }, { "epoch": 0.7696093509689327, "grad_norm": 0.8609149682387761, "learning_rate": 1.3458941016090886e-05, "loss": 1.0366, "step": 2502 }, { "epoch": 0.7699169486311904, "grad_norm": 0.5199358355641753, "learning_rate": 1.3424954186095934e-05, "loss": 0.7877, "step": 2503 }, { "epoch": 0.7702245462934482, "grad_norm": 0.4308905319037347, "learning_rate": 1.339100366697033e-05, "loss": 0.8145, "step": 2504 }, { "epoch": 0.7705321439557059, "grad_norm": 0.45870001021764367, "learning_rate": 1.3357089492419361e-05, "loss": 0.9458, "step": 2505 }, { "epoch": 0.7708397416179638, "grad_norm": 0.40774905903445197, "learning_rate": 1.3323211696112254e-05, "loss": 0.978, "step": 2506 }, { "epoch": 0.7711473392802215, "grad_norm": 0.39630145334444394, "learning_rate": 1.3289370311682104e-05, "loss": 0.7343, "step": 2507 }, { "epoch": 0.7714549369424792, "grad_norm": 0.5822971490752491, "learning_rate": 1.3255565372725854e-05, "loss": 1.065, "step": 2508 }, { "epoch": 0.771762534604737, "grad_norm": 0.554912976679445, "learning_rate": 1.3221796912804302e-05, "loss": 0.837, "step": 2509 }, { "epoch": 0.7720701322669947, "grad_norm": 0.8632943218355585, "learning_rate": 1.3188064965441987e-05, "loss": 0.9639, "step": 2510 }, { "epoch": 0.7723777299292526, "grad_norm": 0.5552853178853983, "learning_rate": 1.3154369564127217e-05, "loss": 0.8269, "step": 2511 }, { "epoch": 0.7726853275915103, "grad_norm": 0.44616991139162987, "learning_rate": 1.3120710742312015e-05, "loss": 0.8871, "step": 2512 }, { "epoch": 0.7729929252537681, "grad_norm": 0.5580826685794782, "learning_rate": 1.308708853341209e-05, "loss": 1.0841, "step": 2513 }, { "epoch": 0.7733005229160258, "grad_norm": 1.8300756564065148, "learning_rate": 1.3053502970806785e-05, "loss": 1.0023, "step": 2514 }, { "epoch": 0.7736081205782837, "grad_norm": 0.5081330969260784, "learning_rate": 1.3019954087839114e-05, "loss": 0.8801, "step": 2515 }, { "epoch": 0.7739157182405414, "grad_norm": 0.5717210052329909, "learning_rate": 1.2986441917815629e-05, "loss": 0.8299, "step": 2516 }, { "epoch": 0.7742233159027991, "grad_norm": 0.6567675574403131, "learning_rate": 1.2952966494006435e-05, "loss": 0.8631, "step": 2517 }, { "epoch": 0.7745309135650569, "grad_norm": 0.47922429462832694, "learning_rate": 1.2919527849645174e-05, "loss": 0.9075, "step": 2518 }, { "epoch": 0.7748385112273146, "grad_norm": 0.5792396043333071, "learning_rate": 1.2886126017928963e-05, "loss": 0.8953, "step": 2519 }, { "epoch": 0.7751461088895725, "grad_norm": 0.6460903627928963, "learning_rate": 1.2852761032018374e-05, "loss": 0.9897, "step": 2520 }, { "epoch": 0.7754537065518302, "grad_norm": 1.383437064854223, "learning_rate": 1.2819432925037416e-05, "loss": 0.831, "step": 2521 }, { "epoch": 0.775761304214088, "grad_norm": 0.5640935084549216, "learning_rate": 1.278614173007347e-05, "loss": 1.0787, "step": 2522 }, { "epoch": 0.7760689018763457, "grad_norm": 0.5349567608168122, "learning_rate": 1.2752887480177268e-05, "loss": 0.9235, "step": 2523 }, { "epoch": 0.7763764995386035, "grad_norm": 0.3415092599637833, "learning_rate": 1.271967020836285e-05, "loss": 0.8128, "step": 2524 }, { "epoch": 0.7766840972008613, "grad_norm": 0.4639854645600562, "learning_rate": 1.2686489947607616e-05, "loss": 0.8413, "step": 2525 }, { "epoch": 0.776991694863119, "grad_norm": 0.5469918543798484, "learning_rate": 1.2653346730852106e-05, "loss": 0.8463, "step": 2526 }, { "epoch": 0.7772992925253768, "grad_norm": 0.5542201195457263, "learning_rate": 1.2620240591000193e-05, "loss": 0.9166, "step": 2527 }, { "epoch": 0.7776068901876346, "grad_norm": 0.4479958849518773, "learning_rate": 1.2587171560918886e-05, "loss": 0.7204, "step": 2528 }, { "epoch": 0.7779144878498924, "grad_norm": 0.44939857763477875, "learning_rate": 1.2554139673438358e-05, "loss": 0.9196, "step": 2529 }, { "epoch": 0.7782220855121501, "grad_norm": 0.4208016508166231, "learning_rate": 1.2521144961351893e-05, "loss": 0.9567, "step": 2530 }, { "epoch": 0.7785296831744078, "grad_norm": 0.41223602145268773, "learning_rate": 1.2488187457415928e-05, "loss": 0.7717, "step": 2531 }, { "epoch": 0.7788372808366656, "grad_norm": 0.4810719330182655, "learning_rate": 1.245526719434989e-05, "loss": 0.9696, "step": 2532 }, { "epoch": 0.7791448784989234, "grad_norm": 0.7255025872219865, "learning_rate": 1.242238420483628e-05, "loss": 1.168, "step": 2533 }, { "epoch": 0.7794524761611812, "grad_norm": 0.4611519153442204, "learning_rate": 1.238953852152056e-05, "loss": 0.907, "step": 2534 }, { "epoch": 0.7797600738234389, "grad_norm": 0.5868500695200269, "learning_rate": 1.2356730177011189e-05, "loss": 0.9729, "step": 2535 }, { "epoch": 0.7800676714856967, "grad_norm": 0.5301552905896729, "learning_rate": 1.2323959203879515e-05, "loss": 0.9543, "step": 2536 }, { "epoch": 0.7803752691479545, "grad_norm": 0.4453778945771988, "learning_rate": 1.2291225634659848e-05, "loss": 0.9843, "step": 2537 }, { "epoch": 0.7806828668102123, "grad_norm": 0.49492742908727344, "learning_rate": 1.2258529501849314e-05, "loss": 0.9576, "step": 2538 }, { "epoch": 0.78099046447247, "grad_norm": 0.43875807444093434, "learning_rate": 1.2225870837907877e-05, "loss": 0.9153, "step": 2539 }, { "epoch": 0.7812980621347277, "grad_norm": 0.489729491091851, "learning_rate": 1.2193249675258322e-05, "loss": 0.8912, "step": 2540 }, { "epoch": 0.7816056597969856, "grad_norm": 0.5577369948249472, "learning_rate": 1.2160666046286184e-05, "loss": 0.8826, "step": 2541 }, { "epoch": 0.7819132574592433, "grad_norm": 0.47951999523995725, "learning_rate": 1.2128119983339743e-05, "loss": 0.9047, "step": 2542 }, { "epoch": 0.7822208551215011, "grad_norm": 0.4267611642616841, "learning_rate": 1.2095611518729999e-05, "loss": 0.8873, "step": 2543 }, { "epoch": 0.7825284527837588, "grad_norm": 0.4539095826791148, "learning_rate": 1.2063140684730612e-05, "loss": 0.791, "step": 2544 }, { "epoch": 0.7828360504460166, "grad_norm": 1.6081617116762097, "learning_rate": 1.2030707513577878e-05, "loss": 0.78, "step": 2545 }, { "epoch": 0.7831436481082744, "grad_norm": 0.4526055171253378, "learning_rate": 1.1998312037470704e-05, "loss": 0.9678, "step": 2546 }, { "epoch": 0.7834512457705322, "grad_norm": 0.5370224477219038, "learning_rate": 1.1965954288570586e-05, "loss": 0.8559, "step": 2547 }, { "epoch": 0.7837588434327899, "grad_norm": 0.6047405223365839, "learning_rate": 1.193363429900154e-05, "loss": 0.966, "step": 2548 }, { "epoch": 0.7840664410950476, "grad_norm": 0.5599644191585214, "learning_rate": 1.1901352100850138e-05, "loss": 0.8944, "step": 2549 }, { "epoch": 0.7843740387573055, "grad_norm": 0.4739997323959172, "learning_rate": 1.1869107726165397e-05, "loss": 0.8561, "step": 2550 }, { "epoch": 0.7846816364195632, "grad_norm": 0.5405962469744285, "learning_rate": 1.1836901206958795e-05, "loss": 0.938, "step": 2551 }, { "epoch": 0.784989234081821, "grad_norm": 0.48435693679750214, "learning_rate": 1.180473257520423e-05, "loss": 0.8617, "step": 2552 }, { "epoch": 0.7852968317440787, "grad_norm": 0.6794518051269396, "learning_rate": 1.1772601862837985e-05, "loss": 0.9349, "step": 2553 }, { "epoch": 0.7856044294063365, "grad_norm": 0.5158958735611596, "learning_rate": 1.1740509101758673e-05, "loss": 0.7093, "step": 2554 }, { "epoch": 0.7859120270685943, "grad_norm": 0.5449346369132815, "learning_rate": 1.170845432382729e-05, "loss": 0.9118, "step": 2555 }, { "epoch": 0.786219624730852, "grad_norm": 0.43803250403670024, "learning_rate": 1.167643756086707e-05, "loss": 0.8246, "step": 2556 }, { "epoch": 0.7865272223931098, "grad_norm": 0.5684431531441935, "learning_rate": 1.1644458844663525e-05, "loss": 1.0685, "step": 2557 }, { "epoch": 0.7868348200553675, "grad_norm": 0.4316966000528404, "learning_rate": 1.1612518206964395e-05, "loss": 0.9762, "step": 2558 }, { "epoch": 0.7871424177176254, "grad_norm": 0.5922539113538524, "learning_rate": 1.1580615679479595e-05, "loss": 0.7765, "step": 2559 }, { "epoch": 0.7874500153798831, "grad_norm": 0.5260730785583676, "learning_rate": 1.154875129388126e-05, "loss": 0.918, "step": 2560 }, { "epoch": 0.7877576130421409, "grad_norm": 0.4791538018294858, "learning_rate": 1.1516925081803603e-05, "loss": 0.9323, "step": 2561 }, { "epoch": 0.7880652107043986, "grad_norm": 0.4878232289810632, "learning_rate": 1.1485137074842966e-05, "loss": 0.8016, "step": 2562 }, { "epoch": 0.7883728083666565, "grad_norm": 0.5469621593609234, "learning_rate": 1.145338730455775e-05, "loss": 0.8557, "step": 2563 }, { "epoch": 0.7886804060289142, "grad_norm": 0.8037748443726425, "learning_rate": 1.1421675802468407e-05, "loss": 0.8007, "step": 2564 }, { "epoch": 0.788988003691172, "grad_norm": 0.614960333051487, "learning_rate": 1.139000260005737e-05, "loss": 0.9344, "step": 2565 }, { "epoch": 0.7892956013534297, "grad_norm": 0.614960333051487, "learning_rate": 1.139000260005737e-05, "loss": 0.8864, "step": 2566 }, { "epoch": 0.7896031990156874, "grad_norm": 1.8631858368072907, "learning_rate": 1.1358367728769104e-05, "loss": 0.9957, "step": 2567 }, { "epoch": 0.7899107966779453, "grad_norm": 0.5653818074571026, "learning_rate": 1.1326771220009975e-05, "loss": 0.8877, "step": 2568 }, { "epoch": 0.790218394340203, "grad_norm": 0.3859048760266398, "learning_rate": 1.1295213105148262e-05, "loss": 0.9391, "step": 2569 }, { "epoch": 0.7905259920024608, "grad_norm": 0.47089237578430965, "learning_rate": 1.1263693415514148e-05, "loss": 0.9102, "step": 2570 }, { "epoch": 0.7908335896647185, "grad_norm": 0.7023598011551709, "learning_rate": 1.1232212182399665e-05, "loss": 0.9045, "step": 2571 }, { "epoch": 0.7911411873269764, "grad_norm": 0.5256433900232064, "learning_rate": 1.120076943705864e-05, "loss": 0.8378, "step": 2572 }, { "epoch": 0.7914487849892341, "grad_norm": 0.939171316142658, "learning_rate": 1.116936521070674e-05, "loss": 0.8838, "step": 2573 }, { "epoch": 0.7917563826514918, "grad_norm": 0.6177153063995796, "learning_rate": 1.1137999534521354e-05, "loss": 0.8294, "step": 2574 }, { "epoch": 0.7920639803137496, "grad_norm": 0.42249597288884166, "learning_rate": 1.1106672439641608e-05, "loss": 0.8898, "step": 2575 }, { "epoch": 0.7923715779760074, "grad_norm": 0.5710821522611521, "learning_rate": 1.1075383957168317e-05, "loss": 0.9455, "step": 2576 }, { "epoch": 0.7926791756382652, "grad_norm": 0.9938523067935099, "learning_rate": 1.1044134118163984e-05, "loss": 1.1261, "step": 2577 }, { "epoch": 0.7929867733005229, "grad_norm": 0.8059731478669316, "learning_rate": 1.101292295365271e-05, "loss": 1.0718, "step": 2578 }, { "epoch": 0.7932943709627807, "grad_norm": 0.72177083795192, "learning_rate": 1.0981750494620258e-05, "loss": 1.044, "step": 2579 }, { "epoch": 0.7936019686250384, "grad_norm": 0.5538017262185433, "learning_rate": 1.0950616772013921e-05, "loss": 0.8631, "step": 2580 }, { "epoch": 0.7939095662872963, "grad_norm": 0.43173022254090393, "learning_rate": 1.091952181674254e-05, "loss": 0.7608, "step": 2581 }, { "epoch": 0.794217163949554, "grad_norm": 0.5296136782725638, "learning_rate": 1.0888465659676483e-05, "loss": 0.8325, "step": 2582 }, { "epoch": 0.7945247616118117, "grad_norm": 1.0511070127578925, "learning_rate": 1.085744833164759e-05, "loss": 0.9929, "step": 2583 }, { "epoch": 0.7948323592740695, "grad_norm": 0.47714484085896003, "learning_rate": 1.0826469863449141e-05, "loss": 0.868, "step": 2584 }, { "epoch": 0.7951399569363273, "grad_norm": 0.643314784600204, "learning_rate": 1.0795530285835882e-05, "loss": 1.0294, "step": 2585 }, { "epoch": 0.7954475545985851, "grad_norm": 0.7842056503141597, "learning_rate": 1.0764629629523898e-05, "loss": 1.0277, "step": 2586 }, { "epoch": 0.7957551522608428, "grad_norm": 0.6017608772700596, "learning_rate": 1.0733767925190658e-05, "loss": 1.0042, "step": 2587 }, { "epoch": 0.7960627499231006, "grad_norm": 0.6378039999904123, "learning_rate": 1.070294520347494e-05, "loss": 1.0535, "step": 2588 }, { "epoch": 0.7963703475853583, "grad_norm": 0.5328454786883485, "learning_rate": 1.0672161494976885e-05, "loss": 0.9597, "step": 2589 }, { "epoch": 0.7966779452476161, "grad_norm": 0.5366522333004363, "learning_rate": 1.0641416830257795e-05, "loss": 0.9351, "step": 2590 }, { "epoch": 0.7969855429098739, "grad_norm": 0.40752400889873397, "learning_rate": 1.061071123984031e-05, "loss": 0.8307, "step": 2591 }, { "epoch": 0.7972931405721316, "grad_norm": 0.46597499102593287, "learning_rate": 1.0580044754208229e-05, "loss": 0.9118, "step": 2592 }, { "epoch": 0.7976007382343894, "grad_norm": 0.5219974165287725, "learning_rate": 1.0549417403806538e-05, "loss": 0.8588, "step": 2593 }, { "epoch": 0.7979083358966472, "grad_norm": 0.5940250402468039, "learning_rate": 1.0518829219041354e-05, "loss": 0.9361, "step": 2594 }, { "epoch": 0.798215933558905, "grad_norm": 0.4529416949738526, "learning_rate": 1.0488280230279957e-05, "loss": 0.9049, "step": 2595 }, { "epoch": 0.7985235312211627, "grad_norm": 0.7985587459054815, "learning_rate": 1.0457770467850669e-05, "loss": 0.9359, "step": 2596 }, { "epoch": 0.7988311288834204, "grad_norm": 0.5104007009696166, "learning_rate": 1.0427299962042886e-05, "loss": 0.8134, "step": 2597 }, { "epoch": 0.7991387265456783, "grad_norm": 0.4988727709812062, "learning_rate": 1.0396868743107024e-05, "loss": 0.9282, "step": 2598 }, { "epoch": 0.799446324207936, "grad_norm": 0.490413999265363, "learning_rate": 1.0366476841254508e-05, "loss": 0.756, "step": 2599 }, { "epoch": 0.7997539218701938, "grad_norm": 0.8475767222458146, "learning_rate": 1.0336124286657711e-05, "loss": 0.8028, "step": 2600 }, { "epoch": 0.8000615195324515, "grad_norm": 0.4884606615871406, "learning_rate": 1.0305811109449976e-05, "loss": 0.8527, "step": 2601 }, { "epoch": 0.8003691171947093, "grad_norm": 0.4539466505970026, "learning_rate": 1.0275537339725528e-05, "loss": 0.9164, "step": 2602 }, { "epoch": 0.8006767148569671, "grad_norm": 0.7119210851098061, "learning_rate": 1.0245303007539464e-05, "loss": 0.9427, "step": 2603 }, { "epoch": 0.8009843125192249, "grad_norm": 0.7119210851098061, "learning_rate": 1.0245303007539464e-05, "loss": 0.9965, "step": 2604 }, { "epoch": 0.8012919101814826, "grad_norm": 0.5602982697878308, "learning_rate": 1.0215108142907741e-05, "loss": 0.861, "step": 2605 }, { "epoch": 0.8015995078437403, "grad_norm": 0.6534102962997768, "learning_rate": 1.0184952775807133e-05, "loss": 0.8495, "step": 2606 }, { "epoch": 0.8019071055059982, "grad_norm": 0.6253841115598191, "learning_rate": 1.0154836936175182e-05, "loss": 0.9065, "step": 2607 }, { "epoch": 0.8022147031682559, "grad_norm": 0.7461465321398044, "learning_rate": 1.0124760653910232e-05, "loss": 1.0179, "step": 2608 }, { "epoch": 0.8025223008305137, "grad_norm": 0.5006753303997609, "learning_rate": 1.0094723958871322e-05, "loss": 0.9913, "step": 2609 }, { "epoch": 0.8028298984927714, "grad_norm": 0.47025507507965586, "learning_rate": 1.0064726880878183e-05, "loss": 1.0496, "step": 2610 }, { "epoch": 0.8031374961550293, "grad_norm": 0.39313698753987775, "learning_rate": 1.003476944971124e-05, "loss": 0.7654, "step": 2611 }, { "epoch": 0.803445093817287, "grad_norm": 0.7855774611154176, "learning_rate": 1.0004851695111544e-05, "loss": 0.8806, "step": 2612 }, { "epoch": 0.8037526914795448, "grad_norm": 0.681620863969015, "learning_rate": 9.974973646780738e-06, "loss": 0.9205, "step": 2613 }, { "epoch": 0.8040602891418025, "grad_norm": 0.42565394229614745, "learning_rate": 9.9451353343811e-06, "loss": 0.7873, "step": 2614 }, { "epoch": 0.8043678868040602, "grad_norm": 0.49060237689559205, "learning_rate": 9.915336787535406e-06, "loss": 0.9983, "step": 2615 }, { "epoch": 0.8046754844663181, "grad_norm": 0.42086258976911683, "learning_rate": 9.885578035826975e-06, "loss": 0.7705, "step": 2616 }, { "epoch": 0.8049830821285758, "grad_norm": 0.4852015380979352, "learning_rate": 9.855859108799598e-06, "loss": 0.8217, "step": 2617 }, { "epoch": 0.8052906797908336, "grad_norm": 0.460999691380805, "learning_rate": 9.826180035957593e-06, "loss": 0.8663, "step": 2618 }, { "epoch": 0.8055982774530913, "grad_norm": 0.6233679216113108, "learning_rate": 9.796540846765606e-06, "loss": 1.0475, "step": 2619 }, { "epoch": 0.8059058751153492, "grad_norm": 0.5289260723665723, "learning_rate": 9.766941570648785e-06, "loss": 0.7843, "step": 2620 }, { "epoch": 0.8062134727776069, "grad_norm": 0.4984813357027313, "learning_rate": 9.737382236992609e-06, "loss": 0.9038, "step": 2621 }, { "epoch": 0.8065210704398647, "grad_norm": 0.6195979112191168, "learning_rate": 9.707862875142898e-06, "loss": 0.9375, "step": 2622 }, { "epoch": 0.8068286681021224, "grad_norm": 0.5229384769407968, "learning_rate": 9.678383514405798e-06, "loss": 0.8784, "step": 2623 }, { "epoch": 0.8071362657643802, "grad_norm": 0.47470502797111064, "learning_rate": 9.648944184047776e-06, "loss": 0.7076, "step": 2624 }, { "epoch": 0.807443863426638, "grad_norm": 0.628991883666002, "learning_rate": 9.619544913295474e-06, "loss": 0.8699, "step": 2625 }, { "epoch": 0.8077514610888957, "grad_norm": 0.40604712978093566, "learning_rate": 9.59018573133586e-06, "loss": 0.8624, "step": 2626 }, { "epoch": 0.8080590587511535, "grad_norm": 0.585556482113589, "learning_rate": 9.560866667316043e-06, "loss": 0.7549, "step": 2627 }, { "epoch": 0.8083666564134112, "grad_norm": 0.5004210559221485, "learning_rate": 9.531587750343318e-06, "loss": 0.9951, "step": 2628 }, { "epoch": 0.8086742540756691, "grad_norm": 0.389864266191901, "learning_rate": 9.502349009485112e-06, "loss": 0.9004, "step": 2629 }, { "epoch": 0.8089818517379268, "grad_norm": 0.49701691121433517, "learning_rate": 9.473150473769022e-06, "loss": 0.8737, "step": 2630 }, { "epoch": 0.8092894494001845, "grad_norm": 0.4668740013011649, "learning_rate": 9.443992172182626e-06, "loss": 0.9939, "step": 2631 }, { "epoch": 0.8095970470624423, "grad_norm": 0.5487032914001688, "learning_rate": 9.414874133673669e-06, "loss": 0.7287, "step": 2632 }, { "epoch": 0.8099046447247001, "grad_norm": 0.505331497026104, "learning_rate": 9.385796387149847e-06, "loss": 0.8921, "step": 2633 }, { "epoch": 0.8102122423869579, "grad_norm": 0.4917056972710873, "learning_rate": 9.356758961478901e-06, "loss": 0.9152, "step": 2634 }, { "epoch": 0.8105198400492156, "grad_norm": 0.6176621432500902, "learning_rate": 9.32776188548849e-06, "loss": 0.9697, "step": 2635 }, { "epoch": 0.8108274377114734, "grad_norm": 0.4123940655060963, "learning_rate": 9.298805187966303e-06, "loss": 0.7672, "step": 2636 }, { "epoch": 0.8111350353737311, "grad_norm": 0.707974365349879, "learning_rate": 9.269888897659823e-06, "loss": 0.9724, "step": 2637 }, { "epoch": 0.811442633035989, "grad_norm": 0.3837254929543221, "learning_rate": 9.24101304327653e-06, "loss": 0.8938, "step": 2638 }, { "epoch": 0.8117502306982467, "grad_norm": 0.7379308433242916, "learning_rate": 9.2121776534837e-06, "loss": 1.0982, "step": 2639 }, { "epoch": 0.8120578283605044, "grad_norm": 0.45999106392387745, "learning_rate": 9.183382756908437e-06, "loss": 0.98, "step": 2640 }, { "epoch": 0.8123654260227622, "grad_norm": 0.4920469662450564, "learning_rate": 9.15462838213766e-06, "loss": 0.8739, "step": 2641 }, { "epoch": 0.81267302368502, "grad_norm": 0.6065163183483828, "learning_rate": 9.12591455771808e-06, "loss": 1.1506, "step": 2642 }, { "epoch": 0.8129806213472778, "grad_norm": 0.4458723006700617, "learning_rate": 9.097241312156073e-06, "loss": 0.8224, "step": 2643 }, { "epoch": 0.8132882190095355, "grad_norm": 0.533856248782194, "learning_rate": 9.068608673917833e-06, "loss": 0.8654, "step": 2644 }, { "epoch": 0.8135958166717933, "grad_norm": 0.6280403159528968, "learning_rate": 9.040016671429169e-06, "loss": 0.9241, "step": 2645 }, { "epoch": 0.8139034143340511, "grad_norm": 0.699540148943052, "learning_rate": 9.011465333075563e-06, "loss": 0.9167, "step": 2646 }, { "epoch": 0.8142110119963089, "grad_norm": 0.4520928793814208, "learning_rate": 8.982954687202127e-06, "loss": 0.7846, "step": 2647 }, { "epoch": 0.8145186096585666, "grad_norm": 0.5187609203805875, "learning_rate": 8.954484762113608e-06, "loss": 0.7619, "step": 2648 }, { "epoch": 0.8148262073208243, "grad_norm": 0.4870702470471351, "learning_rate": 8.92605558607425e-06, "loss": 0.8535, "step": 2649 }, { "epoch": 0.8151338049830821, "grad_norm": 0.6225636487331379, "learning_rate": 8.897667187307929e-06, "loss": 1.0128, "step": 2650 }, { "epoch": 0.8154414026453399, "grad_norm": 0.5642827617403423, "learning_rate": 8.869319593997982e-06, "loss": 0.9118, "step": 2651 }, { "epoch": 0.8157490003075977, "grad_norm": 0.494695580408648, "learning_rate": 8.841012834287255e-06, "loss": 1.0412, "step": 2652 }, { "epoch": 0.8160565979698554, "grad_norm": 0.49922898874095495, "learning_rate": 8.812746936278038e-06, "loss": 1.0485, "step": 2653 }, { "epoch": 0.8163641956321132, "grad_norm": 0.515634451723186, "learning_rate": 8.784521928032102e-06, "loss": 0.889, "step": 2654 }, { "epoch": 0.816671793294371, "grad_norm": 0.6080176141414741, "learning_rate": 8.756337837570545e-06, "loss": 0.8295, "step": 2655 }, { "epoch": 0.8169793909566287, "grad_norm": 0.5132627487730819, "learning_rate": 8.728194692873926e-06, "loss": 0.9494, "step": 2656 }, { "epoch": 0.8172869886188865, "grad_norm": 0.4548754132290717, "learning_rate": 8.700092521882098e-06, "loss": 0.8401, "step": 2657 }, { "epoch": 0.8175945862811442, "grad_norm": 0.5526490403213705, "learning_rate": 8.67203135249426e-06, "loss": 0.979, "step": 2658 }, { "epoch": 0.8179021839434021, "grad_norm": 0.5909483810566696, "learning_rate": 8.64401121256888e-06, "loss": 0.9079, "step": 2659 }, { "epoch": 0.8182097816056598, "grad_norm": 0.5776633764660893, "learning_rate": 8.61603212992375e-06, "loss": 1.025, "step": 2660 }, { "epoch": 0.8185173792679176, "grad_norm": 0.49562680886531596, "learning_rate": 8.588094132335829e-06, "loss": 0.8378, "step": 2661 }, { "epoch": 0.8188249769301753, "grad_norm": 0.7142810765789798, "learning_rate": 8.56019724754134e-06, "loss": 0.9382, "step": 2662 }, { "epoch": 0.819132574592433, "grad_norm": 0.7211165623994525, "learning_rate": 8.532341503235652e-06, "loss": 0.8526, "step": 2663 }, { "epoch": 0.8194401722546909, "grad_norm": 0.5948112113661636, "learning_rate": 8.504526927073359e-06, "loss": 0.8948, "step": 2664 }, { "epoch": 0.8197477699169486, "grad_norm": 0.5730457113448234, "learning_rate": 8.476753546668076e-06, "loss": 1.014, "step": 2665 }, { "epoch": 0.8200553675792064, "grad_norm": 0.5586462441387343, "learning_rate": 8.449021389592627e-06, "loss": 1.1265, "step": 2666 }, { "epoch": 0.8203629652414641, "grad_norm": 0.4461594845244729, "learning_rate": 8.421330483378837e-06, "loss": 0.7524, "step": 2667 }, { "epoch": 0.820670562903722, "grad_norm": 0.39358174854313144, "learning_rate": 8.393680855517616e-06, "loss": 0.6749, "step": 2668 }, { "epoch": 0.8209781605659797, "grad_norm": 0.5238335039829677, "learning_rate": 8.366072533458868e-06, "loss": 0.9373, "step": 2669 }, { "epoch": 0.8212857582282375, "grad_norm": 0.6032642113709517, "learning_rate": 8.338505544611536e-06, "loss": 0.9057, "step": 2670 }, { "epoch": 0.8215933558904952, "grad_norm": 0.539479352157989, "learning_rate": 8.31097991634346e-06, "loss": 0.8429, "step": 2671 }, { "epoch": 0.8219009535527529, "grad_norm": 0.6207133477744745, "learning_rate": 8.283495675981473e-06, "loss": 0.9477, "step": 2672 }, { "epoch": 0.8222085512150108, "grad_norm": 0.7705080270436137, "learning_rate": 8.256052850811303e-06, "loss": 1.0021, "step": 2673 }, { "epoch": 0.8225161488772685, "grad_norm": 0.6199641356329553, "learning_rate": 8.228651468077548e-06, "loss": 0.8783, "step": 2674 }, { "epoch": 0.8228237465395263, "grad_norm": 0.7169338669041996, "learning_rate": 8.20129155498367e-06, "loss": 0.9768, "step": 2675 }, { "epoch": 0.823131344201784, "grad_norm": 0.407726991282633, "learning_rate": 8.173973138692004e-06, "loss": 0.7775, "step": 2676 }, { "epoch": 0.8234389418640419, "grad_norm": 0.4741378435082955, "learning_rate": 8.146696246323593e-06, "loss": 0.8731, "step": 2677 }, { "epoch": 0.8237465395262996, "grad_norm": 0.4153814215287734, "learning_rate": 8.119460904958354e-06, "loss": 0.9656, "step": 2678 }, { "epoch": 0.8240541371885574, "grad_norm": 0.6402346788110803, "learning_rate": 8.092267141634897e-06, "loss": 0.9852, "step": 2679 }, { "epoch": 0.8243617348508151, "grad_norm": 0.5648720793585837, "learning_rate": 8.065114983350569e-06, "loss": 0.9616, "step": 2680 }, { "epoch": 0.824669332513073, "grad_norm": 0.4658509583503347, "learning_rate": 8.038004457061398e-06, "loss": 0.6573, "step": 2681 }, { "epoch": 0.8249769301753307, "grad_norm": 0.43621597002913426, "learning_rate": 8.010935589682134e-06, "loss": 0.8487, "step": 2682 }, { "epoch": 0.8252845278375884, "grad_norm": 0.9512133953769951, "learning_rate": 7.983908408086076e-06, "loss": 1.091, "step": 2683 }, { "epoch": 0.8255921254998462, "grad_norm": 0.6309183960042797, "learning_rate": 7.956922939105237e-06, "loss": 1.0136, "step": 2684 }, { "epoch": 0.8258997231621039, "grad_norm": 0.4913274913308855, "learning_rate": 7.929979209530153e-06, "loss": 0.9759, "step": 2685 }, { "epoch": 0.8262073208243618, "grad_norm": 0.41062640183603316, "learning_rate": 7.903077246109952e-06, "loss": 0.833, "step": 2686 }, { "epoch": 0.8265149184866195, "grad_norm": 0.5668651578337105, "learning_rate": 7.876217075552267e-06, "loss": 0.9415, "step": 2687 }, { "epoch": 0.8268225161488773, "grad_norm": 0.8736096965380784, "learning_rate": 7.849398724523305e-06, "loss": 1.1376, "step": 2688 }, { "epoch": 0.827130113811135, "grad_norm": 0.5350821539190334, "learning_rate": 7.822622219647658e-06, "loss": 0.7592, "step": 2689 }, { "epoch": 0.8274377114733928, "grad_norm": 0.5311480855458091, "learning_rate": 7.795887587508466e-06, "loss": 0.9601, "step": 2690 }, { "epoch": 0.8277453091356506, "grad_norm": 0.5283284551798829, "learning_rate": 7.769194854647244e-06, "loss": 0.958, "step": 2691 }, { "epoch": 0.8280529067979083, "grad_norm": 0.6130611478202773, "learning_rate": 7.742544047563937e-06, "loss": 1.0305, "step": 2692 }, { "epoch": 0.8283605044601661, "grad_norm": 0.4915038147483201, "learning_rate": 7.715935192716839e-06, "loss": 0.8751, "step": 2693 }, { "epoch": 0.8286681021224239, "grad_norm": 0.5107561496734317, "learning_rate": 7.689368316522643e-06, "loss": 0.9964, "step": 2694 }, { "epoch": 0.8289756997846817, "grad_norm": 0.43027852866950905, "learning_rate": 7.662843445356305e-06, "loss": 0.7407, "step": 2695 }, { "epoch": 0.8292832974469394, "grad_norm": 0.4560771629831086, "learning_rate": 7.63636060555113e-06, "loss": 0.9226, "step": 2696 }, { "epoch": 0.8295908951091971, "grad_norm": 0.46960087288420455, "learning_rate": 7.60991982339867e-06, "loss": 0.9168, "step": 2697 }, { "epoch": 0.8298984927714549, "grad_norm": 0.359481701645474, "learning_rate": 7.5835211251487294e-06, "loss": 0.9297, "step": 2698 }, { "epoch": 0.8302060904337127, "grad_norm": 0.44977893799113833, "learning_rate": 7.557164537009326e-06, "loss": 0.9398, "step": 2699 }, { "epoch": 0.8305136880959705, "grad_norm": 0.5167374292912624, "learning_rate": 7.530850085146701e-06, "loss": 0.8705, "step": 2700 }, { "epoch": 0.8308212857582282, "grad_norm": 0.4562137874731406, "learning_rate": 7.504577795685202e-06, "loss": 0.8191, "step": 2701 }, { "epoch": 0.831128883420486, "grad_norm": 0.4544927174717758, "learning_rate": 7.478347694707394e-06, "loss": 0.935, "step": 2702 }, { "epoch": 0.8314364810827438, "grad_norm": 0.4339237995180321, "learning_rate": 7.452159808253906e-06, "loss": 0.8667, "step": 2703 }, { "epoch": 0.8317440787450016, "grad_norm": 0.4655308740758926, "learning_rate": 7.426014162323469e-06, "loss": 0.834, "step": 2704 }, { "epoch": 0.8320516764072593, "grad_norm": 0.8275476570158146, "learning_rate": 7.399910782872882e-06, "loss": 0.9467, "step": 2705 }, { "epoch": 0.832359274069517, "grad_norm": 0.47504084271361896, "learning_rate": 7.373849695817009e-06, "loss": 0.8732, "step": 2706 }, { "epoch": 0.8326668717317749, "grad_norm": 0.9080639758941594, "learning_rate": 7.34783092702866e-06, "loss": 1.2088, "step": 2707 }, { "epoch": 0.8329744693940326, "grad_norm": 0.44732710756368843, "learning_rate": 7.321854502338704e-06, "loss": 0.8697, "step": 2708 }, { "epoch": 0.8332820670562904, "grad_norm": 0.4063450022118725, "learning_rate": 7.295920447535931e-06, "loss": 0.7706, "step": 2709 }, { "epoch": 0.8335896647185481, "grad_norm": 0.6867032126799272, "learning_rate": 7.270028788367083e-06, "loss": 0.976, "step": 2710 }, { "epoch": 0.8338972623808059, "grad_norm": 0.6012598137269272, "learning_rate": 7.244179550536795e-06, "loss": 0.9254, "step": 2711 }, { "epoch": 0.8342048600430637, "grad_norm": 0.43051582082952955, "learning_rate": 7.218372759707626e-06, "loss": 0.8561, "step": 2712 }, { "epoch": 0.8345124577053215, "grad_norm": 0.5050872987071932, "learning_rate": 7.1926084414999265e-06, "loss": 0.9812, "step": 2713 }, { "epoch": 0.8348200553675792, "grad_norm": 1.008150579308234, "learning_rate": 7.166886621491958e-06, "loss": 1.084, "step": 2714 }, { "epoch": 0.8351276530298369, "grad_norm": 0.8236709299886846, "learning_rate": 7.1412073252197455e-06, "loss": 1.1066, "step": 2715 }, { "epoch": 0.8354352506920948, "grad_norm": 0.5375374965236996, "learning_rate": 7.115570578177105e-06, "loss": 0.815, "step": 2716 }, { "epoch": 0.8357428483543525, "grad_norm": 0.5945449052522407, "learning_rate": 7.089976405815602e-06, "loss": 0.9246, "step": 2717 }, { "epoch": 0.8360504460166103, "grad_norm": 0.8336093885845082, "learning_rate": 7.064424833544581e-06, "loss": 0.982, "step": 2718 }, { "epoch": 0.836358043678868, "grad_norm": 0.6242224007153153, "learning_rate": 7.038915886731007e-06, "loss": 1.0759, "step": 2719 }, { "epoch": 0.8366656413411258, "grad_norm": 0.5930714939209206, "learning_rate": 7.013449590699622e-06, "loss": 0.9953, "step": 2720 }, { "epoch": 0.8369732390033836, "grad_norm": 0.7815197272109995, "learning_rate": 6.988025970732765e-06, "loss": 1.0357, "step": 2721 }, { "epoch": 0.8372808366656413, "grad_norm": 0.5598197358779637, "learning_rate": 6.962645052070432e-06, "loss": 1.0609, "step": 2722 }, { "epoch": 0.8375884343278991, "grad_norm": 0.5015911180091032, "learning_rate": 6.937306859910198e-06, "loss": 0.9614, "step": 2723 }, { "epoch": 0.8378960319901568, "grad_norm": 0.47621796348321316, "learning_rate": 6.912011419407282e-06, "loss": 0.9346, "step": 2724 }, { "epoch": 0.8382036296524147, "grad_norm": 0.4230633783105728, "learning_rate": 6.8867587556743715e-06, "loss": 0.8707, "step": 2725 }, { "epoch": 0.8385112273146724, "grad_norm": 0.5428277206348116, "learning_rate": 6.861548893781766e-06, "loss": 0.9789, "step": 2726 }, { "epoch": 0.8388188249769302, "grad_norm": 0.6917482199640896, "learning_rate": 6.836381858757229e-06, "loss": 0.9749, "step": 2727 }, { "epoch": 0.8391264226391879, "grad_norm": 0.5599991323655361, "learning_rate": 6.811257675586019e-06, "loss": 0.8508, "step": 2728 }, { "epoch": 0.8394340203014458, "grad_norm": 0.610222246216994, "learning_rate": 6.78617636921084e-06, "loss": 0.8343, "step": 2729 }, { "epoch": 0.8397416179637035, "grad_norm": 0.4445993008252409, "learning_rate": 6.761137964531877e-06, "loss": 0.8836, "step": 2730 }, { "epoch": 0.8400492156259612, "grad_norm": 0.4385658130931563, "learning_rate": 6.7361424864066325e-06, "loss": 0.8686, "step": 2731 }, { "epoch": 0.840356813288219, "grad_norm": 0.4070010285386065, "learning_rate": 6.7111899596500824e-06, "loss": 0.7895, "step": 2732 }, { "epoch": 0.8406644109504767, "grad_norm": 0.7224398756704782, "learning_rate": 6.686280409034501e-06, "loss": 0.9382, "step": 2733 }, { "epoch": 0.8409720086127346, "grad_norm": 0.55800346493496, "learning_rate": 6.6614138592895614e-06, "loss": 0.9841, "step": 2734 }, { "epoch": 0.8412796062749923, "grad_norm": 0.42849404669970065, "learning_rate": 6.636590335102155e-06, "loss": 0.8321, "step": 2735 }, { "epoch": 0.8415872039372501, "grad_norm": 0.5770708584397329, "learning_rate": 6.611809861116542e-06, "loss": 0.9609, "step": 2736 }, { "epoch": 0.8418948015995078, "grad_norm": 0.4812209791425042, "learning_rate": 6.587072461934202e-06, "loss": 0.7829, "step": 2737 }, { "epoch": 0.8422023992617657, "grad_norm": 0.5510996957068036, "learning_rate": 6.562378162113858e-06, "loss": 0.9035, "step": 2738 }, { "epoch": 0.8425099969240234, "grad_norm": 0.44336352424475317, "learning_rate": 6.537726986171438e-06, "loss": 0.7685, "step": 2739 }, { "epoch": 0.8428175945862811, "grad_norm": 0.5413355783767492, "learning_rate": 6.513118958580095e-06, "loss": 0.8103, "step": 2740 }, { "epoch": 0.8431251922485389, "grad_norm": 0.6305993715289777, "learning_rate": 6.4885541037700725e-06, "loss": 0.9001, "step": 2741 }, { "epoch": 0.8434327899107967, "grad_norm": 0.5085527163506426, "learning_rate": 6.464032446128837e-06, "loss": 0.9086, "step": 2742 }, { "epoch": 0.8437403875730545, "grad_norm": 0.7724639545946386, "learning_rate": 6.439554010000909e-06, "loss": 0.7784, "step": 2743 }, { "epoch": 0.8440479852353122, "grad_norm": 0.5807093881960734, "learning_rate": 6.415118819687921e-06, "loss": 0.872, "step": 2744 }, { "epoch": 0.84435558289757, "grad_norm": 0.4697318059585599, "learning_rate": 6.390726899448568e-06, "loss": 0.7826, "step": 2745 }, { "epoch": 0.8446631805598277, "grad_norm": 0.43106616926412444, "learning_rate": 6.366378273498619e-06, "loss": 0.7956, "step": 2746 }, { "epoch": 0.8449707782220856, "grad_norm": 0.9325650053952717, "learning_rate": 6.342072966010787e-06, "loss": 1.0706, "step": 2747 }, { "epoch": 0.8452783758843433, "grad_norm": 0.8604710300457059, "learning_rate": 6.317811001114865e-06, "loss": 0.9988, "step": 2748 }, { "epoch": 0.845585973546601, "grad_norm": 0.5353069578364246, "learning_rate": 6.293592402897558e-06, "loss": 0.9446, "step": 2749 }, { "epoch": 0.8458935712088588, "grad_norm": 0.42117111984072353, "learning_rate": 6.269417195402543e-06, "loss": 0.9821, "step": 2750 }, { "epoch": 0.8462011688711166, "grad_norm": 0.653535080958168, "learning_rate": 6.245285402630396e-06, "loss": 1.1009, "step": 2751 }, { "epoch": 0.8465087665333744, "grad_norm": 0.6225277496004592, "learning_rate": 6.221197048538652e-06, "loss": 0.9199, "step": 2752 }, { "epoch": 0.8468163641956321, "grad_norm": 0.504734824174126, "learning_rate": 6.197152157041625e-06, "loss": 0.9147, "step": 2753 }, { "epoch": 0.8471239618578899, "grad_norm": 0.4729817673953827, "learning_rate": 6.173150752010571e-06, "loss": 0.9227, "step": 2754 }, { "epoch": 0.8474315595201476, "grad_norm": 0.5031842613755693, "learning_rate": 6.149192857273528e-06, "loss": 0.6936, "step": 2755 }, { "epoch": 0.8477391571824054, "grad_norm": 0.4067366507883518, "learning_rate": 6.1252784966153385e-06, "loss": 0.9323, "step": 2756 }, { "epoch": 0.8480467548446632, "grad_norm": 0.45234743445331255, "learning_rate": 6.1014076937776275e-06, "loss": 0.8979, "step": 2757 }, { "epoch": 0.8483543525069209, "grad_norm": 0.6699360821582724, "learning_rate": 6.077580472458816e-06, "loss": 0.9251, "step": 2758 }, { "epoch": 0.8486619501691787, "grad_norm": 0.47398261764675315, "learning_rate": 6.053796856313976e-06, "loss": 0.7606, "step": 2759 }, { "epoch": 0.8489695478314365, "grad_norm": 0.7477035549011621, "learning_rate": 6.030056868954975e-06, "loss": 0.8459, "step": 2760 }, { "epoch": 0.8492771454936943, "grad_norm": 0.47100499098812376, "learning_rate": 6.006360533950311e-06, "loss": 0.8201, "step": 2761 }, { "epoch": 0.849584743155952, "grad_norm": 0.5111405625679069, "learning_rate": 5.982707874825178e-06, "loss": 0.8915, "step": 2762 }, { "epoch": 0.8498923408182097, "grad_norm": 0.4526512663313659, "learning_rate": 5.959098915061373e-06, "loss": 0.7317, "step": 2763 }, { "epoch": 0.8501999384804676, "grad_norm": 0.8882783970138751, "learning_rate": 5.935533678097371e-06, "loss": 0.8004, "step": 2764 }, { "epoch": 0.8505075361427253, "grad_norm": 0.6403189495653233, "learning_rate": 5.912012187328148e-06, "loss": 1.0418, "step": 2765 }, { "epoch": 0.8508151338049831, "grad_norm": 0.7365710132979341, "learning_rate": 5.8885344661053446e-06, "loss": 0.887, "step": 2766 }, { "epoch": 0.8511227314672408, "grad_norm": 0.6578144431468416, "learning_rate": 5.8651005377370895e-06, "loss": 0.9574, "step": 2767 }, { "epoch": 0.8514303291294986, "grad_norm": 0.40546457174558603, "learning_rate": 5.8417104254880506e-06, "loss": 0.8336, "step": 2768 }, { "epoch": 0.8517379267917564, "grad_norm": 0.8269111611048657, "learning_rate": 5.818364152579386e-06, "loss": 1.1871, "step": 2769 }, { "epoch": 0.8520455244540142, "grad_norm": 0.49247856408048607, "learning_rate": 5.795061742188778e-06, "loss": 0.8904, "step": 2770 }, { "epoch": 0.8523531221162719, "grad_norm": 0.8636370360481681, "learning_rate": 5.771803217450283e-06, "loss": 0.9511, "step": 2771 }, { "epoch": 0.8526607197785296, "grad_norm": 0.770799702939792, "learning_rate": 5.748588601454464e-06, "loss": 1.0343, "step": 2772 }, { "epoch": 0.8529683174407875, "grad_norm": 0.8514675899951972, "learning_rate": 5.725417917248243e-06, "loss": 0.908, "step": 2773 }, { "epoch": 0.8532759151030452, "grad_norm": 0.5223605287655815, "learning_rate": 5.7022911878349576e-06, "loss": 1.0189, "step": 2774 }, { "epoch": 0.853583512765303, "grad_norm": 0.5756007099526025, "learning_rate": 5.679208436174282e-06, "loss": 0.9404, "step": 2775 }, { "epoch": 0.8538911104275607, "grad_norm": 0.7903540044612427, "learning_rate": 5.6561696851822664e-06, "loss": 1.172, "step": 2776 }, { "epoch": 0.8541987080898186, "grad_norm": 0.5730232381035102, "learning_rate": 5.63317495773123e-06, "loss": 0.8773, "step": 2777 }, { "epoch": 0.8545063057520763, "grad_norm": 0.5216245584857715, "learning_rate": 5.610224276649828e-06, "loss": 0.809, "step": 2778 }, { "epoch": 0.854813903414334, "grad_norm": 1.0120197604255996, "learning_rate": 5.587317664722974e-06, "loss": 0.8863, "step": 2779 }, { "epoch": 0.8551215010765918, "grad_norm": 0.44775353614754765, "learning_rate": 5.5644551446918215e-06, "loss": 0.9398, "step": 2780 }, { "epoch": 0.8554290987388495, "grad_norm": 0.49482281983979876, "learning_rate": 5.541636739253752e-06, "loss": 0.8549, "step": 2781 }, { "epoch": 0.8557366964011074, "grad_norm": 0.605682279739131, "learning_rate": 5.518862471062386e-06, "loss": 1.0895, "step": 2782 }, { "epoch": 0.8560442940633651, "grad_norm": 0.5440920993069116, "learning_rate": 5.496132362727452e-06, "loss": 0.9285, "step": 2783 }, { "epoch": 0.8563518917256229, "grad_norm": 0.4128021281106642, "learning_rate": 5.473446436814916e-06, "loss": 1.1019, "step": 2784 }, { "epoch": 0.8566594893878806, "grad_norm": 0.43244729617070027, "learning_rate": 5.450804715846835e-06, "loss": 0.9913, "step": 2785 }, { "epoch": 0.8569670870501385, "grad_norm": 0.5604550344503377, "learning_rate": 5.4282072223013905e-06, "loss": 0.7847, "step": 2786 }, { "epoch": 0.8572746847123962, "grad_norm": 0.8140700167470032, "learning_rate": 5.405653978612857e-06, "loss": 0.786, "step": 2787 }, { "epoch": 0.857582282374654, "grad_norm": 0.502067559433528, "learning_rate": 5.383145007171592e-06, "loss": 0.894, "step": 2788 }, { "epoch": 0.8578898800369117, "grad_norm": 0.43299817188430406, "learning_rate": 5.360680330323964e-06, "loss": 0.8375, "step": 2789 }, { "epoch": 0.8581974776991694, "grad_norm": 0.503073823512862, "learning_rate": 5.338259970372411e-06, "loss": 0.971, "step": 2790 }, { "epoch": 0.8585050753614273, "grad_norm": 0.9316298612009806, "learning_rate": 5.315883949575351e-06, "loss": 1.0725, "step": 2791 }, { "epoch": 0.858812673023685, "grad_norm": 0.5028962696778186, "learning_rate": 5.29355229014718e-06, "loss": 0.8565, "step": 2792 }, { "epoch": 0.8591202706859428, "grad_norm": 0.4944641383900408, "learning_rate": 5.271265014258248e-06, "loss": 0.962, "step": 2793 }, { "epoch": 0.8594278683482005, "grad_norm": 0.5214357015585533, "learning_rate": 5.2490221440348875e-06, "loss": 0.8438, "step": 2794 }, { "epoch": 0.8597354660104584, "grad_norm": 0.4913462545881143, "learning_rate": 5.226823701559275e-06, "loss": 1.1002, "step": 2795 }, { "epoch": 0.8600430636727161, "grad_norm": 0.4672141462125166, "learning_rate": 5.204669708869536e-06, "loss": 0.8827, "step": 2796 }, { "epoch": 0.8603506613349738, "grad_norm": 0.47718415835771477, "learning_rate": 5.182560187959645e-06, "loss": 0.9181, "step": 2797 }, { "epoch": 0.8606582589972316, "grad_norm": 0.5030817189824095, "learning_rate": 5.160495160779427e-06, "loss": 1.0013, "step": 2798 }, { "epoch": 0.8609658566594894, "grad_norm": 0.7142119228385109, "learning_rate": 5.138474649234526e-06, "loss": 0.9356, "step": 2799 }, { "epoch": 0.8612734543217472, "grad_norm": 0.8272565220638398, "learning_rate": 5.1164986751864375e-06, "loss": 1.1148, "step": 2800 }, { "epoch": 0.8615810519840049, "grad_norm": 0.560331077465968, "learning_rate": 5.094567260452359e-06, "loss": 0.7678, "step": 2801 }, { "epoch": 0.8618886496462627, "grad_norm": 0.38712782882613717, "learning_rate": 5.0726804268053315e-06, "loss": 0.7943, "step": 2802 }, { "epoch": 0.8621962473085204, "grad_norm": 0.7943416728417263, "learning_rate": 5.050838195974078e-06, "loss": 0.8646, "step": 2803 }, { "epoch": 0.8625038449707783, "grad_norm": 0.46290249305777714, "learning_rate": 5.029040589643108e-06, "loss": 0.78, "step": 2804 }, { "epoch": 0.862811442633036, "grad_norm": 0.730053860582531, "learning_rate": 5.0072876294525335e-06, "loss": 0.9356, "step": 2805 }, { "epoch": 0.8631190402952937, "grad_norm": 0.547226940530197, "learning_rate": 4.9855793369982366e-06, "loss": 0.9763, "step": 2806 }, { "epoch": 0.8634266379575515, "grad_norm": 0.5488944552123469, "learning_rate": 4.963915733831698e-06, "loss": 0.8593, "step": 2807 }, { "epoch": 0.8637342356198093, "grad_norm": 0.4842188876546444, "learning_rate": 4.942296841460042e-06, "loss": 0.9437, "step": 2808 }, { "epoch": 0.8640418332820671, "grad_norm": 0.5139727748884757, "learning_rate": 4.920722681346013e-06, "loss": 0.8923, "step": 2809 }, { "epoch": 0.8643494309443248, "grad_norm": 0.6912967222621184, "learning_rate": 4.899193274907971e-06, "loss": 0.7865, "step": 2810 }, { "epoch": 0.8646570286065826, "grad_norm": 0.489685634981069, "learning_rate": 4.877708643519768e-06, "loss": 1.0213, "step": 2811 }, { "epoch": 0.8649646262688404, "grad_norm": 0.5183280645577654, "learning_rate": 4.8562688085109e-06, "loss": 0.7688, "step": 2812 }, { "epoch": 0.8652722239310982, "grad_norm": 0.4393992464163035, "learning_rate": 4.834873791166322e-06, "loss": 0.8503, "step": 2813 }, { "epoch": 0.8655798215933559, "grad_norm": 0.5925425761396504, "learning_rate": 4.813523612726528e-06, "loss": 0.9787, "step": 2814 }, { "epoch": 0.8658874192556136, "grad_norm": 0.6000433622875568, "learning_rate": 4.7922182943874686e-06, "loss": 0.9111, "step": 2815 }, { "epoch": 0.8661950169178714, "grad_norm": 0.4782338073400558, "learning_rate": 4.770957857300612e-06, "loss": 0.907, "step": 2816 }, { "epoch": 0.8665026145801292, "grad_norm": 0.45561232840107696, "learning_rate": 4.749742322572798e-06, "loss": 0.8463, "step": 2817 }, { "epoch": 0.866810212242387, "grad_norm": 0.646820732134986, "learning_rate": 4.728571711266344e-06, "loss": 0.9932, "step": 2818 }, { "epoch": 0.8671178099046447, "grad_norm": 0.43620363038308574, "learning_rate": 4.70744604439895e-06, "loss": 0.7819, "step": 2819 }, { "epoch": 0.8674254075669025, "grad_norm": 0.697098330242279, "learning_rate": 4.686365342943682e-06, "loss": 1.017, "step": 2820 }, { "epoch": 0.8677330052291603, "grad_norm": 0.40590585466991674, "learning_rate": 4.665329627828985e-06, "loss": 0.8852, "step": 2821 }, { "epoch": 0.868040602891418, "grad_norm": 0.588254627395077, "learning_rate": 4.644338919938651e-06, "loss": 0.9291, "step": 2822 }, { "epoch": 0.8683482005536758, "grad_norm": 0.5731887077584981, "learning_rate": 4.6233932401117394e-06, "loss": 0.8644, "step": 2823 }, { "epoch": 0.8686557982159335, "grad_norm": 1.0104637533572463, "learning_rate": 4.602492609142678e-06, "loss": 1.0072, "step": 2824 }, { "epoch": 0.8689633958781914, "grad_norm": 0.4383390616323505, "learning_rate": 4.581637047781123e-06, "loss": 0.8934, "step": 2825 }, { "epoch": 0.8692709935404491, "grad_norm": 0.6451058924450199, "learning_rate": 4.560826576731997e-06, "loss": 0.861, "step": 2826 }, { "epoch": 0.8695785912027069, "grad_norm": 1.3621847449897555, "learning_rate": 4.540061216655445e-06, "loss": 0.948, "step": 2827 }, { "epoch": 0.8698861888649646, "grad_norm": 0.4414720459301048, "learning_rate": 4.519340988166882e-06, "loss": 0.815, "step": 2828 }, { "epoch": 0.8701937865272223, "grad_norm": 0.5037262674101037, "learning_rate": 4.49866591183683e-06, "loss": 0.8939, "step": 2829 }, { "epoch": 0.8705013841894802, "grad_norm": 0.5781110487985541, "learning_rate": 4.478036008191066e-06, "loss": 1.047, "step": 2830 }, { "epoch": 0.8708089818517379, "grad_norm": 0.51352522525711, "learning_rate": 4.457451297710474e-06, "loss": 0.8795, "step": 2831 }, { "epoch": 0.8711165795139957, "grad_norm": 0.5529763075625683, "learning_rate": 4.436911800831084e-06, "loss": 0.8296, "step": 2832 }, { "epoch": 0.8714241771762534, "grad_norm": 0.5569426483394543, "learning_rate": 4.4164175379440345e-06, "loss": 0.8, "step": 2833 }, { "epoch": 0.8717317748385113, "grad_norm": 0.5772211410669383, "learning_rate": 4.3959685293955835e-06, "loss": 1.0068, "step": 2834 }, { "epoch": 0.872039372500769, "grad_norm": 0.40143287507237707, "learning_rate": 4.375564795487003e-06, "loss": 0.9016, "step": 2835 }, { "epoch": 0.8723469701630268, "grad_norm": 0.7574445402944279, "learning_rate": 4.355206356474678e-06, "loss": 0.984, "step": 2836 }, { "epoch": 0.8726545678252845, "grad_norm": 0.6664950511682118, "learning_rate": 4.334893232570003e-06, "loss": 0.8972, "step": 2837 }, { "epoch": 0.8729621654875422, "grad_norm": 0.5891497289239611, "learning_rate": 4.314625443939374e-06, "loss": 0.8705, "step": 2838 }, { "epoch": 0.8732697631498001, "grad_norm": 1.044526712039445, "learning_rate": 4.294403010704179e-06, "loss": 1.2524, "step": 2839 }, { "epoch": 0.8735773608120578, "grad_norm": 0.5470983424350996, "learning_rate": 4.274225952940819e-06, "loss": 0.8765, "step": 2840 }, { "epoch": 0.8738849584743156, "grad_norm": 0.5045242968499185, "learning_rate": 4.254094290680577e-06, "loss": 0.7377, "step": 2841 }, { "epoch": 0.8741925561365733, "grad_norm": 0.8254880918992521, "learning_rate": 4.234008043909737e-06, "loss": 1.1593, "step": 2842 }, { "epoch": 0.8745001537988312, "grad_norm": 0.5311679511867591, "learning_rate": 4.213967232569455e-06, "loss": 0.8748, "step": 2843 }, { "epoch": 0.8748077514610889, "grad_norm": 0.5060000629986554, "learning_rate": 4.193971876555786e-06, "loss": 0.9544, "step": 2844 }, { "epoch": 0.8751153491233467, "grad_norm": 0.48028412810505644, "learning_rate": 4.174021995719657e-06, "loss": 0.9239, "step": 2845 }, { "epoch": 0.8754229467856044, "grad_norm": 0.46461666858150363, "learning_rate": 4.154117609866881e-06, "loss": 0.905, "step": 2846 }, { "epoch": 0.8757305444478622, "grad_norm": 0.46295233593520396, "learning_rate": 4.13425873875804e-06, "loss": 0.9776, "step": 2847 }, { "epoch": 0.87603814211012, "grad_norm": 0.45217531982462417, "learning_rate": 4.1144454021085965e-06, "loss": 0.7646, "step": 2848 }, { "epoch": 0.8763457397723777, "grad_norm": 0.6549882448134474, "learning_rate": 4.094677619588771e-06, "loss": 1.1179, "step": 2849 }, { "epoch": 0.8766533374346355, "grad_norm": 0.4410110003157383, "learning_rate": 4.0749554108235555e-06, "loss": 0.9171, "step": 2850 }, { "epoch": 0.8769609350968932, "grad_norm": 0.5907225974885267, "learning_rate": 4.055278795392714e-06, "loss": 0.8704, "step": 2851 }, { "epoch": 0.8772685327591511, "grad_norm": 0.5678085205430017, "learning_rate": 4.035647792830743e-06, "loss": 1.1017, "step": 2852 }, { "epoch": 0.8775761304214088, "grad_norm": 0.8477938001843818, "learning_rate": 4.016062422626859e-06, "loss": 0.7605, "step": 2853 }, { "epoch": 0.8778837280836665, "grad_norm": 0.48632887599044566, "learning_rate": 3.996522704224958e-06, "loss": 0.9585, "step": 2854 }, { "epoch": 0.8781913257459243, "grad_norm": 0.6224087261960803, "learning_rate": 3.977028657023635e-06, "loss": 0.9201, "step": 2855 }, { "epoch": 0.8784989234081821, "grad_norm": 0.5129434221773838, "learning_rate": 3.957580300376123e-06, "loss": 1.0008, "step": 2856 }, { "epoch": 0.8788065210704399, "grad_norm": 0.8752658548405995, "learning_rate": 3.938177653590308e-06, "loss": 1.0235, "step": 2857 }, { "epoch": 0.8791141187326976, "grad_norm": 0.667495580733765, "learning_rate": 3.918820735928708e-06, "loss": 0.8059, "step": 2858 }, { "epoch": 0.8794217163949554, "grad_norm": 0.3944974745960631, "learning_rate": 3.899509566608417e-06, "loss": 0.7917, "step": 2859 }, { "epoch": 0.8797293140572132, "grad_norm": 0.4914653587108449, "learning_rate": 3.880244164801128e-06, "loss": 0.7869, "step": 2860 }, { "epoch": 0.880036911719471, "grad_norm": 0.5340109263323993, "learning_rate": 3.861024549633085e-06, "loss": 0.8867, "step": 2861 }, { "epoch": 0.8803445093817287, "grad_norm": 0.4490739784246534, "learning_rate": 3.841850740185088e-06, "loss": 0.9437, "step": 2862 }, { "epoch": 0.8806521070439864, "grad_norm": 0.4309030507849765, "learning_rate": 3.8227227554924416e-06, "loss": 0.8158, "step": 2863 }, { "epoch": 0.8809597047062442, "grad_norm": 0.5437891942609785, "learning_rate": 3.8036406145449954e-06, "loss": 0.8928, "step": 2864 }, { "epoch": 0.881267302368502, "grad_norm": 0.6190469583027255, "learning_rate": 3.7846043362870508e-06, "loss": 1.0486, "step": 2865 }, { "epoch": 0.8815749000307598, "grad_norm": 0.44543314451175825, "learning_rate": 3.7656139396173907e-06, "loss": 0.9549, "step": 2866 }, { "epoch": 0.8818824976930175, "grad_norm": 0.49642043164782335, "learning_rate": 3.74666944338925e-06, "loss": 0.8583, "step": 2867 }, { "epoch": 0.8821900953552753, "grad_norm": 0.4492729158845771, "learning_rate": 3.727770866410285e-06, "loss": 0.8469, "step": 2868 }, { "epoch": 0.8824976930175331, "grad_norm": 0.6349106237779059, "learning_rate": 3.708918227442565e-06, "loss": 0.9859, "step": 2869 }, { "epoch": 0.8828052906797909, "grad_norm": 0.562204582977789, "learning_rate": 3.6901115452025793e-06, "loss": 0.8565, "step": 2870 }, { "epoch": 0.8831128883420486, "grad_norm": 0.7804522060798508, "learning_rate": 3.6713508383611595e-06, "loss": 0.7972, "step": 2871 }, { "epoch": 0.8834204860043063, "grad_norm": 0.4589963282386529, "learning_rate": 3.652636125543507e-06, "loss": 0.9509, "step": 2872 }, { "epoch": 0.8837280836665641, "grad_norm": 0.4742978363597965, "learning_rate": 3.6339674253291546e-06, "loss": 0.9449, "step": 2873 }, { "epoch": 0.8840356813288219, "grad_norm": 0.5912588235786046, "learning_rate": 3.6153447562519814e-06, "loss": 0.9705, "step": 2874 }, { "epoch": 0.8843432789910797, "grad_norm": 0.7774224235816312, "learning_rate": 3.5967681368001218e-06, "loss": 1.1918, "step": 2875 }, { "epoch": 0.8846508766533374, "grad_norm": 0.7421866989820202, "learning_rate": 3.5782375854160386e-06, "loss": 1.1448, "step": 2876 }, { "epoch": 0.8849584743155952, "grad_norm": 0.4824765996054838, "learning_rate": 3.5597531204964286e-06, "loss": 0.9071, "step": 2877 }, { "epoch": 0.885266071977853, "grad_norm": 0.5579725014766594, "learning_rate": 3.5413147603922504e-06, "loss": 0.8945, "step": 2878 }, { "epoch": 0.8855736696401107, "grad_norm": 0.6726398988037287, "learning_rate": 3.522922523408684e-06, "loss": 1.0345, "step": 2879 }, { "epoch": 0.8858812673023685, "grad_norm": 0.9237380424166495, "learning_rate": 3.504576427805123e-06, "loss": 1.1554, "step": 2880 }, { "epoch": 0.8861888649646262, "grad_norm": 0.42864152707015524, "learning_rate": 3.4862764917951597e-06, "loss": 0.8483, "step": 2881 }, { "epoch": 0.8864964626268841, "grad_norm": 0.5823940113232323, "learning_rate": 3.4680227335465385e-06, "loss": 0.9706, "step": 2882 }, { "epoch": 0.8868040602891418, "grad_norm": 1.2484402064591034, "learning_rate": 3.449815171181181e-06, "loss": 0.9649, "step": 2883 }, { "epoch": 0.8871116579513996, "grad_norm": 0.39687170237685393, "learning_rate": 3.431653822775138e-06, "loss": 0.7899, "step": 2884 }, { "epoch": 0.8874192556136573, "grad_norm": 0.5838616693859972, "learning_rate": 3.4135387063585766e-06, "loss": 0.7983, "step": 2885 }, { "epoch": 0.887726853275915, "grad_norm": 0.5038616875671732, "learning_rate": 3.3954698399157768e-06, "loss": 0.9367, "step": 2886 }, { "epoch": 0.8880344509381729, "grad_norm": 0.6766091748364769, "learning_rate": 3.3774472413850957e-06, "loss": 0.8824, "step": 2887 }, { "epoch": 0.8883420486004306, "grad_norm": 0.5461897678786353, "learning_rate": 3.3594709286589533e-06, "loss": 0.7012, "step": 2888 }, { "epoch": 0.8886496462626884, "grad_norm": 0.5286839887867114, "learning_rate": 3.3415409195838242e-06, "loss": 0.9138, "step": 2889 }, { "epoch": 0.8889572439249461, "grad_norm": 0.7111064843224527, "learning_rate": 3.323657231960209e-06, "loss": 0.8343, "step": 2890 }, { "epoch": 0.889264841587204, "grad_norm": 0.4312634565124968, "learning_rate": 3.305819883542621e-06, "loss": 0.8396, "step": 2891 }, { "epoch": 0.8895724392494617, "grad_norm": 0.499039170817191, "learning_rate": 3.288028892039585e-06, "loss": 0.9148, "step": 2892 }, { "epoch": 0.8898800369117195, "grad_norm": 0.5384191878736143, "learning_rate": 3.2702842751135775e-06, "loss": 0.9245, "step": 2893 }, { "epoch": 0.8901876345739772, "grad_norm": 0.5384191878736143, "learning_rate": 3.2702842751135775e-06, "loss": 0.8491, "step": 2894 }, { "epoch": 0.8904952322362351, "grad_norm": 0.7480126345423, "learning_rate": 3.252586050381057e-06, "loss": 0.874, "step": 2895 }, { "epoch": 0.8908028298984928, "grad_norm": 0.4798933607396212, "learning_rate": 3.2349342354124122e-06, "loss": 0.9345, "step": 2896 }, { "epoch": 0.8911104275607505, "grad_norm": 0.5675004852788275, "learning_rate": 3.2173288477319595e-06, "loss": 0.8106, "step": 2897 }, { "epoch": 0.8914180252230083, "grad_norm": 1.1144871096359155, "learning_rate": 3.199769904817923e-06, "loss": 0.9984, "step": 2898 }, { "epoch": 0.891725622885266, "grad_norm": 0.8493874076409035, "learning_rate": 3.1822574241024306e-06, "loss": 0.9899, "step": 2899 }, { "epoch": 0.8920332205475239, "grad_norm": 0.6563332990532286, "learning_rate": 3.164791422971469e-06, "loss": 0.8447, "step": 2900 }, { "epoch": 0.8923408182097816, "grad_norm": 0.5238469647696333, "learning_rate": 3.1473719187648798e-06, "loss": 0.9942, "step": 2901 }, { "epoch": 0.8926484158720394, "grad_norm": 0.4860219846327632, "learning_rate": 3.129998928776351e-06, "loss": 0.9447, "step": 2902 }, { "epoch": 0.8929560135342971, "grad_norm": 0.4717728122204222, "learning_rate": 3.1126724702533918e-06, "loss": 0.6996, "step": 2903 }, { "epoch": 0.893263611196555, "grad_norm": 0.474815159100942, "learning_rate": 3.0953925603973035e-06, "loss": 0.8956, "step": 2904 }, { "epoch": 0.8935712088588127, "grad_norm": 0.5562536797013568, "learning_rate": 3.078159216363202e-06, "loss": 0.8829, "step": 2905 }, { "epoch": 0.8938788065210704, "grad_norm": 0.6068127462829448, "learning_rate": 3.060972455259953e-06, "loss": 0.9642, "step": 2906 }, { "epoch": 0.8941864041833282, "grad_norm": 0.7930563026263208, "learning_rate": 3.043832294150173e-06, "loss": 0.9458, "step": 2907 }, { "epoch": 0.894494001845586, "grad_norm": 0.508128308239352, "learning_rate": 3.0267387500502286e-06, "loss": 1.0193, "step": 2908 }, { "epoch": 0.8948015995078438, "grad_norm": 0.5442840775161847, "learning_rate": 3.00969183993019e-06, "loss": 0.8426, "step": 2909 }, { "epoch": 0.8951091971701015, "grad_norm": 0.5401979138137231, "learning_rate": 2.9926915807138524e-06, "loss": 0.8906, "step": 2910 }, { "epoch": 0.8954167948323593, "grad_norm": 0.8693052256207507, "learning_rate": 2.9757379892786817e-06, "loss": 0.9898, "step": 2911 }, { "epoch": 0.895724392494617, "grad_norm": 0.7626588203804611, "learning_rate": 2.9588310824558094e-06, "loss": 0.9206, "step": 2912 }, { "epoch": 0.8960319901568748, "grad_norm": 0.6110323149189625, "learning_rate": 2.9419708770300312e-06, "loss": 1.0052, "step": 2913 }, { "epoch": 0.8963395878191326, "grad_norm": 0.583544558308001, "learning_rate": 2.9251573897397743e-06, "loss": 0.9537, "step": 2914 }, { "epoch": 0.8966471854813903, "grad_norm": 0.5461421672098362, "learning_rate": 2.9083906372770698e-06, "loss": 1.0093, "step": 2915 }, { "epoch": 0.8969547831436481, "grad_norm": 0.5478974353917891, "learning_rate": 2.891670636287591e-06, "loss": 0.9547, "step": 2916 }, { "epoch": 0.8972623808059059, "grad_norm": 0.5187932295343725, "learning_rate": 2.874997403370561e-06, "loss": 0.9673, "step": 2917 }, { "epoch": 0.8975699784681637, "grad_norm": 0.5303248084399286, "learning_rate": 2.858370955078782e-06, "loss": 0.9564, "step": 2918 }, { "epoch": 0.8978775761304214, "grad_norm": 0.516252912122998, "learning_rate": 2.841791307918612e-06, "loss": 0.8245, "step": 2919 }, { "epoch": 0.8981851737926791, "grad_norm": 0.5136695005822381, "learning_rate": 2.8252584783499515e-06, "loss": 0.9327, "step": 2920 }, { "epoch": 0.8984927714549369, "grad_norm": 0.6410368361172933, "learning_rate": 2.8087724827862037e-06, "loss": 0.9577, "step": 2921 }, { "epoch": 0.8988003691171947, "grad_norm": 0.4117843882964646, "learning_rate": 2.7923333375942995e-06, "loss": 0.8684, "step": 2922 }, { "epoch": 0.8991079667794525, "grad_norm": 0.5387114805133568, "learning_rate": 2.7759410590946444e-06, "loss": 0.9022, "step": 2923 }, { "epoch": 0.8994155644417102, "grad_norm": 0.8121674882288255, "learning_rate": 2.759595663561121e-06, "loss": 1.005, "step": 2924 }, { "epoch": 0.899723162103968, "grad_norm": 0.6081693553954205, "learning_rate": 2.743297167221054e-06, "loss": 0.8849, "step": 2925 }, { "epoch": 0.9000307597662258, "grad_norm": 0.45705365283099153, "learning_rate": 2.7270455862552223e-06, "loss": 0.8465, "step": 2926 }, { "epoch": 0.9003383574284836, "grad_norm": 0.4259384391465337, "learning_rate": 2.7108409367978205e-06, "loss": 0.9122, "step": 2927 }, { "epoch": 0.9006459550907413, "grad_norm": 0.5033070970075343, "learning_rate": 2.694683234936468e-06, "loss": 0.8023, "step": 2928 }, { "epoch": 0.900953552752999, "grad_norm": 0.5497632728585192, "learning_rate": 2.6785724967121504e-06, "loss": 0.982, "step": 2929 }, { "epoch": 0.9012611504152569, "grad_norm": 0.6826316016213126, "learning_rate": 2.6625087381192403e-06, "loss": 1.121, "step": 2930 }, { "epoch": 0.9015687480775146, "grad_norm": 0.4922573251312733, "learning_rate": 2.6464919751054753e-06, "loss": 0.8842, "step": 2931 }, { "epoch": 0.9018763457397724, "grad_norm": 0.5354338798298556, "learning_rate": 2.6305222235719297e-06, "loss": 0.7903, "step": 2932 }, { "epoch": 0.9021839434020301, "grad_norm": 0.6322588646036721, "learning_rate": 2.614599499372994e-06, "loss": 0.8346, "step": 2933 }, { "epoch": 0.9024915410642879, "grad_norm": 0.5635903425311192, "learning_rate": 2.598723818316412e-06, "loss": 1.0184, "step": 2934 }, { "epoch": 0.9027991387265457, "grad_norm": 0.4530157833619063, "learning_rate": 2.5828951961631765e-06, "loss": 0.8302, "step": 2935 }, { "epoch": 0.9031067363888035, "grad_norm": 0.5016596558096474, "learning_rate": 2.5671136486275895e-06, "loss": 0.8663, "step": 2936 }, { "epoch": 0.9034143340510612, "grad_norm": 0.5668358352953395, "learning_rate": 2.551379191377207e-06, "loss": 0.9055, "step": 2937 }, { "epoch": 0.9037219317133189, "grad_norm": 0.8077761180057417, "learning_rate": 2.535691840032839e-06, "loss": 0.7424, "step": 2938 }, { "epoch": 0.9040295293755768, "grad_norm": 0.6845156217080712, "learning_rate": 2.520051610168528e-06, "loss": 1.0071, "step": 2939 }, { "epoch": 0.9043371270378345, "grad_norm": 0.5734628109590203, "learning_rate": 2.504458517311542e-06, "loss": 0.946, "step": 2940 }, { "epoch": 0.9046447247000923, "grad_norm": 0.4535709277561084, "learning_rate": 2.4889125769423427e-06, "loss": 0.9487, "step": 2941 }, { "epoch": 0.90495232236235, "grad_norm": 0.4631755368839813, "learning_rate": 2.4734138044945897e-06, "loss": 0.9313, "step": 2942 }, { "epoch": 0.9052599200246079, "grad_norm": 0.5485811505569375, "learning_rate": 2.4579622153550972e-06, "loss": 0.883, "step": 2943 }, { "epoch": 0.9055675176868656, "grad_norm": 0.5861386569900713, "learning_rate": 2.4425578248638614e-06, "loss": 0.9797, "step": 2944 }, { "epoch": 0.9058751153491233, "grad_norm": 0.6782234168032142, "learning_rate": 2.427200648314015e-06, "loss": 0.9882, "step": 2945 }, { "epoch": 0.9061827130113811, "grad_norm": 0.5781900676605041, "learning_rate": 2.4118907009517964e-06, "loss": 0.8612, "step": 2946 }, { "epoch": 0.9064903106736388, "grad_norm": 0.4159766766995919, "learning_rate": 2.396627997976575e-06, "loss": 0.8552, "step": 2947 }, { "epoch": 0.9067979083358967, "grad_norm": 0.470715585605672, "learning_rate": 2.381412554540824e-06, "loss": 0.9735, "step": 2948 }, { "epoch": 0.9071055059981544, "grad_norm": 0.7392983560570807, "learning_rate": 2.366244385750066e-06, "loss": 0.9339, "step": 2949 }, { "epoch": 0.9074131036604122, "grad_norm": 0.44181666692461646, "learning_rate": 2.351123506662939e-06, "loss": 0.787, "step": 2950 }, { "epoch": 0.9077207013226699, "grad_norm": 0.5783727227997117, "learning_rate": 2.33604993229109e-06, "loss": 0.941, "step": 2951 }, { "epoch": 0.9080282989849278, "grad_norm": 0.6751290990416619, "learning_rate": 2.3210236775992213e-06, "loss": 0.9095, "step": 2952 }, { "epoch": 0.9083358966471855, "grad_norm": 0.4684615158055443, "learning_rate": 2.306044757505055e-06, "loss": 0.9218, "step": 2953 }, { "epoch": 0.9086434943094432, "grad_norm": 0.548998260244737, "learning_rate": 2.2911131868793233e-06, "loss": 0.7854, "step": 2954 }, { "epoch": 0.908951091971701, "grad_norm": 0.8524220862193891, "learning_rate": 2.2762289805457416e-06, "loss": 1.0783, "step": 2955 }, { "epoch": 0.9092586896339587, "grad_norm": 0.47188082965232775, "learning_rate": 2.261392153281022e-06, "loss": 0.6914, "step": 2956 }, { "epoch": 0.9095662872962166, "grad_norm": 0.5711741436288577, "learning_rate": 2.2466027198148165e-06, "loss": 0.8489, "step": 2957 }, { "epoch": 0.9098738849584743, "grad_norm": 0.6630381085772311, "learning_rate": 2.2318606948297405e-06, "loss": 0.8605, "step": 2958 }, { "epoch": 0.9101814826207321, "grad_norm": 0.8227719913946684, "learning_rate": 2.2171660929613425e-06, "loss": 0.9748, "step": 2959 }, { "epoch": 0.9104890802829898, "grad_norm": 0.5803552708223303, "learning_rate": 2.202518928798075e-06, "loss": 0.9644, "step": 2960 }, { "epoch": 0.9107966779452477, "grad_norm": 0.589434839122105, "learning_rate": 2.187919216881318e-06, "loss": 1.0023, "step": 2961 }, { "epoch": 0.9111042756075054, "grad_norm": 0.5128691346568118, "learning_rate": 2.173366971705332e-06, "loss": 0.9652, "step": 2962 }, { "epoch": 0.9114118732697631, "grad_norm": 0.48314980279174324, "learning_rate": 2.158862207717255e-06, "loss": 0.8513, "step": 2963 }, { "epoch": 0.9117194709320209, "grad_norm": 0.6008428346526075, "learning_rate": 2.1444049393170796e-06, "loss": 0.9895, "step": 2964 }, { "epoch": 0.9120270685942787, "grad_norm": 0.4372050508761347, "learning_rate": 2.1299951808576524e-06, "loss": 0.9101, "step": 2965 }, { "epoch": 0.9123346662565365, "grad_norm": 0.48643257122311506, "learning_rate": 2.1156329466446577e-06, "loss": 0.8254, "step": 2966 }, { "epoch": 0.9126422639187942, "grad_norm": 0.5446430070929179, "learning_rate": 2.1013182509365737e-06, "loss": 0.8152, "step": 2967 }, { "epoch": 0.912949861581052, "grad_norm": 0.5544070450342345, "learning_rate": 2.087051107944732e-06, "loss": 0.7985, "step": 2968 }, { "epoch": 0.9132574592433097, "grad_norm": 0.5396218734782416, "learning_rate": 2.072831531833208e-06, "loss": 0.9545, "step": 2969 }, { "epoch": 0.9135650569055676, "grad_norm": 0.45585653194431736, "learning_rate": 2.0586595367188765e-06, "loss": 0.7696, "step": 2970 }, { "epoch": 0.9138726545678253, "grad_norm": 0.58845974652524, "learning_rate": 2.044535136671366e-06, "loss": 0.8737, "step": 2971 }, { "epoch": 0.914180252230083, "grad_norm": 0.3839196715080991, "learning_rate": 2.030458345713071e-06, "loss": 0.8557, "step": 2972 }, { "epoch": 0.9144878498923408, "grad_norm": 0.5462849088355257, "learning_rate": 2.016429177819085e-06, "loss": 0.9245, "step": 2973 }, { "epoch": 0.9147954475545986, "grad_norm": 0.6713625801794745, "learning_rate": 2.002447646917266e-06, "loss": 1.0304, "step": 2974 }, { "epoch": 0.9151030452168564, "grad_norm": 0.38295074566255044, "learning_rate": 1.988513766888156e-06, "loss": 0.6389, "step": 2975 }, { "epoch": 0.9154106428791141, "grad_norm": 0.8909651565448196, "learning_rate": 1.974627551564978e-06, "loss": 1.0195, "step": 2976 }, { "epoch": 0.9157182405413719, "grad_norm": 0.45997062400228855, "learning_rate": 1.9607890147336607e-06, "loss": 0.9749, "step": 2977 }, { "epoch": 0.9160258382036297, "grad_norm": 0.4945806241326876, "learning_rate": 1.946998170132791e-06, "loss": 0.871, "step": 2978 }, { "epoch": 0.9163334358658874, "grad_norm": 0.44478825585415177, "learning_rate": 1.9332550314535847e-06, "loss": 0.811, "step": 2979 }, { "epoch": 0.9166410335281452, "grad_norm": 0.5716531479628899, "learning_rate": 1.9195596123399383e-06, "loss": 0.7591, "step": 2980 }, { "epoch": 0.9169486311904029, "grad_norm": 0.5580274186006876, "learning_rate": 1.905911926388343e-06, "loss": 0.87, "step": 2981 }, { "epoch": 0.9172562288526607, "grad_norm": 0.6824408941287851, "learning_rate": 1.8923119871479045e-06, "loss": 0.8545, "step": 2982 }, { "epoch": 0.9175638265149185, "grad_norm": 0.5295115505660337, "learning_rate": 1.8787598081203285e-06, "loss": 0.6982, "step": 2983 }, { "epoch": 0.9178714241771763, "grad_norm": 0.4058361201221115, "learning_rate": 1.86525540275993e-06, "loss": 0.8696, "step": 2984 }, { "epoch": 0.918179021839434, "grad_norm": 0.5654910232979484, "learning_rate": 1.8517987844735463e-06, "loss": 0.7972, "step": 2985 }, { "epoch": 0.9184866195016917, "grad_norm": 0.5058377845321761, "learning_rate": 1.8383899666206184e-06, "loss": 0.8284, "step": 2986 }, { "epoch": 0.9187942171639496, "grad_norm": 0.5965178707675876, "learning_rate": 1.825028962513109e-06, "loss": 0.8765, "step": 2987 }, { "epoch": 0.9191018148262073, "grad_norm": 0.4201394211604816, "learning_rate": 1.8117157854155132e-06, "loss": 0.813, "step": 2988 }, { "epoch": 0.9194094124884651, "grad_norm": 0.41865430157694433, "learning_rate": 1.7984504485448527e-06, "loss": 0.7813, "step": 2989 }, { "epoch": 0.9197170101507228, "grad_norm": 0.428015205655404, "learning_rate": 1.78523296507066e-06, "loss": 0.8608, "step": 2990 }, { "epoch": 0.9200246078129806, "grad_norm": 0.6919908641152824, "learning_rate": 1.7720633481149273e-06, "loss": 0.9062, "step": 2991 }, { "epoch": 0.9203322054752384, "grad_norm": 0.5914438263741801, "learning_rate": 1.758941610752174e-06, "loss": 0.8224, "step": 2992 }, { "epoch": 0.9206398031374962, "grad_norm": 0.45529850116741083, "learning_rate": 1.7458677660093458e-06, "loss": 0.8932, "step": 2993 }, { "epoch": 0.9209474007997539, "grad_norm": 0.6651574875219898, "learning_rate": 1.7328418268658664e-06, "loss": 1.0585, "step": 2994 }, { "epoch": 0.9212549984620116, "grad_norm": 0.38515639340536173, "learning_rate": 1.7198638062535744e-06, "loss": 0.8238, "step": 2995 }, { "epoch": 0.9215625961242695, "grad_norm": 0.7271345812725579, "learning_rate": 1.7069337170567744e-06, "loss": 0.7899, "step": 2996 }, { "epoch": 0.9218701937865272, "grad_norm": 0.653452612235038, "learning_rate": 1.694051572112143e-06, "loss": 0.9331, "step": 2997 }, { "epoch": 0.922177791448785, "grad_norm": 0.5229580062811675, "learning_rate": 1.681217384208794e-06, "loss": 0.7408, "step": 2998 }, { "epoch": 0.9224853891110427, "grad_norm": 0.5839407406322659, "learning_rate": 1.6684311660882079e-06, "loss": 1.1757, "step": 2999 }, { "epoch": 0.9227929867733006, "grad_norm": 0.5166630949353516, "learning_rate": 1.655692930444258e-06, "loss": 0.8389, "step": 3000 }, { "epoch": 0.9231005844355583, "grad_norm": 1.1842735670203823, "learning_rate": 1.6430026899231565e-06, "loss": 1.08, "step": 3001 }, { "epoch": 0.923408182097816, "grad_norm": 0.5011569584223041, "learning_rate": 1.6303604571235032e-06, "loss": 0.8505, "step": 3002 }, { "epoch": 0.9237157797600738, "grad_norm": 0.6366641534612533, "learning_rate": 1.6177662445962027e-06, "loss": 0.7509, "step": 3003 }, { "epoch": 0.9240233774223315, "grad_norm": 0.7278224550542499, "learning_rate": 1.6052200648445093e-06, "loss": 0.9857, "step": 3004 }, { "epoch": 0.9243309750845894, "grad_norm": 1.0473614329010072, "learning_rate": 1.592721930323987e-06, "loss": 0.9301, "step": 3005 }, { "epoch": 0.9246385727468471, "grad_norm": 0.43097169987579187, "learning_rate": 1.5802718534424887e-06, "loss": 0.8722, "step": 3006 }, { "epoch": 0.9249461704091049, "grad_norm": 0.6444699352342226, "learning_rate": 1.567869846560166e-06, "loss": 0.8196, "step": 3007 }, { "epoch": 0.9252537680713626, "grad_norm": 0.6099531438114985, "learning_rate": 1.5555159219894643e-06, "loss": 0.9963, "step": 3008 }, { "epoch": 0.9255613657336205, "grad_norm": 0.8182048708595486, "learning_rate": 1.5432100919950566e-06, "loss": 1.072, "step": 3009 }, { "epoch": 0.9258689633958782, "grad_norm": 0.9925114664589234, "learning_rate": 1.530952368793903e-06, "loss": 1.0348, "step": 3010 }, { "epoch": 0.926176561058136, "grad_norm": 0.4640428603464355, "learning_rate": 1.518742764555181e-06, "loss": 1.099, "step": 3011 }, { "epoch": 0.9264841587203937, "grad_norm": 0.5207631443856517, "learning_rate": 1.5065812914003164e-06, "loss": 0.8822, "step": 3012 }, { "epoch": 0.9267917563826515, "grad_norm": 0.7029919144821944, "learning_rate": 1.4944679614029344e-06, "loss": 1.1245, "step": 3013 }, { "epoch": 0.9270993540449093, "grad_norm": 0.835454259885165, "learning_rate": 1.4824027865888767e-06, "loss": 0.8896, "step": 3014 }, { "epoch": 0.927406951707167, "grad_norm": 0.619013500802905, "learning_rate": 1.470385778936173e-06, "loss": 0.8455, "step": 3015 }, { "epoch": 0.9277145493694248, "grad_norm": 0.3701616266795015, "learning_rate": 1.45841695037503e-06, "loss": 0.8775, "step": 3016 }, { "epoch": 0.9280221470316825, "grad_norm": 0.6694852781908837, "learning_rate": 1.4464963127878207e-06, "loss": 0.8832, "step": 3017 }, { "epoch": 0.9283297446939404, "grad_norm": 0.6825845297625576, "learning_rate": 1.4346238780091058e-06, "loss": 0.8877, "step": 3018 }, { "epoch": 0.9286373423561981, "grad_norm": 0.5060774118742647, "learning_rate": 1.4227996578255408e-06, "loss": 0.7469, "step": 3019 }, { "epoch": 0.9289449400184558, "grad_norm": 0.5191139865249208, "learning_rate": 1.4110236639759578e-06, "loss": 0.8269, "step": 3020 }, { "epoch": 0.9292525376807136, "grad_norm": 0.5504925229108828, "learning_rate": 1.3992959081512825e-06, "loss": 0.9287, "step": 3021 }, { "epoch": 0.9295601353429714, "grad_norm": 0.6027482405811195, "learning_rate": 1.3876164019945738e-06, "loss": 0.8242, "step": 3022 }, { "epoch": 0.9298677330052292, "grad_norm": 0.5230873157366401, "learning_rate": 1.3759851571009675e-06, "loss": 1.014, "step": 3023 }, { "epoch": 0.9301753306674869, "grad_norm": 0.5109390434745364, "learning_rate": 1.3644021850177102e-06, "loss": 0.9291, "step": 3024 }, { "epoch": 0.9304829283297447, "grad_norm": 0.39497719096701694, "learning_rate": 1.352867497244098e-06, "loss": 0.8073, "step": 3025 }, { "epoch": 0.9307905259920025, "grad_norm": 0.47991633680812884, "learning_rate": 1.341381105231515e-06, "loss": 0.7661, "step": 3026 }, { "epoch": 0.9310981236542603, "grad_norm": 0.4681616484009856, "learning_rate": 1.329943020383384e-06, "loss": 1.0245, "step": 3027 }, { "epoch": 0.931405721316518, "grad_norm": 0.47810903493113627, "learning_rate": 1.318553254055177e-06, "loss": 0.9398, "step": 3028 }, { "epoch": 0.9317133189787757, "grad_norm": 0.5424238362020419, "learning_rate": 1.3072118175543934e-06, "loss": 0.8865, "step": 3029 }, { "epoch": 0.9320209166410335, "grad_norm": 0.5307045088586039, "learning_rate": 1.2959187221405544e-06, "loss": 0.8648, "step": 3030 }, { "epoch": 0.9323285143032913, "grad_norm": 0.6546935242200751, "learning_rate": 1.284673979025175e-06, "loss": 0.9835, "step": 3031 }, { "epoch": 0.9326361119655491, "grad_norm": 0.6183613791955714, "learning_rate": 1.2734775993717917e-06, "loss": 0.9396, "step": 3032 }, { "epoch": 0.9329437096278068, "grad_norm": 0.6391994240835446, "learning_rate": 1.2623295942959135e-06, "loss": 1.0804, "step": 3033 }, { "epoch": 0.9332513072900646, "grad_norm": 0.724831593113272, "learning_rate": 1.2512299748650202e-06, "loss": 1.0501, "step": 3034 }, { "epoch": 0.9335589049523224, "grad_norm": 0.5422584972704819, "learning_rate": 1.2401787520985531e-06, "loss": 1.0435, "step": 3035 }, { "epoch": 0.9338665026145802, "grad_norm": 0.3436975465908646, "learning_rate": 1.2291759369679357e-06, "loss": 0.8652, "step": 3036 }, { "epoch": 0.9341741002768379, "grad_norm": 0.4736361150397447, "learning_rate": 1.2182215403964913e-06, "loss": 0.8649, "step": 3037 }, { "epoch": 0.9344816979390956, "grad_norm": 0.49595057782567115, "learning_rate": 1.2073155732594988e-06, "loss": 0.8594, "step": 3038 }, { "epoch": 0.9347892956013534, "grad_norm": 0.6869837971344962, "learning_rate": 1.1964580463841635e-06, "loss": 1.0557, "step": 3039 }, { "epoch": 0.9350968932636112, "grad_norm": 0.4806198930562316, "learning_rate": 1.1856489705495744e-06, "loss": 0.8559, "step": 3040 }, { "epoch": 0.935404490925869, "grad_norm": 0.7999274483600374, "learning_rate": 1.1748883564867418e-06, "loss": 1.1544, "step": 3041 }, { "epoch": 0.9357120885881267, "grad_norm": 0.7551218863205699, "learning_rate": 1.1641762148785651e-06, "loss": 0.9351, "step": 3042 }, { "epoch": 0.9360196862503845, "grad_norm": 0.6937674246826907, "learning_rate": 1.1535125563597926e-06, "loss": 0.9888, "step": 3043 }, { "epoch": 0.9363272839126423, "grad_norm": 0.4242109250475142, "learning_rate": 1.1428973915170838e-06, "loss": 0.8753, "step": 3044 }, { "epoch": 0.9366348815749, "grad_norm": 0.4461065328733987, "learning_rate": 1.1323307308889198e-06, "loss": 0.8735, "step": 3045 }, { "epoch": 0.9369424792371578, "grad_norm": 0.48055095047180335, "learning_rate": 1.1218125849656425e-06, "loss": 0.7588, "step": 3046 }, { "epoch": 0.9372500768994155, "grad_norm": 0.6272643854071995, "learning_rate": 1.111342964189427e-06, "loss": 0.8696, "step": 3047 }, { "epoch": 0.9375576745616734, "grad_norm": 0.7461198403060759, "learning_rate": 1.1009218789542752e-06, "loss": 0.9426, "step": 3048 }, { "epoch": 0.9378652722239311, "grad_norm": 0.5380381459672336, "learning_rate": 1.0905493396059895e-06, "loss": 0.9792, "step": 3049 }, { "epoch": 0.9381728698861889, "grad_norm": 0.5072127357604413, "learning_rate": 1.0802253564422104e-06, "loss": 0.9027, "step": 3050 }, { "epoch": 0.9384804675484466, "grad_norm": 0.6999016479536618, "learning_rate": 1.0699499397123392e-06, "loss": 0.7924, "step": 3051 }, { "epoch": 0.9387880652107043, "grad_norm": 0.5376320196469643, "learning_rate": 1.0597230996175766e-06, "loss": 0.8751, "step": 3052 }, { "epoch": 0.9390956628729622, "grad_norm": 0.40037138631506686, "learning_rate": 1.049544846310896e-06, "loss": 0.6828, "step": 3053 }, { "epoch": 0.9394032605352199, "grad_norm": 0.8350366689120307, "learning_rate": 1.0394151898970417e-06, "loss": 1.0157, "step": 3054 }, { "epoch": 0.9397108581974777, "grad_norm": 0.5915383804157786, "learning_rate": 1.0293341404324863e-06, "loss": 0.8718, "step": 3055 }, { "epoch": 0.9400184558597354, "grad_norm": 0.5426446186021523, "learning_rate": 1.0193017079254852e-06, "loss": 0.9931, "step": 3056 }, { "epoch": 0.9403260535219933, "grad_norm": 0.5684363443469569, "learning_rate": 1.0093179023359989e-06, "loss": 0.8747, "step": 3057 }, { "epoch": 0.940633651184251, "grad_norm": 0.5251181663628067, "learning_rate": 9.993827335757267e-07, "loss": 0.9207, "step": 3058 }, { "epoch": 0.9409412488465088, "grad_norm": 0.4550522816006357, "learning_rate": 9.89496211508062e-07, "loss": 0.9058, "step": 3059 }, { "epoch": 0.9412488465087665, "grad_norm": 0.5876727498722059, "learning_rate": 9.796583459481423e-07, "loss": 0.9678, "step": 3060 }, { "epoch": 0.9415564441710244, "grad_norm": 1.0787941886933967, "learning_rate": 9.698691466627551e-07, "loss": 1.0511, "step": 3061 }, { "epoch": 0.9418640418332821, "grad_norm": 0.574031347734561, "learning_rate": 9.601286233703987e-07, "loss": 0.8863, "step": 3062 }, { "epoch": 0.9421716394955398, "grad_norm": 0.8705831366529256, "learning_rate": 9.504367857412489e-07, "loss": 0.9244, "step": 3063 }, { "epoch": 0.9424792371577976, "grad_norm": 0.5389128626823761, "learning_rate": 9.407936433971365e-07, "loss": 0.8199, "step": 3064 }, { "epoch": 0.9427868348200553, "grad_norm": 0.5587771770263661, "learning_rate": 9.311992059115482e-07, "loss": 0.9911, "step": 3065 }, { "epoch": 0.9430944324823132, "grad_norm": 0.5141376872095335, "learning_rate": 9.216534828096313e-07, "loss": 0.8205, "step": 3066 }, { "epoch": 0.9434020301445709, "grad_norm": 0.6835427030895262, "learning_rate": 9.121564835681495e-07, "loss": 0.9186, "step": 3067 }, { "epoch": 0.9437096278068287, "grad_norm": 0.5450157630410802, "learning_rate": 9.027082176155166e-07, "loss": 0.8089, "step": 3068 }, { "epoch": 0.9440172254690864, "grad_norm": 0.6340239383306221, "learning_rate": 8.933086943317514e-07, "loss": 0.942, "step": 3069 }, { "epoch": 0.9443248231313442, "grad_norm": 0.7456040323454204, "learning_rate": 8.83957923048484e-07, "loss": 0.982, "step": 3070 }, { "epoch": 0.944632420793602, "grad_norm": 0.765340035291631, "learning_rate": 8.746559130489495e-07, "loss": 0.9797, "step": 3071 }, { "epoch": 0.9449400184558597, "grad_norm": 0.5728124877656231, "learning_rate": 8.654026735679721e-07, "loss": 0.8371, "step": 3072 }, { "epoch": 0.9452476161181175, "grad_norm": 0.41016874998366676, "learning_rate": 8.561982137919478e-07, "loss": 0.9003, "step": 3073 }, { "epoch": 0.9455552137803752, "grad_norm": 0.4067989880265945, "learning_rate": 8.470425428588669e-07, "loss": 0.8314, "step": 3074 }, { "epoch": 0.9458628114426331, "grad_norm": 0.4221347058109387, "learning_rate": 8.379356698582641e-07, "loss": 0.7471, "step": 3075 }, { "epoch": 0.9461704091048908, "grad_norm": 0.5736656373467464, "learning_rate": 8.288776038312296e-07, "loss": 0.9433, "step": 3076 }, { "epoch": 0.9464780067671485, "grad_norm": 0.7759688400935095, "learning_rate": 8.198683537704033e-07, "loss": 1.285, "step": 3077 }, { "epoch": 0.9467856044294063, "grad_norm": 0.5887941649011355, "learning_rate": 8.109079286199806e-07, "loss": 0.9644, "step": 3078 }, { "epoch": 0.9470932020916641, "grad_norm": 0.449263907541219, "learning_rate": 8.019963372756457e-07, "loss": 0.8725, "step": 3079 }, { "epoch": 0.9474007997539219, "grad_norm": 0.45084145108768425, "learning_rate": 7.931335885846325e-07, "loss": 0.842, "step": 3080 }, { "epoch": 0.9477083974161796, "grad_norm": 0.4842014607417555, "learning_rate": 7.843196913456752e-07, "loss": 0.9599, "step": 3081 }, { "epoch": 0.9480159950784374, "grad_norm": 0.5850855810539121, "learning_rate": 7.755546543090075e-07, "loss": 0.8538, "step": 3082 }, { "epoch": 0.9483235927406952, "grad_norm": 0.5065497623505054, "learning_rate": 7.668384861763522e-07, "loss": 0.8461, "step": 3083 }, { "epoch": 0.948631190402953, "grad_norm": 0.3902648089372053, "learning_rate": 7.581711956009374e-07, "loss": 0.8724, "step": 3084 }, { "epoch": 0.9489387880652107, "grad_norm": 0.6152300260718977, "learning_rate": 7.495527911874411e-07, "loss": 0.7507, "step": 3085 }, { "epoch": 0.9492463857274684, "grad_norm": 0.4577553108998056, "learning_rate": 7.409832814920137e-07, "loss": 0.939, "step": 3086 }, { "epoch": 0.9495539833897262, "grad_norm": 0.6200508980249039, "learning_rate": 7.324626750222719e-07, "loss": 0.9701, "step": 3087 }, { "epoch": 0.949861581051984, "grad_norm": 0.6868491888874285, "learning_rate": 7.239909802372935e-07, "loss": 1.0687, "step": 3088 }, { "epoch": 0.9501691787142418, "grad_norm": 0.4665292658549405, "learning_rate": 7.155682055475566e-07, "loss": 0.9912, "step": 3089 }, { "epoch": 0.9504767763764995, "grad_norm": 0.5633828891353738, "learning_rate": 7.071943593150221e-07, "loss": 0.9236, "step": 3090 }, { "epoch": 0.9507843740387573, "grad_norm": 0.5305237217035088, "learning_rate": 6.988694498530401e-07, "loss": 0.9252, "step": 3091 }, { "epoch": 0.9510919717010151, "grad_norm": 0.4738915506075934, "learning_rate": 6.905934854263995e-07, "loss": 0.9308, "step": 3092 }, { "epoch": 0.9513995693632729, "grad_norm": 0.7623303502303196, "learning_rate": 6.823664742512725e-07, "loss": 0.9859, "step": 3093 }, { "epoch": 0.9517071670255306, "grad_norm": 0.7921981681681975, "learning_rate": 6.741884244952756e-07, "loss": 1.1015, "step": 3094 }, { "epoch": 0.9520147646877883, "grad_norm": 0.4759015883976684, "learning_rate": 6.660593442773588e-07, "loss": 0.8962, "step": 3095 }, { "epoch": 0.9523223623500462, "grad_norm": 0.4594697240861669, "learning_rate": 6.579792416679054e-07, "loss": 0.9943, "step": 3096 }, { "epoch": 0.9526299600123039, "grad_norm": 0.45971419627719756, "learning_rate": 6.499481246886596e-07, "loss": 0.877, "step": 3097 }, { "epoch": 0.9529375576745617, "grad_norm": 0.601364449560997, "learning_rate": 6.41966001312716e-07, "loss": 0.9593, "step": 3098 }, { "epoch": 0.9532451553368194, "grad_norm": 0.4765349403001703, "learning_rate": 6.340328794645523e-07, "loss": 0.8805, "step": 3099 }, { "epoch": 0.9535527529990772, "grad_norm": 0.556732052928935, "learning_rate": 6.261487670199961e-07, "loss": 0.9226, "step": 3100 }, { "epoch": 0.953860350661335, "grad_norm": 0.5138284158363982, "learning_rate": 6.183136718061977e-07, "loss": 0.7584, "step": 3101 }, { "epoch": 0.9541679483235928, "grad_norm": 0.6307210340273879, "learning_rate": 6.105276016016792e-07, "loss": 0.8489, "step": 3102 }, { "epoch": 0.9544755459858505, "grad_norm": 0.37239917308681286, "learning_rate": 6.02790564136263e-07, "loss": 0.9152, "step": 3103 }, { "epoch": 0.9547831436481082, "grad_norm": 0.4241808824206684, "learning_rate": 5.951025670911048e-07, "loss": 0.9196, "step": 3104 }, { "epoch": 0.9550907413103661, "grad_norm": 0.4748725665778254, "learning_rate": 5.874636180986659e-07, "loss": 0.9509, "step": 3105 }, { "epoch": 0.9553983389726238, "grad_norm": 0.6593631040150145, "learning_rate": 5.798737247427355e-07, "loss": 1.0101, "step": 3106 }, { "epoch": 0.9557059366348816, "grad_norm": 0.49909465581076357, "learning_rate": 5.723328945583694e-07, "loss": 0.9088, "step": 3107 }, { "epoch": 0.9560135342971393, "grad_norm": 0.5103501603477251, "learning_rate": 5.648411350319461e-07, "loss": 0.8898, "step": 3108 }, { "epoch": 0.9563211319593972, "grad_norm": 0.676820911177715, "learning_rate": 5.573984536011045e-07, "loss": 0.9178, "step": 3109 }, { "epoch": 0.9566287296216549, "grad_norm": 1.801416524459903, "learning_rate": 5.500048576547679e-07, "loss": 0.7403, "step": 3110 }, { "epoch": 0.9569363272839126, "grad_norm": 0.5803700444993739, "learning_rate": 5.42660354533131e-07, "loss": 1.0241, "step": 3111 }, { "epoch": 0.9572439249461704, "grad_norm": 0.4296322102176167, "learning_rate": 5.35364951527656e-07, "loss": 0.9265, "step": 3112 }, { "epoch": 0.9575515226084281, "grad_norm": 0.5310661148401262, "learning_rate": 5.28118655881038e-07, "loss": 0.9342, "step": 3113 }, { "epoch": 0.957859120270686, "grad_norm": 0.669375291336843, "learning_rate": 5.209214747872503e-07, "loss": 0.939, "step": 3114 }, { "epoch": 0.9581667179329437, "grad_norm": 0.4326064709432471, "learning_rate": 5.137734153914719e-07, "loss": 0.8269, "step": 3115 }, { "epoch": 0.9584743155952015, "grad_norm": 0.47161938240848517, "learning_rate": 5.066744847901484e-07, "loss": 0.9256, "step": 3116 }, { "epoch": 0.9587819132574592, "grad_norm": 0.5179543935129054, "learning_rate": 4.996246900309198e-07, "loss": 0.8596, "step": 3117 }, { "epoch": 0.9590895109197171, "grad_norm": 0.5921872783473823, "learning_rate": 4.926240381126823e-07, "loss": 0.7484, "step": 3118 }, { "epoch": 0.9593971085819748, "grad_norm": 0.5418772556130338, "learning_rate": 4.856725359855041e-07, "loss": 0.8472, "step": 3119 }, { "epoch": 0.9597047062442325, "grad_norm": 0.49304677761439336, "learning_rate": 4.78770190550687e-07, "loss": 0.8979, "step": 3120 }, { "epoch": 0.9600123039064903, "grad_norm": 0.6103275869047551, "learning_rate": 4.719170086607161e-07, "loss": 0.909, "step": 3121 }, { "epoch": 0.960319901568748, "grad_norm": 0.6197335414913415, "learning_rate": 4.6511299711928244e-07, "loss": 0.8896, "step": 3122 }, { "epoch": 0.9606274992310059, "grad_norm": 0.43184492049276696, "learning_rate": 4.583581626812383e-07, "loss": 0.8903, "step": 3123 }, { "epoch": 0.9609350968932636, "grad_norm": 0.6410696353776848, "learning_rate": 4.516525120526527e-07, "loss": 0.8157, "step": 3124 }, { "epoch": 0.9612426945555214, "grad_norm": 0.717128047248507, "learning_rate": 4.4499605189071703e-07, "loss": 0.9024, "step": 3125 }, { "epoch": 0.9615502922177791, "grad_norm": 0.5803221620183977, "learning_rate": 4.383887888038285e-07, "loss": 0.8357, "step": 3126 }, { "epoch": 0.961857889880037, "grad_norm": 0.554451839062181, "learning_rate": 4.318307293515178e-07, "loss": 1.01, "step": 3127 }, { "epoch": 0.9621654875422947, "grad_norm": 0.41145861626004715, "learning_rate": 4.2532188004448245e-07, "loss": 0.9104, "step": 3128 }, { "epoch": 0.9624730852045524, "grad_norm": 0.5006421083902896, "learning_rate": 4.1886224734455916e-07, "loss": 0.8135, "step": 3129 }, { "epoch": 0.9627806828668102, "grad_norm": 0.7339340659535931, "learning_rate": 4.124518376647235e-07, "loss": 1.013, "step": 3130 }, { "epoch": 0.963088280529068, "grad_norm": 0.9180469899835767, "learning_rate": 4.060906573690737e-07, "loss": 0.9496, "step": 3131 }, { "epoch": 0.9633958781913258, "grad_norm": 0.6499487697066987, "learning_rate": 3.9977871277285804e-07, "loss": 1.0004, "step": 3132 }, { "epoch": 0.9637034758535835, "grad_norm": 0.5198614757051064, "learning_rate": 3.935160101424251e-07, "loss": 0.9406, "step": 3133 }, { "epoch": 0.9640110735158413, "grad_norm": 0.5593902387326274, "learning_rate": 3.873025556952459e-07, "loss": 0.8533, "step": 3134 }, { "epoch": 0.964318671178099, "grad_norm": 0.3729251666283709, "learning_rate": 3.81138355599886e-07, "loss": 0.8057, "step": 3135 }, { "epoch": 0.9646262688403568, "grad_norm": 0.5253562857296289, "learning_rate": 3.7502341597603906e-07, "loss": 1.061, "step": 3136 }, { "epoch": 0.9649338665026146, "grad_norm": 0.47670288250600107, "learning_rate": 3.689577428944602e-07, "loss": 1.0854, "step": 3137 }, { "epoch": 0.9652414641648723, "grad_norm": 0.5186899411540242, "learning_rate": 3.6294134237702113e-07, "loss": 1.0033, "step": 3138 }, { "epoch": 0.9655490618271301, "grad_norm": 0.6360745850015813, "learning_rate": 3.569742203966664e-07, "loss": 0.8103, "step": 3139 }, { "epoch": 0.9658566594893879, "grad_norm": 0.6807134410322855, "learning_rate": 3.510563828774127e-07, "loss": 1.0071, "step": 3140 }, { "epoch": 0.9661642571516457, "grad_norm": 0.5076229545684128, "learning_rate": 3.4518783569435496e-07, "loss": 0.9082, "step": 3141 }, { "epoch": 0.9664718548139034, "grad_norm": 0.5576200531730102, "learning_rate": 3.393685846736605e-07, "loss": 1.0844, "step": 3142 }, { "epoch": 0.9667794524761611, "grad_norm": 0.9823792757125777, "learning_rate": 3.3359863559254136e-07, "loss": 1.3191, "step": 3143 }, { "epoch": 0.967087050138419, "grad_norm": 0.7257124382353679, "learning_rate": 3.278779941792709e-07, "loss": 1.0355, "step": 3144 }, { "epoch": 0.9673946478006767, "grad_norm": 0.40131809001715824, "learning_rate": 3.2220666611317287e-07, "loss": 0.8705, "step": 3145 }, { "epoch": 0.9677022454629345, "grad_norm": 0.532704826824931, "learning_rate": 3.1658465702461557e-07, "loss": 0.9726, "step": 3146 }, { "epoch": 0.9680098431251922, "grad_norm": 0.8084459281350053, "learning_rate": 3.1101197249499005e-07, "loss": 0.9897, "step": 3147 }, { "epoch": 0.96831744078745, "grad_norm": 0.49847955522609166, "learning_rate": 3.0548861805674844e-07, "loss": 0.7994, "step": 3148 }, { "epoch": 0.9686250384497078, "grad_norm": 0.8851932811719448, "learning_rate": 3.000145991933323e-07, "loss": 0.9333, "step": 3149 }, { "epoch": 0.9689326361119656, "grad_norm": 0.4936847731973475, "learning_rate": 2.9458992133923333e-07, "loss": 0.9503, "step": 3150 }, { "epoch": 0.9692402337742233, "grad_norm": 0.5200462295075284, "learning_rate": 2.892145898799437e-07, "loss": 0.9296, "step": 3151 }, { "epoch": 0.969547831436481, "grad_norm": 0.4083086856038706, "learning_rate": 2.838886101519722e-07, "loss": 0.7924, "step": 3152 }, { "epoch": 0.9698554290987389, "grad_norm": 0.6371914628913328, "learning_rate": 2.786119874428228e-07, "loss": 0.7748, "step": 3153 }, { "epoch": 0.9701630267609966, "grad_norm": 0.6205214168566453, "learning_rate": 2.7338472699101613e-07, "loss": 0.9594, "step": 3154 }, { "epoch": 0.9704706244232544, "grad_norm": 0.49695028538989117, "learning_rate": 2.6820683398605106e-07, "loss": 0.8226, "step": 3155 }, { "epoch": 0.9707782220855121, "grad_norm": 0.6908046375689677, "learning_rate": 2.6307831356842673e-07, "loss": 0.9041, "step": 3156 }, { "epoch": 0.9710858197477699, "grad_norm": 0.7787623158792013, "learning_rate": 2.5799917082960923e-07, "loss": 1.0009, "step": 3157 }, { "epoch": 0.9713934174100277, "grad_norm": 0.5124263818322373, "learning_rate": 2.5296941081206504e-07, "loss": 0.7154, "step": 3158 }, { "epoch": 0.9717010150722855, "grad_norm": 0.4246665767931411, "learning_rate": 2.4798903850922206e-07, "loss": 0.8616, "step": 3159 }, { "epoch": 0.9720086127345432, "grad_norm": 0.49643561749560533, "learning_rate": 2.4305805886548075e-07, "loss": 0.8597, "step": 3160 }, { "epoch": 0.9723162103968009, "grad_norm": 0.6278886847100319, "learning_rate": 2.3817647677620848e-07, "loss": 0.8989, "step": 3161 }, { "epoch": 0.9726238080590588, "grad_norm": 0.6096075794345037, "learning_rate": 2.333442970877231e-07, "loss": 0.9684, "step": 3162 }, { "epoch": 0.9729314057213165, "grad_norm": 0.8640701824733507, "learning_rate": 2.285615245972983e-07, "loss": 0.9529, "step": 3163 }, { "epoch": 0.9732390033835743, "grad_norm": 0.46833904002736715, "learning_rate": 2.2382816405317474e-07, "loss": 0.8854, "step": 3164 }, { "epoch": 0.973546601045832, "grad_norm": 0.5142747162203113, "learning_rate": 2.191442201545102e-07, "loss": 0.9893, "step": 3165 }, { "epoch": 0.9738541987080899, "grad_norm": 0.5607829587452432, "learning_rate": 2.1450969755142381e-07, "loss": 0.9876, "step": 3166 }, { "epoch": 0.9741617963703476, "grad_norm": 0.9299859426718722, "learning_rate": 2.099246008449629e-07, "loss": 1.123, "step": 3167 }, { "epoch": 0.9744693940326054, "grad_norm": 0.5222348644110588, "learning_rate": 2.053889345871085e-07, "loss": 0.804, "step": 3168 }, { "epoch": 0.9747769916948631, "grad_norm": 0.5418305318518991, "learning_rate": 2.0090270328076423e-07, "loss": 0.9328, "step": 3169 }, { "epoch": 0.9750845893571208, "grad_norm": 0.4001665093355395, "learning_rate": 1.9646591137976177e-07, "loss": 0.8674, "step": 3170 }, { "epoch": 0.9753921870193787, "grad_norm": 0.7508343878050482, "learning_rate": 1.9207856328883888e-07, "loss": 1.1062, "step": 3171 }, { "epoch": 0.9756997846816364, "grad_norm": 0.3990984608273166, "learning_rate": 1.8774066336366135e-07, "loss": 0.8672, "step": 3172 }, { "epoch": 0.9760073823438942, "grad_norm": 0.4694848552910679, "learning_rate": 1.8345221591079542e-07, "loss": 0.9629, "step": 3173 }, { "epoch": 0.9763149800061519, "grad_norm": 1.0470163790959717, "learning_rate": 1.7921322518771322e-07, "loss": 1.0077, "step": 3174 }, { "epoch": 0.9766225776684098, "grad_norm": 0.44388241536463496, "learning_rate": 1.7502369540278174e-07, "loss": 0.8047, "step": 3175 }, { "epoch": 0.9769301753306675, "grad_norm": 0.8206232153637492, "learning_rate": 1.70883630715285e-07, "loss": 1.0509, "step": 3176 }, { "epoch": 0.9772377729929252, "grad_norm": 0.4888232279453433, "learning_rate": 1.667930352353686e-07, "loss": 0.8761, "step": 3177 }, { "epoch": 0.977545370655183, "grad_norm": 0.8389555852180556, "learning_rate": 1.627519130240951e-07, "loss": 1.0332, "step": 3178 }, { "epoch": 0.9778529683174408, "grad_norm": 0.6486152695842069, "learning_rate": 1.5876026809338863e-07, "loss": 0.9103, "step": 3179 }, { "epoch": 0.9781605659796986, "grad_norm": 0.5290500173283056, "learning_rate": 1.5481810440606813e-07, "loss": 0.879, "step": 3180 }, { "epoch": 0.9784681636419563, "grad_norm": 0.5040238245611566, "learning_rate": 1.5092542587582525e-07, "loss": 1.045, "step": 3181 }, { "epoch": 0.9787757613042141, "grad_norm": 0.5702406899537288, "learning_rate": 1.4708223636721862e-07, "loss": 0.8436, "step": 3182 }, { "epoch": 0.9790833589664718, "grad_norm": 0.4478115644422497, "learning_rate": 1.43288539695674e-07, "loss": 0.9888, "step": 3183 }, { "epoch": 0.9793909566287297, "grad_norm": 0.6009148284760597, "learning_rate": 1.3954433962749536e-07, "loss": 0.8411, "step": 3184 }, { "epoch": 0.9796985542909874, "grad_norm": 0.5783553856120014, "learning_rate": 1.35849639879837e-07, "loss": 0.9963, "step": 3185 }, { "epoch": 0.9800061519532451, "grad_norm": 0.506486062306369, "learning_rate": 1.3220444412071485e-07, "loss": 0.8711, "step": 3186 }, { "epoch": 0.9803137496155029, "grad_norm": 0.45149989117974054, "learning_rate": 1.2860875596898414e-07, "loss": 1.0081, "step": 3187 }, { "epoch": 0.9806213472777607, "grad_norm": 0.6789216842434949, "learning_rate": 1.2506257899437824e-07, "loss": 0.8669, "step": 3188 }, { "epoch": 0.9809289449400185, "grad_norm": 0.5537660256807363, "learning_rate": 1.2156591671744766e-07, "loss": 0.9833, "step": 3189 }, { "epoch": 0.9812365426022762, "grad_norm": 0.6123855642618854, "learning_rate": 1.1811877260961002e-07, "loss": 1.0168, "step": 3190 }, { "epoch": 0.981544140264534, "grad_norm": 0.5587798299110127, "learning_rate": 1.1472115009310003e-07, "loss": 0.9674, "step": 3191 }, { "epoch": 0.9818517379267917, "grad_norm": 0.43504683317403586, "learning_rate": 1.1137305254100283e-07, "loss": 0.7722, "step": 3192 }, { "epoch": 0.9821593355890496, "grad_norm": 0.43804927450116254, "learning_rate": 1.0807448327723735e-07, "loss": 0.7642, "step": 3193 }, { "epoch": 0.9824669332513073, "grad_norm": 0.41461673350635325, "learning_rate": 1.0482544557654517e-07, "loss": 0.8274, "step": 3194 }, { "epoch": 0.982774530913565, "grad_norm": 0.6103894965884861, "learning_rate": 1.016259426644961e-07, "loss": 0.9931, "step": 3195 }, { "epoch": 0.9830821285758228, "grad_norm": 0.6810835543685314, "learning_rate": 9.847597771748263e-08, "loss": 0.9532, "step": 3196 }, { "epoch": 0.9833897262380806, "grad_norm": 0.6402776328392683, "learning_rate": 9.53755538627199e-08, "loss": 0.8217, "step": 3197 }, { "epoch": 0.9836973239003384, "grad_norm": 0.42996588381878786, "learning_rate": 9.232467417823465e-08, "loss": 0.8317, "step": 3198 }, { "epoch": 0.9840049215625961, "grad_norm": 0.4781116715236349, "learning_rate": 8.932334169287626e-08, "loss": 0.7207, "step": 3199 }, { "epoch": 0.9843125192248539, "grad_norm": 0.5378492490939668, "learning_rate": 8.637155938630015e-08, "loss": 0.8669, "step": 3200 }, { "epoch": 0.9846201168871117, "grad_norm": 0.8335674753453408, "learning_rate": 8.346933018895664e-08, "loss": 0.9502, "step": 3201 }, { "epoch": 0.9849277145493694, "grad_norm": 0.46349636063857425, "learning_rate": 8.061665698212428e-08, "loss": 0.8724, "step": 3202 }, { "epoch": 0.9852353122116272, "grad_norm": 0.5816060338130405, "learning_rate": 7.781354259786544e-08, "loss": 0.8773, "step": 3203 }, { "epoch": 0.9855429098738849, "grad_norm": 0.479742875309302, "learning_rate": 7.50599898190485e-08, "loss": 0.8892, "step": 3204 }, { "epoch": 0.9858505075361427, "grad_norm": 0.4579761803761792, "learning_rate": 7.235600137934228e-08, "loss": 0.7628, "step": 3205 }, { "epoch": 0.9861581051984005, "grad_norm": 0.6233986688310813, "learning_rate": 6.970157996319949e-08, "loss": 1.1034, "step": 3206 }, { "epoch": 0.9864657028606583, "grad_norm": 0.8005968057341729, "learning_rate": 6.70967282058732e-08, "loss": 1.0616, "step": 3207 }, { "epoch": 0.986773300522916, "grad_norm": 0.5778448368257472, "learning_rate": 6.454144869339485e-08, "loss": 0.9218, "step": 3208 }, { "epoch": 0.9870808981851737, "grad_norm": 0.7516204667661361, "learning_rate": 6.203574396259625e-08, "loss": 1.0806, "step": 3209 }, { "epoch": 0.9873884958474316, "grad_norm": 0.4275282989748105, "learning_rate": 5.957961650107646e-08, "loss": 0.8069, "step": 3210 }, { "epoch": 0.9876960935096893, "grad_norm": 0.44337120208800823, "learning_rate": 5.717306874722383e-08, "loss": 0.8738, "step": 3211 }, { "epoch": 0.9880036911719471, "grad_norm": 0.5550521450448873, "learning_rate": 5.481610309020502e-08, "loss": 0.829, "step": 3212 }, { "epoch": 0.9883112888342048, "grad_norm": 0.5401042165657979, "learning_rate": 5.250872186996492e-08, "loss": 0.8534, "step": 3213 }, { "epoch": 0.9886188864964627, "grad_norm": 0.5072735576019891, "learning_rate": 5.025092737721559e-08, "loss": 0.9921, "step": 3214 }, { "epoch": 0.9889264841587204, "grad_norm": 0.6789952849445062, "learning_rate": 4.804272185344183e-08, "loss": 0.9256, "step": 3215 }, { "epoch": 0.9892340818209782, "grad_norm": 0.5583881309017721, "learning_rate": 4.588410749090111e-08, "loss": 0.7844, "step": 3216 }, { "epoch": 0.9895416794832359, "grad_norm": 0.5491968508386571, "learning_rate": 4.3775086432618075e-08, "loss": 0.8249, "step": 3217 }, { "epoch": 0.9898492771454936, "grad_norm": 0.5254225413733209, "learning_rate": 4.171566077237898e-08, "loss": 0.8472, "step": 3218 }, { "epoch": 0.9901568748077515, "grad_norm": 0.7814215396123878, "learning_rate": 3.9705832554737254e-08, "loss": 1.0856, "step": 3219 }, { "epoch": 0.9904644724700092, "grad_norm": 0.485648321936469, "learning_rate": 3.774560377500236e-08, "loss": 0.9637, "step": 3220 }, { "epoch": 0.990772070132267, "grad_norm": 0.41528219112882275, "learning_rate": 3.5834976379239827e-08, "loss": 0.7523, "step": 3221 }, { "epoch": 0.9910796677945247, "grad_norm": 0.5417306205464572, "learning_rate": 3.397395226428235e-08, "loss": 0.944, "step": 3222 }, { "epoch": 0.9913872654567826, "grad_norm": 0.5515030898606424, "learning_rate": 3.216253327771313e-08, "loss": 1.1014, "step": 3223 }, { "epoch": 0.9916948631190403, "grad_norm": 0.44325296695522, "learning_rate": 3.0400721217865856e-08, "loss": 0.8611, "step": 3224 }, { "epoch": 0.9920024607812981, "grad_norm": 0.5288339676208952, "learning_rate": 2.868851783383031e-08, "loss": 1.0198, "step": 3225 }, { "epoch": 0.9923100584435558, "grad_norm": 0.48389116421314815, "learning_rate": 2.7025924825435644e-08, "loss": 0.8767, "step": 3226 }, { "epoch": 0.9926176561058137, "grad_norm": 0.5298304202817435, "learning_rate": 2.541294384327264e-08, "loss": 0.7539, "step": 3227 }, { "epoch": 0.9929252537680714, "grad_norm": 0.5255611570528039, "learning_rate": 2.3849576488665926e-08, "loss": 1.0152, "step": 3228 }, { "epoch": 0.9932328514303291, "grad_norm": 0.6065111151800793, "learning_rate": 2.2335824313696186e-08, "loss": 1.0638, "step": 3229 }, { "epoch": 0.9935404490925869, "grad_norm": 0.4373109907933577, "learning_rate": 2.087168882118351e-08, "loss": 0.8231, "step": 3230 }, { "epoch": 0.9938480467548446, "grad_norm": 0.8019635314210095, "learning_rate": 1.9457171464681846e-08, "loss": 0.9934, "step": 3231 }, { "epoch": 0.9941556444171025, "grad_norm": 0.5530981326169255, "learning_rate": 1.809227364849564e-08, "loss": 0.91, "step": 3232 }, { "epoch": 0.9944632420793602, "grad_norm": 0.5386030285713364, "learning_rate": 1.6776996727657647e-08, "loss": 0.856, "step": 3233 }, { "epoch": 0.994770839741618, "grad_norm": 0.5195412263126705, "learning_rate": 1.5511342007956676e-08, "loss": 0.8849, "step": 3234 }, { "epoch": 0.9950784374038757, "grad_norm": 0.43696895812604564, "learning_rate": 1.4295310745898738e-08, "loss": 0.813, "step": 3235 }, { "epoch": 0.9953860350661335, "grad_norm": 0.6532212916588821, "learning_rate": 1.31289041487348e-08, "loss": 0.793, "step": 3236 }, { "epoch": 0.9956936327283913, "grad_norm": 0.5376238495971823, "learning_rate": 1.2012123374438577e-08, "loss": 0.8584, "step": 3237 }, { "epoch": 0.996001230390649, "grad_norm": 0.6333950709385197, "learning_rate": 1.0944969531734294e-08, "loss": 1.1213, "step": 3238 }, { "epoch": 0.9963088280529068, "grad_norm": 0.7354879184187464, "learning_rate": 9.927443680063375e-09, "loss": 1.0019, "step": 3239 }, { "epoch": 0.9966164257151645, "grad_norm": 0.6106539822392645, "learning_rate": 8.959546829601095e-09, "loss": 0.8333, "step": 3240 }, { "epoch": 0.9969240233774224, "grad_norm": 0.4799792047602106, "learning_rate": 8.041279941262137e-09, "loss": 0.8967, "step": 3241 }, { "epoch": 0.9972316210396801, "grad_norm": 0.4762850748596598, "learning_rate": 7.172643926667277e-09, "loss": 0.9635, "step": 3242 }, { "epoch": 0.9975392187019378, "grad_norm": 0.6296510975494211, "learning_rate": 6.353639648187804e-09, "loss": 0.9043, "step": 3243 }, { "epoch": 0.9978468163641956, "grad_norm": 0.5035427682671989, "learning_rate": 5.58426791890665e-09, "loss": 0.957, "step": 3244 }, { "epoch": 0.9981544140264534, "grad_norm": 0.4911570072944277, "learning_rate": 4.864529502640603e-09, "loss": 1.0513, "step": 3245 }, { "epoch": 0.9984620116887112, "grad_norm": 0.7818085450997592, "learning_rate": 4.1944251139347525e-09, "loss": 0.9392, "step": 3246 }, { "epoch": 0.9987696093509689, "grad_norm": 0.5824809493475775, "learning_rate": 3.5739554180458377e-09, "loss": 0.7951, "step": 3247 }, { "epoch": 0.9990772070132267, "grad_norm": 0.5699231651825681, "learning_rate": 3.003121030970002e-09, "loss": 1.0193, "step": 3248 }, { "epoch": 0.9993848046754845, "grad_norm": 0.4875062813136044, "learning_rate": 2.4819225194094854e-09, "loss": 0.8502, "step": 3249 }, { "epoch": 0.9996924023377423, "grad_norm": 0.45779148753053156, "learning_rate": 2.010360400805933e-09, "loss": 0.9288, "step": 3250 }, { "epoch": 1.0, "grad_norm": 0.5087455541574444, "learning_rate": 1.5884351433070877e-09, "loss": 0.74, "step": 3251 }, { "epoch": 1.0, "step": 3251, "total_flos": 1451970551808000.0, "train_loss": 0.9185743353360252, "train_runtime": 6334.2285, "train_samples_per_second": 8.21, "train_steps_per_second": 0.513 } ], "logging_steps": 1, "max_steps": 3251, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1451970551808000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }